/*	$NetBSD: fpu.c,v 1.94 2026/01/17 10:42:39 bouyer Exp $	*/

/*
 * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc.  All
 * rights reserved.
 *
 * This code is derived from software developed for The NetBSD Foundation
 * by Andrew Doran and Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)npx.c	7.2 (Berkeley) 5/12/91
 */

/*
 * Copyright (c) 1994, 1995, 1998 Charles M. Hannum.  All rights reserved.
 * Copyright (c) 1990 William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)npx.c	7.2 (Berkeley) 5/12/91
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.94 2026/01/17 10:42:39 bouyer Exp $");

#include "opt_ddb.h"
#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/cpu.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/xcall.h>

#include <machine/cpu.h>
#include <machine/cpuvar.h>
#include <machine/cputypes.h>
#include <machine/intr.h>
#include <machine/cpufunc.h>
#include <machine/pcb.h>
#include <machine/trap.h>
#include <machine/specialreg.h>
#include <x86/cpu.h>
#include <x86/fpu.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#ifdef XENPV
#define clts() HYPERVISOR_fpu_taskswitch(0)
#define stts() HYPERVISOR_fpu_taskswitch(1)
#endif

void fpu_handle_deferred(void);
void fpu_switch(struct lwp *, struct lwp *);

uint32_t x86_fpu_mxcsr_mask __read_mostly = 0;

static const union savefpu safe_fpu_storage __aligned(64) = {
	.sv_xmm = {
		.fx_mxcsr = __SAFE_MXCSR__,
	},
};
static const union savefpu zero_fpu_storage __aligned(64);

static const void *safe_fpu __read_mostly = &safe_fpu_storage;
static const void *zero_fpu __read_mostly = &zero_fpu_storage;

/*
 * x86_fpu_save_separate_p()
 *
 *	True if we allocate the FPU save space separately, outside the
 *	struct pcb itself, because it doesn't fit in a single page.
 */
bool
x86_fpu_save_separate_p(void)
{

	return x86_fpu_save_size >
	    PAGE_SIZE - offsetof(struct pcb, pcb_savefpusmall);
}

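/*
 * fpu_lwp_area(l)
 *
 *	Return a pointer to l's FPU save area in memory.  If l is
 *	curlwp, its live FPU state is first written back to memory so
 *	the area is current; on return, the state is guaranteed not to
 *	be resident in the CPU.
 */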
static inline union savefpu *
fpu_lwp_area(struct lwp *l)
{
	struct pcb *pcb = lwp_getpcb(l);
	union savefpu *area = pcb->pcb_savefpu;

	KASSERT((l->l_flag & LW_SYSTEM) == 0);
	if (l == curlwp) {
		fpu_save();
	}
	KASSERT(!(l->l_md.md_flags & MDL_FPU_IN_CPU));

	return area;
}

static inline void
fpu_save_lwp(struct lwp *l)
{
	struct pcb *pcb = lwp_getpcb(l);
	union savefpu *area = pcb->pcb_savefpu;
	int s;

	s = splvm();
	if (l->l_md.md_flags & MDL_FPU_IN_CPU) {
		KASSERT((l->l_flag & LW_SYSTEM) == 0);
		fpu_area_save(area, x86_xsave_features,
		    !(l->l_proc->p_flag & PK_32));
		l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
	}
	splx(s);
}

/*
 * Bring curlwp's FPU state into memory. It will get installed back in the CPU
 * when returning to userland.
 */
void
fpu_save(void)
{
	fpu_save_lwp(curlwp);
}

void
fpuinit(struct cpu_info *ci)
{
	/*
	 * This might not be strictly necessary since it will be initialized
	 * for each process. However it does no harm.
	 */
	clts();
	fninit();
	stts();
}

/*
 * fpuinit_mxcsr_mask()
 *
 *	Called once by cpu_init on the primary CPU.  Initializes
 *	x86_fpu_mxcsr_mask based on the initial FPU state, and
 *	initializes safe_fpu and zero_fpu if necessary when the
 *	hardware's FPU save size is larger than union savefpu.
 *
 *	XXX Rename this function!
 */
void
fpuinit_mxcsr_mask(void)
{
	/*
	 * If the CPU's x86 fpu save size is larger than union savefpu,
	 * we have to allocate larger buffers for the safe and zero FPU
	 * states used here and by fpu_kern_enter/leave.
	 *
	 * Note: This is NOT the same as x86_fpu_save_separate_p(),
	 * which compares against the space left in the pcb -- that may
	 * be a little larger than union savefpu.
	 */
	const bool allocfpusave = x86_fpu_save_size > sizeof(union savefpu);
	vaddr_t va;

#if defined XENPV
	if (x86_fpu_save_separate_p()) {
		/*
		 * XXX Temporary workaround for PR kern/59371 until we
		 * work out the implications.
		 */
		panic("NetBSD/xen does not support fpu save size %u",
		    x86_fpu_save_size);
	}
#elif defined __i386__
	if (x86_fpu_save_separate_p()) {
		/*
		 * XXX Need to teach cpu_uarea_alloc/free to allocate a
		 * separate fpu save space, and make pcb_savefpu a
		 * pointer indirection -- currently only done on amd64,
		 * not on i386.
		 *
		 * But the primary motivation on amd64 is the 8192-byte
		 * TILEDATA state for Intel AMX (Advanced Matrix
		 * Extensions), which doesn't work in 32-bit mode
		 * anyway, so on such machines we ought to just disable
		 * it in the first place and keep x86_fpu_save_size
		 * down:
		 *
		 *	While Intel AMX instructions can be executed
		 *	only in 64-bit mode, instructions of the XSAVE
		 *	feature set can operate on TILECFG and TILEDATA
		 *	in any mode.  It is recommended that only
		 *	64-bit operating systems enable Intel AMX by
		 *	setting XCR0[18:17].
		 *
		 * --Intel 64 and IA-32 Architectures Software
		 * Developer's Manual, Volume 1: Basic
		 * Architecture, Order Number: 253665-087US, March
		 * 2025, Sec. 13.3 `Enabling the XSAVE feature set
		 * and XSAVE-enabled features', p. 13-6.
		 * https://cdrdv2.intel.com/v1/dl/getContent/671436
		 * https://web.archive.org/web/20250404141850/https://cdrdv2-public.intel.com/851056/253665-087-sdm-vol-1.pdf
		 * https://web.archive.org/web/20250404141850if_/https://cdrdv2-public.intel.com/851056/253665-087-sdm-vol-1.pdf#page=324
		 */
		panic("NetBSD/i386 does not support fpu save size %u",
		    x86_fpu_save_size);
	}
#endif

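	/*
	 * Probe the hardware's MXCSR_MASK: execute FXSAVE into a
	 * zeroed, suitably aligned buffer and examine fx_mxcsr_mask.
	 * A value of zero there means the CPU does not report a mask,
	 * in which case the architectural default
	 * __INITIAL_MXCSR_MASK__ is used instead.
	 */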
#ifndef XENPV
	struct fxsave fpusave __aligned(64);
	u_long psl;

	memset(&fpusave, 0, sizeof(fpusave));

	/* Disable interrupts, and enable FPU */
	psl = x86_read_psl();
	x86_disable_intr();
	clts();

	/* Fill in the FPU area */
	fxsave(&fpusave);

	/* Restore previous state */
	stts();
	x86_write_psl(psl);

	if (fpusave.fx_mxcsr_mask == 0) {
		x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
	} else {
		x86_fpu_mxcsr_mask = fpusave.fx_mxcsr_mask;
	}
#else
	/*
	 * XXX XXX XXX: On Xen the FXSAVE above faults.  That's because
	 * &fpusave is not 16-byte aligned.  Stack alignment problem
	 * somewhere, it seems.
	 */
	x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
#endif

	/*
	 * If necessary, allocate FPU save spaces for safe or zero FPU
	 * state, for fpu_kern_enter/leave.
	 */
	if (allocfpusave) {
		__CTASSERT(PAGE_SIZE >= 64);

		va = uvm_km_alloc(kernel_map, x86_fpu_save_size, PAGE_SIZE,
		    UVM_KMF_WIRED|UVM_KMF_ZERO|UVM_KMF_WAITVA);
		memcpy((void *)va, &safe_fpu_storage,
		    sizeof(safe_fpu_storage));
		uvm_km_protect(kernel_map, va, x86_fpu_save_size,
		    VM_PROT_READ);
		safe_fpu = (void *)va;

		va = uvm_km_alloc(kernel_map, x86_fpu_save_size, PAGE_SIZE,
		    UVM_KMF_WIRED|UVM_KMF_ZERO|UVM_KMF_WAITVA);
		/*
		 * No initialization -- just want zeroes!  In fact we
		 * could share this with other all-zero pages.
		 */
		uvm_km_protect(kernel_map, va, x86_fpu_save_size,
		    VM_PROT_READ);
		zero_fpu = (void *)va;
	}
}

static inline void
fpu_errata_amd(void)
{
	uint16_t sw;

	/*
	 * AMD FPUs do not restore FIP, FDP, and FOP on fxrstor and xrstor
	 * when FSW.ES=0, leaking other threads' execution history.
	 *
	 * Clear them manually by loading a zero (fldummy). We do this
	 * unconditionally, regardless of FSW.ES.
	 *
	 * Before that, clear the ES bit in the x87 status word if it is
	 * currently set, in order to avoid causing a fault in the
	 * upcoming load.
	 *
	 * Newer generations of AMD CPUs have CPUID_Fn80000008_EBX[2],
	 * which indicates that FIP/FDP/FOP are restored (same behavior
	 * as Intel). We're not using it though.
	 */
	fnstsw(&sw);
	if (sw & 0x80)
		fnclex();
	fldummy();
}

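/*
 * XS64(x) selects the 64-bit variant of a save/restore primitive
 * (e.g. fxsave64 instead of fxsave) when operating on a 64-bit lwp,
 * so the full 64-bit instruction and operand pointers are preserved;
 * on i386 only the 32-bit forms exist.
 */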
#ifdef __x86_64__
#define XS64(x) (is_64bit ? x##64 : x)
#else
#define XS64(x) x
#endif

void
fpu_area_save(void *area, uint64_t xsave_features, bool is_64bit)
{
	switch (x86_fpu_save) {
	case FPU_SAVE_FSAVE:
		fnsave(area);
		break;
	case FPU_SAVE_FXSAVE:
		XS64(fxsave)(area);
		break;
	case FPU_SAVE_XSAVE:
		XS64(xsave)(area, xsave_features);
		break;
	case FPU_SAVE_XSAVEOPT:
		XS64(xsaveopt)(area, xsave_features);
		break;
	}

	stts();
}

void
fpu_area_restore(const void *area, uint64_t xsave_features, bool is_64bit)
{
	clts();

	switch (x86_fpu_save) {
	case FPU_SAVE_FSAVE:
		frstor(area);
		break;
	case FPU_SAVE_FXSAVE:
		if (cpu_vendor == CPUVENDOR_AMD)
			fpu_errata_amd();
		XS64(fxrstor)(area);
		break;
	case FPU_SAVE_XSAVE:
	case FPU_SAVE_XSAVEOPT:
		if (cpu_vendor == CPUVENDOR_AMD)
			fpu_errata_amd();
		XS64(xrstor)(area, xsave_features);
		break;
	}
}

void
fpu_handle_deferred(void)
{
	struct pcb *pcb = lwp_getpcb(curlwp);
	fpu_area_restore(pcb->pcb_savefpu, x86_xsave_features,
	    !(curlwp->l_proc->p_flag & PK_32));
}

void
fpu_switch(struct lwp *oldlwp, struct lwp *newlwp)
{
	struct cpu_info *ci __diagused = curcpu();
	struct pcb *pcb;

	KASSERTMSG(ci->ci_ilevel >= IPL_SCHED, "cpu%d ilevel=%d",
	    cpu_index(ci), ci->ci_ilevel);

	if (oldlwp->l_md.md_flags & MDL_FPU_IN_CPU) {
		KASSERT(!(oldlwp->l_flag & LW_SYSTEM));
		pcb = lwp_getpcb(oldlwp);
		fpu_area_save(pcb->pcb_savefpu, x86_xsave_features,
		    !(oldlwp->l_proc->p_flag & PK_32));
		oldlwp->l_md.md_flags &= ~MDL_FPU_IN_CPU;
	}
	KASSERT(!(newlwp->l_md.md_flags & MDL_FPU_IN_CPU));
}

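/*
 * fpu_lwp_fork(l1, l2)
 *
 *	Give the new lwp l2 a copy of l1's FPU state at fork time.
 *	Kernel threads get no FPU state; a child of a kernel thread
 *	(the way init(8) is started) gets a zeroed save area.
 */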
void
fpu_lwp_fork(struct lwp *l1, struct lwp *l2)
{
	struct pcb *pcb2 = lwp_getpcb(l2);
	union savefpu *fpu_save;

	/* Kernel threads have no FPU. */
	if (__predict_false(l2->l_flag & LW_SYSTEM)) {
		return;
	}

	/* For init(8). */
	if (__predict_false(l1->l_flag & LW_SYSTEM)) {
		memset(pcb2->pcb_savefpu, 0, x86_fpu_save_size);
		return;
	}

	fpu_save = fpu_lwp_area(l1);
	memcpy(pcb2->pcb_savefpu, fpu_save, x86_fpu_save_size);
	l2->l_md.md_flags &= ~MDL_FPU_IN_CPU;
}

void
fpu_lwp_abandon(struct lwp *l)
{
	int s;

	KASSERT(l == curlwp);
	s = splvm();
	l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
	stts();
	splx(s);
}

/* -------------------------------------------------------------------------- */

/*
 * fpu_kern_enter()
 *
 *	Begin using the FPU.  Raises to splvm, disabling most
 *	interrupts and rendering the thread non-preemptible; caller
 *	should not use this for long periods of time, and must call
 *	fpu_kern_leave() afterward.  Non-recursive -- you cannot call
 *	fpu_kern_enter() again without calling fpu_kern_leave() first.
 *
 *	Must be used only at IPL_VM or below -- never in IPL_SCHED or
 *	IPL_HIGH interrupt handlers.
 */
void
fpu_kern_enter(void)
{
	struct lwp *l = curlwp;
	struct cpu_info *ci;
	int s;

	s = splvm();

	ci = curcpu();
#if 0
	/*
	 * Can't assert this because if the caller holds a spin lock at
	 * IPL_VM, and previously held and released a spin lock at
	 * higher IPL, the IPL remains raised above IPL_VM.
	 */
	KASSERTMSG(ci->ci_ilevel <= IPL_VM || cold, "ilevel=%d",
	    ci->ci_ilevel);
#endif
	KASSERT(ci->ci_kfpu_spl == -1);
	ci->ci_kfpu_spl = s;

	/*
	 * If we are in a softint and have a pinned lwp, the fpu state is that
	 * of the pinned lwp, so save it there.
	 */
	while ((l->l_pflag & LP_INTR) && (l->l_switchto != NULL))
		l = l->l_switchto;
	fpu_save_lwp(l);

	/*
	 * Clear CR0_TS, which fpu_save_lwp set if it saved anything --
	 * otherwise the CPU will trap if we try to use the FPU under
	 * the false impression that there has been a task switch since
	 * the last FPU usage requiring that we save the FPU state.
	 */
	clts();

	/*
	 * Zero the FPU registers and install safe control words.
	 */
	fpu_area_restore(safe_fpu, x86_xsave_features, /*is_64bit*/false);
}

/*
 * fpu_kern_leave()
 *
 *	End using the FPU after fpu_kern_enter().
 */
void
fpu_kern_leave(void)
{
	struct cpu_info *ci = curcpu();
	int s;

#if 0
	/*
	 * Can't assert this because if the caller holds a spin lock at
	 * IPL_VM, and previously held and released a spin lock at
	 * higher IPL, the IPL remains raised above IPL_VM.
	 */
	KASSERT(ci->ci_ilevel == IPL_VM || cold);
#endif
	KASSERT(ci->ci_kfpu_spl != -1);

	/*
	 * Zero the fpu registers; otherwise we might leak secrets
	 * through Spectre-class attacks to userland, even if there are
	 * no bugs in fpu state management.
	 */
	fpu_area_restore(zero_fpu, x86_xsave_features, /*is_64bit*/false);

	/*
	 * Set CR0_TS again so that the kernel can't accidentally use
	 * the FPU.
	 */
	stts();

	s = ci->ci_kfpu_spl;
	ci->ci_kfpu_spl = -1;
	splx(s);
}

/* -------------------------------------------------------------------------- */

/*
 * The following table is used to ensure that the FPE_... value
 * that is passed as a trapcode to the signal handler of the user
 * process does not have more than one bit set.
 *
 * Multiple bits may be set if SSE simd instructions generate errors
 * on more than one value or if the user process modifies the control
 * word while a status word bit is already set (which is a sign of
 * bad coding).
 * We have no choice but to narrow them down to one bit, since we must
 * not send a trapcode that is not exactly one of the FPE_ macros.
 *
 * The mechanism has a static table with 127 entries.  Each combination
 * of the 7 FPU status word exception bits directly translates to a
 * position in this table, where a single FPE_... value is stored.
 * This FPE_... value stored there is considered the "most important"
 * of the exception bits and will be sent as the signal code.  The
 * precedence of the bits is based upon Intel Document "Numerical
 * Applications", Chapter "Special Computational Situations".
 *
 * The code to choose one of these values does these steps:
 * 1) Throw away status word bits that cannot be masked.
 * 2) Throw away the bits currently masked in the control word,
 *    assuming the user isn't interested in them anymore.
 * 3) Reinsert status word bit 7 (stack fault) if it is set, which
 *    cannot be masked but must be preserved.
 *    'Stack fault' is a sub-class of 'invalid operation'.
 * 4) Use the remaining bits to point into the trapcode table.
 *
 * The 6 maskable bits in order of their preference, as stated in the
 * above referenced Intel manual:
 *   1  Invalid operation (FP_X_INV)
 *     1a   Stack underflow
 *     1b   Stack overflow
 *     1c   Operand of unsupported format
 *     1d   SNaN operand.
 *   2  QNaN operand (not an exception, irrelevant here)
 *   3  Any other invalid-operation not mentioned above or zero divide
 *        (FP_X_INV, FP_X_DZ)
 *   4  Denormal operand (FP_X_DNML)
 *   5  Numeric over/underflow (FP_X_OFL, FP_X_UFL)
 *   6  Inexact result (FP_X_IMP)
 *
 * NB: the above seems to mix up the mxcsr error bits and the x87 ones.
 * They are in the same order, but there is no EN_SW_STACK_FAULT in the
 * mxcsr status.
 *
 * The table is nearly, but not quite, in bit order (ZERODIV and DENORM
 * are swapped).
 *
 * This table assumes that any stack fault is cleared - so that an INVOP
 * fault will only be reported as FLTSUB once.
 * This might not happen if the mask is being changed.
 */
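/*
 * For example, a trap with both the invalid-operation and zero-divide
 * status bits set (and neither masked, no stack fault) indexes an
 * entry that yields FPE_FLTINV: invalid operation takes precedence
 * over the divide-by-zero condition.
 */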
#define FPE_xxx1(f) (f & EN_SW_INVOP \
	? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \
	: f & EN_SW_ZERODIV ? FPE_FLTDIV \
	: f & EN_SW_DENORM ? FPE_FLTUND \
	: f & EN_SW_OVERFLOW ? FPE_FLTOVF \
	: f & EN_SW_UNDERFLOW ? FPE_FLTUND \
	: f & EN_SW_PRECLOSS ? FPE_FLTRES \
	: f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0)
#define	FPE_xxx2(f)	FPE_xxx1(f), FPE_xxx1((f + 1))
#define	FPE_xxx4(f)	FPE_xxx2(f), FPE_xxx2((f + 2))
#define	FPE_xxx8(f)	FPE_xxx4(f), FPE_xxx4((f + 4))
#define	FPE_xxx16(f)	FPE_xxx8(f), FPE_xxx8((f + 8))
#define	FPE_xxx32(f)	FPE_xxx16(f), FPE_xxx16((f + 16))
static const uint8_t fpetable[128] = {
	FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96)
};
#undef FPE_xxx1
#undef FPE_xxx2
#undef FPE_xxx4
#undef FPE_xxx8
#undef FPE_xxx16
#undef FPE_xxx32

/*
 * This is a synchronous trap on either an x87 instruction (due to an unmasked
 * error on the previous x87 instruction) or on an SSE/SSE2/etc instruction due
 * to an error on the instruction itself.
 *
 * If the trap actually generates a signal, then the fpu state is saved and
 * then copied onto the lwp's user-stack, and then recovered from there when
 * the signal returns.
 *
 * All this code needs to do is save the reason for the trap.  For x87 traps
 * the status word bits need clearing to stop the trap recurring.  For SSE
 * traps the mxcsr bits are 'sticky' and need clearing to not confuse a later
 * trap.
 *
 * We come here with interrupts disabled.
 */
void
fputrap(struct trapframe *frame)
{
	uint32_t statbits;
	ksiginfo_t ksi;

	if (__predict_false(!USERMODE(frame->tf_cs))) {
		register_t ip = X86_TF_RIP(frame);
		char where[128];

#ifdef DDB
		db_symstr(where, sizeof(where), (db_expr_t)ip, DB_STGY_PROC);
#else
		snprintf(where, sizeof(where), "%p", (void *)ip);
#endif
		x86_enable_intr();
		panic("fpu trap from kernel at %s, trapframe %p\n", where,
		    frame);
	}

	KASSERT(curlwp->l_md.md_flags & MDL_FPU_IN_CPU);

	if (frame->tf_trapno == T_XMM) {
		uint32_t mxcsr;
		x86_stmxcsr(&mxcsr);
		statbits = mxcsr;
		/* Clear the sticky status bits */
		mxcsr &= ~0x3f;
		x86_ldmxcsr(&mxcsr);

		/* Remove masked interrupts and non-status bits */
		statbits &= ~(statbits >> 7) & 0x3f;
		/* Mark this as an XMM status */
		statbits |= 0x10000;
	} else {
		uint16_t cw, sw;
		/* Get current control and status words */
		fnstcw(&cw);
		fnstsw(&sw);
		/* Clear any pending exceptions from status word */
		fnclex();

		/* Remove masked interrupts */
		statbits = sw & ~(cw & 0x3f);
	}

	/* Doesn't matter now if we get pre-empted */
	x86_enable_intr();

	KSI_INIT_TRAP(&ksi);
	ksi.ksi_signo = SIGFPE;
	ksi.ksi_addr = (void *)X86_TF_RIP(frame);
	ksi.ksi_code = fpetable[statbits & 0x7f];
	ksi.ksi_trap = statbits;
	(*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi);
}

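/*
 * fpudna(frame)
 *
 *	Device-not-available (#NM) trap handler.  Xen PV can deliver
 *	spurious DNA traps from user mode, which are simply dismissed;
 *	any other DNA trap indicates a bug and is fatal.
 */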
void
fpudna(struct trapframe *frame)
{
#ifdef XENPV
	/*
	 * Xen produces spurious fpudna traps, just do nothing.
	 */
	if (USERMODE(frame->tf_cs)) {
		clts();
		return;
	}
#endif
	panic("fpudna from %s, ip %p, trapframe %p",
	    USERMODE(frame->tf_cs) ? "userland" : "kernel",
	    (void *)X86_TF_RIP(frame), frame);
}

/* -------------------------------------------------------------------------- */

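/*
 * fpu_xstate_reload(fpu_save, xstate)
 *
 *	Mark the given xstate component(s) as present in the save area
 *	so the next XRSTOR loads our in-memory modification instead of
 *	treating the component as being in its initial configuration.
 */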
static inline void
fpu_xstate_reload(union savefpu *fpu_save, uint64_t xstate)
{
	/*
	 * Force a reload of the given xstate during the next XRSTOR.
	 */
	if (x86_fpu_save >= FPU_SAVE_XSAVE) {
		fpu_save->sv_xsave_hdr.xsh_xstate_bv |= xstate;
	}
}

void
fpu_set_default_cw(struct lwp *l, unsigned int x87_cw)
{
	union savefpu *fpu_save = fpu_lwp_area(l);
	struct pcb *pcb = lwp_getpcb(l);

	if (i386_use_fxsave) {
		fpu_save->sv_xmm.fx_cw = x87_cw;
		if (x87_cw != __INITIAL_NPXCW__) {
			fpu_xstate_reload(fpu_save, XCR0_X87);
		}
	} else {
		fpu_save->sv_87.s87_cw = x87_cw;
	}
	pcb->pcb_fpu_dflt_cw = x87_cw;
}

void
fpu_clear(struct lwp *l, unsigned int x87_cw)
{
	union savefpu *fpu_save;
	struct pcb *pcb;

	KASSERT(l == curlwp);
	fpu_save = fpu_lwp_area(l);

	switch (x86_fpu_save) {
	case FPU_SAVE_FSAVE:
		memset(&fpu_save->sv_87, 0, x86_fpu_save_size);
		fpu_save->sv_87.s87_tw = 0xffff;
		fpu_save->sv_87.s87_cw = x87_cw;
		break;
	case FPU_SAVE_FXSAVE:
		memset(&fpu_save->sv_xmm, 0, x86_fpu_save_size);
		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
		fpu_save->sv_xmm.fx_cw = x87_cw;
		break;
	case FPU_SAVE_XSAVE:
	case FPU_SAVE_XSAVEOPT:
		memset(&fpu_save->sv_xmm, 0, x86_fpu_save_size);
		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
		fpu_save->sv_xmm.fx_cw = x87_cw;
		if (__predict_false(x87_cw != __INITIAL_NPXCW__)) {
			fpu_xstate_reload(fpu_save, XCR0_X87);
		}
		break;
	}

	pcb = lwp_getpcb(l);
	pcb->pcb_fpu_dflt_cw = x87_cw;
}

void
fpu_sigreset(struct lwp *l)
{
	union savefpu *fpu_save = fpu_lwp_area(l);
	struct pcb *pcb = lwp_getpcb(l);

	/*
	 * For signal handlers the register values don't matter. Just reset
	 * a few fields.
	 */
	if (i386_use_fxsave) {
		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
		fpu_save->sv_xmm.fx_tw = 0;
		fpu_save->sv_xmm.fx_cw = pcb->pcb_fpu_dflt_cw;
	} else {
		fpu_save->sv_87.s87_tw = 0xffff;
		fpu_save->sv_87.s87_cw = pcb->pcb_fpu_dflt_cw;
	}
}

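/*
 * The process_{read,write}_fpregs_{xmm,s87} functions give debuggers
 * and core dumps access to an lwp's FPU registers, converting between
 * the FXSAVE and legacy FSAVE layouts when the kernel and the caller
 * use different formats.
 */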
void
process_write_fpregs_xmm(struct lwp *l, const struct fxsave *fpregs)
{
	union savefpu *fpu_save = fpu_lwp_area(l);

	if (i386_use_fxsave) {
		memcpy(&fpu_save->sv_xmm, fpregs, sizeof(fpu_save->sv_xmm));

		/*
		 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
		 */
		fpu_save->sv_xmm.fx_mxcsr_mask &= x86_fpu_mxcsr_mask;
		fpu_save->sv_xmm.fx_mxcsr &= fpu_save->sv_xmm.fx_mxcsr_mask;

		fpu_xstate_reload(fpu_save, XCR0_X87 | XCR0_SSE);
	} else {
		process_xmm_to_s87(fpregs, &fpu_save->sv_87);
	}
}

void
process_write_fpregs_s87(struct lwp *l, const struct save87 *fpregs)
{
	union savefpu *fpu_save = fpu_lwp_area(l);

	if (i386_use_fxsave) {
		process_s87_to_xmm(fpregs, &fpu_save->sv_xmm);
		fpu_xstate_reload(fpu_save, XCR0_X87 | XCR0_SSE);
	} else {
		memcpy(&fpu_save->sv_87, fpregs, sizeof(fpu_save->sv_87));
	}
}

void
process_read_fpregs_xmm(struct lwp *l, struct fxsave *fpregs)
{
	union savefpu *fpu_save = fpu_lwp_area(l);

	if (i386_use_fxsave) {
		memcpy(fpregs, &fpu_save->sv_xmm, sizeof(fpu_save->sv_xmm));
	} else {
		memset(fpregs, 0, sizeof(*fpregs));
		process_s87_to_xmm(&fpu_save->sv_87, fpregs);
	}
}

void
process_read_fpregs_s87(struct lwp *l, struct save87 *fpregs)
{
	union savefpu *fpu_save = fpu_lwp_area(l);

	if (i386_use_fxsave) {
		memset(fpregs, 0, sizeof(*fpregs));
		process_xmm_to_s87(&fpu_save->sv_xmm, fpregs);
	} else {
		memcpy(fpregs, &fpu_save->sv_87, sizeof(fpu_save->sv_87));
	}
}

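/*
 * process_read_xstate / process_verify_xstate / process_write_xstate
 * expose the extended (XSAVE) register state -- SSE, AVX and wider --
 * for the PT_GETXSTATE and PT_SETXSTATE ptrace(2) requests, degrading
 * gracefully on CPUs that only support FXSAVE or FSAVE.
 */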
int
process_read_xstate(struct lwp *l, struct xstate *xstate)
{
	union savefpu *fpu_save = fpu_lwp_area(l);

	if (x86_fpu_save == FPU_SAVE_FSAVE) {
		/* Convert from legacy FSAVE format. */
		memset(&xstate->xs_fxsave, 0, sizeof(xstate->xs_fxsave));
		process_s87_to_xmm(&fpu_save->sv_87, &xstate->xs_fxsave);

		/* We only got x87 data. */
		xstate->xs_rfbm = XCR0_X87;
		xstate->xs_xstate_bv = XCR0_X87;
		return 0;
	}

	/* Copy the legacy area. */
	memcpy(&xstate->xs_fxsave, fpu_save->sv_xsave_hdr.xsh_fxsave,
	    sizeof(xstate->xs_fxsave));

	if (x86_fpu_save == FPU_SAVE_FXSAVE) {
		/* FXSAVE means we've got x87 + SSE data. */
		xstate->xs_rfbm = XCR0_X87 | XCR0_SSE;
		xstate->xs_xstate_bv = XCR0_X87 | XCR0_SSE;
		return 0;
	}

	/* Copy the bitmap indicating which states are available. */
	xstate->xs_rfbm = x86_xsave_features & XCR0_FPU;
	xstate->xs_xstate_bv = fpu_save->sv_xsave_hdr.xsh_xstate_bv;
	KASSERT(!(xstate->xs_xstate_bv & ~xstate->xs_rfbm));

#define COPY_COMPONENT(xcr0_val, xsave_val, field)			\
	if (xstate->xs_xstate_bv & xcr0_val) {				\
		KASSERT(x86_xsave_offsets[xsave_val]			\
		    >= sizeof(struct xsave_header));			\
		KASSERT(x86_xsave_sizes[xsave_val]			\
		    >= sizeof(xstate->field));				\
		memcpy(&xstate->field,					\
		    (char*)fpu_save + x86_xsave_offsets[xsave_val],	\
		    sizeof(xstate->field));				\
	}

	COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
	COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
	COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
	COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);

#undef COPY_COMPONENT

	return 0;
}

int
process_verify_xstate(const struct xstate *xstate)
{
	/* xstate_bv must be a subset of RFBM */
	if (xstate->xs_xstate_bv & ~xstate->xs_rfbm)
		return EINVAL;

	switch (x86_fpu_save) {
	case FPU_SAVE_FSAVE:
		if ((xstate->xs_rfbm & ~XCR0_X87))
			return EINVAL;
		break;
	case FPU_SAVE_FXSAVE:
		if ((xstate->xs_rfbm & ~(XCR0_X87 | XCR0_SSE)))
			return EINVAL;
		break;
	default:
		/* Verify that no unsupported features are enabled */
		if ((xstate->xs_rfbm & ~(x86_xsave_features & XCR0_FPU)) != 0)
			return EINVAL;
	}

	return 0;
}

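/*
 * process_write_xstate(l, xstate)
 *
 *	Install the components selected by xstate->xs_rfbm into l's
 *	save area.  Components covered by RFBM but absent from
 *	xstate_bv have their bit cleared in the saved xstate_bv, so the
 *	next XRSTOR treats them as being in their initial configuration.
 */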
int
process_write_xstate(struct lwp *l, const struct xstate *xstate)
{
	union savefpu *fpu_save = fpu_lwp_area(l);

	/* Convert data into legacy FSAVE format. */
	if (x86_fpu_save == FPU_SAVE_FSAVE) {
		if (xstate->xs_xstate_bv & XCR0_X87)
			process_xmm_to_s87(&xstate->xs_fxsave,
			    &fpu_save->sv_87);
		return 0;
	}

	/* If XSAVE is supported, make sure that xstate_bv is set correctly. */
	if (x86_fpu_save >= FPU_SAVE_XSAVE) {
		/*
		 * Bit-wise "xstate->xs_rfbm ? xstate->xs_xstate_bv :
		 * fpu_save->sv_xsave_hdr.xsh_xstate_bv"
		 */
		fpu_save->sv_xsave_hdr.xsh_xstate_bv =
		    (fpu_save->sv_xsave_hdr.xsh_xstate_bv & ~xstate->xs_rfbm) |
		    xstate->xs_xstate_bv;
	}

	if (xstate->xs_xstate_bv & XCR0_X87) {
		/*
		 * X87 state is split into two areas, interspersed with SSE
		 * data.
		 */
		memcpy(&fpu_save->sv_xmm, &xstate->xs_fxsave, 24);
		memcpy(fpu_save->sv_xmm.fx_87_ac, xstate->xs_fxsave.fx_87_ac,
		    sizeof(xstate->xs_fxsave.fx_87_ac));
	}

	/*
	 * Copy MXCSR if either SSE or AVX state is requested, to match the
	 * XSAVE behavior for those flags.
	 */
	if (xstate->xs_xstate_bv & (XCR0_SSE|XCR0_YMM_Hi128)) {
		/*
		 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
		 */
		fpu_save->sv_xmm.fx_mxcsr_mask =
		    xstate->xs_fxsave.fx_mxcsr_mask & x86_fpu_mxcsr_mask;
		fpu_save->sv_xmm.fx_mxcsr = xstate->xs_fxsave.fx_mxcsr &
		    fpu_save->sv_xmm.fx_mxcsr_mask;
	}

	if (xstate->xs_xstate_bv & XCR0_SSE) {
		memcpy(&fpu_save->sv_xsave_hdr.xsh_fxsave[160],
		    xstate->xs_fxsave.fx_xmm,
		    sizeof(xstate->xs_fxsave.fx_xmm));
	}

#define COPY_COMPONENT(xcr0_val, xsave_val, field)			\
	if (xstate->xs_xstate_bv & xcr0_val) {				\
		KASSERT(x86_xsave_offsets[xsave_val]			\
		    >= sizeof(struct xsave_header));			\
		KASSERT(x86_xsave_sizes[xsave_val]			\
		    >= sizeof(xstate->field));				\
		memcpy((char *)fpu_save + x86_xsave_offsets[xsave_val],	\
		    &xstate->field, sizeof(xstate->field));		\
	}

	COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
	COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
	COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
	COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);

#undef COPY_COMPONENT

	return 0;
}