1 1.1 mrg /* FPU-related code for x86 and x86_64 processors. 2 1.1.1.3 mrg Copyright (C) 2005-2022 Free Software Foundation, Inc. 3 1.1 mrg Contributed by Francois-Xavier Coudert <coudert (at) clipper.ens.fr> 4 1.1 mrg 5 1.1 mrg This file is part of the GNU Fortran 95 runtime library (libgfortran). 6 1.1 mrg 7 1.1 mrg Libgfortran is free software; you can redistribute it and/or 8 1.1 mrg modify it under the terms of the GNU General Public 9 1.1 mrg License as published by the Free Software Foundation; either 10 1.1 mrg version 3 of the License, or (at your option) any later version. 11 1.1 mrg 12 1.1 mrg Libgfortran is distributed in the hope that it will be useful, 13 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of 14 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 1.1 mrg GNU General Public License for more details. 16 1.1 mrg 17 1.1 mrg Under Section 7 of GPL version 3, you are granted additional 18 1.1 mrg permissions described in the GCC Runtime Library Exception, version 19 1.1 mrg 3.1, as published by the Free Software Foundation. 20 1.1 mrg 21 1.1 mrg You should have received a copy of the GNU General Public License and 22 1.1 mrg a copy of the GCC Runtime Library Exception along with this program; 23 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24 1.1 mrg <http://www.gnu.org/licenses/>. */ 25 1.1 mrg 26 1.1 mrg #ifndef __SSE_MATH__ 27 1.1 mrg #include "cpuid.h" 28 1.1 mrg #endif 29 1.1 mrg 30 1.1 mrg static int 31 1.1 mrg has_sse (void) 32 1.1 mrg { 33 1.1 mrg #ifndef __SSE_MATH__ 34 1.1 mrg unsigned int eax, ebx, ecx, edx; 35 1.1 mrg 36 1.1 mrg if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)) 37 1.1 mrg return 0; 38 1.1 mrg 39 1.1 mrg return edx & bit_SSE; 40 1.1 mrg #else 41 1.1 mrg return 1; 42 1.1 mrg #endif 43 1.1 mrg } 44 1.1 mrg 45 1.1 mrg /* i387 exceptions -- see linux <fpu_control.h> header file for details. */ 46 1.1 mrg #define _FPU_MASK_IM 0x01 47 1.1 mrg #define _FPU_MASK_DM 0x02 48 1.1 mrg #define _FPU_MASK_ZM 0x04 49 1.1 mrg #define _FPU_MASK_OM 0x08 50 1.1 mrg #define _FPU_MASK_UM 0x10 51 1.1 mrg #define _FPU_MASK_PM 0x20 52 1.1 mrg #define _FPU_MASK_ALL 0x3f 53 1.1 mrg 54 1.1 mrg #define _FPU_EX_ALL 0x3f 55 1.1 mrg 56 1.1 mrg /* i387 rounding modes. */ 57 1.1 mrg 58 1.1 mrg #define _FPU_RC_NEAREST 0x0 59 1.1 mrg #define _FPU_RC_DOWN 0x1 60 1.1 mrg #define _FPU_RC_UP 0x2 61 1.1 mrg #define _FPU_RC_ZERO 0x3 62 1.1 mrg 63 1.1 mrg #define _FPU_RC_MASK 0x3 64 1.1 mrg 65 1.1 mrg /* Enable flush to zero mode. */ 66 1.1 mrg 67 1.1 mrg #define MXCSR_FTZ (1 << 15) 68 1.1 mrg 69 1.1 mrg 70 1.1 mrg /* This structure corresponds to the layout of the block 71 1.1 mrg written by FSTENV. */ 72 1.1.1.3 mrg struct fenv 73 1.1 mrg { 74 1.1 mrg unsigned short int __control_word; 75 1.1 mrg unsigned short int __unused1; 76 1.1 mrg unsigned short int __status_word; 77 1.1 mrg unsigned short int __unused2; 78 1.1 mrg unsigned short int __tags; 79 1.1 mrg unsigned short int __unused3; 80 1.1 mrg unsigned int __eip; 81 1.1 mrg unsigned short int __cs_selector; 82 1.1.1.3 mrg unsigned int __opcode:11; 83 1.1.1.3 mrg unsigned int __unused4:5; 84 1.1 mrg unsigned int __data_offset; 85 1.1 mrg unsigned short int __data_selector; 86 1.1 mrg unsigned short int __unused5; 87 1.1 mrg unsigned int __mxcsr; 88 1.1.1.3 mrg } __attribute__ ((gcc_struct)); 89 1.1 mrg 90 1.1 mrg /* Check we can actually store the FPU state in the allocated size. */ 91 1.1.1.3 mrg _Static_assert (sizeof(struct fenv) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE, 92 1.1 mrg "GFC_FPE_STATE_BUFFER_SIZE is too small"); 93 1.1 mrg 94 1.1.1.3 mrg #ifdef __SSE_MATH__ 95 1.1.1.3 mrg # define __math_force_eval_div(x, y) \ 96 1.1.1.3 mrg do { \ 97 1.1.1.3 mrg __asm__ ("" : "+x" (x)); __asm__ __volatile__ ("" : : "x" (x / y)); \ 98 1.1.1.3 mrg } while (0) 99 1.1.1.3 mrg #else 100 1.1.1.3 mrg # define __math_force_eval_div(x, y) \ 101 1.1.1.3 mrg do { \ 102 1.1.1.3 mrg __asm__ ("" : "+t" (x)); __asm__ __volatile__ ("" : : "f" (x / y)); \ 103 1.1.1.3 mrg } while (0) 104 1.1.1.3 mrg #endif 105 1.1 mrg 106 1.1 mrg /* Raise the supported floating-point exceptions from EXCEPTS. Other 107 1.1 mrg bits in EXCEPTS are ignored. Code originally borrowed from 108 1.1 mrg libatomic/config/x86/fenv.c. */ 109 1.1 mrg 110 1.1 mrg static void 111 1.1 mrg local_feraiseexcept (int excepts) 112 1.1 mrg { 113 1.1.1.3 mrg struct fenv temp; 114 1.1.1.3 mrg 115 1.1 mrg if (excepts & _FPU_MASK_IM) 116 1.1 mrg { 117 1.1 mrg float f = 0.0f; 118 1.1.1.3 mrg __math_force_eval_div (f, f); 119 1.1 mrg } 120 1.1 mrg if (excepts & _FPU_MASK_DM) 121 1.1 mrg { 122 1.1 mrg __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp)); 123 1.1 mrg temp.__status_word |= _FPU_MASK_DM; 124 1.1 mrg __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp)); 125 1.1 mrg __asm__ __volatile__ ("fwait"); 126 1.1 mrg } 127 1.1 mrg if (excepts & _FPU_MASK_ZM) 128 1.1 mrg { 129 1.1 mrg float f = 1.0f, g = 0.0f; 130 1.1.1.3 mrg __math_force_eval_div (f, g); 131 1.1 mrg } 132 1.1 mrg if (excepts & _FPU_MASK_OM) 133 1.1 mrg { 134 1.1 mrg __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp)); 135 1.1 mrg temp.__status_word |= _FPU_MASK_OM; 136 1.1 mrg __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp)); 137 1.1 mrg __asm__ __volatile__ ("fwait"); 138 1.1 mrg } 139 1.1 mrg if (excepts & _FPU_MASK_UM) 140 1.1 mrg { 141 1.1 mrg __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp)); 142 1.1 mrg temp.__status_word |= _FPU_MASK_UM; 143 1.1 mrg __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp)); 144 1.1 mrg __asm__ __volatile__ ("fwait"); 145 1.1 mrg } 146 1.1 mrg if (excepts & _FPU_MASK_PM) 147 1.1 mrg { 148 1.1 mrg float f = 1.0f, g = 3.0f; 149 1.1.1.3 mrg __math_force_eval_div (f, g); 150 1.1 mrg } 151 1.1 mrg } 152 1.1 mrg 153 1.1 mrg 154 1.1 mrg void 155 1.1 mrg set_fpu_trap_exceptions (int trap, int notrap) 156 1.1 mrg { 157 1.1 mrg int exc_set = 0, exc_clr = 0; 158 1.1 mrg unsigned short cw; 159 1.1 mrg 160 1.1 mrg if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM; 161 1.1 mrg if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM; 162 1.1 mrg if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM; 163 1.1 mrg if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM; 164 1.1 mrg if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM; 165 1.1 mrg if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM; 166 1.1 mrg 167 1.1 mrg if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM; 168 1.1 mrg if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM; 169 1.1 mrg if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM; 170 1.1 mrg if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM; 171 1.1 mrg if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM; 172 1.1 mrg if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM; 173 1.1 mrg 174 1.1 mrg __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw)); 175 1.1 mrg 176 1.1 mrg cw |= exc_clr; 177 1.1 mrg cw &= ~exc_set; 178 1.1 mrg 179 1.1 mrg __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw)); 180 1.1 mrg 181 1.1 mrg if (has_sse()) 182 1.1 mrg { 183 1.1 mrg unsigned int cw_sse; 184 1.1 mrg 185 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse)); 186 1.1 mrg 187 1.1 mrg /* The SSE exception masks are shifted by 7 bits. */ 188 1.1 mrg cw_sse |= (exc_clr << 7); 189 1.1 mrg cw_sse &= ~(exc_set << 7); 190 1.1 mrg 191 1.1 mrg /* Clear stalled exception flags. */ 192 1.1 mrg cw_sse &= ~_FPU_EX_ALL; 193 1.1 mrg 194 1.1 mrg __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse)); 195 1.1 mrg } 196 1.1 mrg } 197 1.1 mrg 198 1.1 mrg void 199 1.1 mrg set_fpu (void) 200 1.1 mrg { 201 1.1 mrg set_fpu_trap_exceptions (options.fpe, 0); 202 1.1 mrg } 203 1.1 mrg 204 1.1 mrg int 205 1.1 mrg get_fpu_trap_exceptions (void) 206 1.1 mrg { 207 1.1 mrg unsigned short cw; 208 1.1 mrg int mask; 209 1.1 mrg int res = 0; 210 1.1 mrg 211 1.1 mrg __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw)); 212 1.1 mrg mask = cw; 213 1.1 mrg 214 1.1 mrg if (has_sse()) 215 1.1 mrg { 216 1.1 mrg unsigned int cw_sse; 217 1.1 mrg 218 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse)); 219 1.1 mrg 220 1.1 mrg /* The SSE exception masks are shifted by 7 bits. */ 221 1.1 mrg mask |= (cw_sse >> 7); 222 1.1 mrg } 223 1.1 mrg 224 1.1 mrg mask = ~mask & _FPU_MASK_ALL; 225 1.1 mrg 226 1.1 mrg if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID; 227 1.1 mrg if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL; 228 1.1 mrg if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO; 229 1.1 mrg if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW; 230 1.1 mrg if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW; 231 1.1 mrg if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT; 232 1.1 mrg 233 1.1 mrg return res; 234 1.1 mrg } 235 1.1 mrg 236 1.1 mrg int 237 1.1 mrg support_fpu_trap (int flag __attribute__((unused))) 238 1.1 mrg { 239 1.1 mrg return 1; 240 1.1 mrg } 241 1.1 mrg 242 1.1 mrg int 243 1.1 mrg get_fpu_except_flags (void) 244 1.1 mrg { 245 1.1 mrg unsigned short cw; 246 1.1 mrg int excepts; 247 1.1 mrg int res = 0; 248 1.1 mrg 249 1.1 mrg __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw)); 250 1.1 mrg excepts = cw; 251 1.1 mrg 252 1.1 mrg if (has_sse()) 253 1.1 mrg { 254 1.1 mrg unsigned int cw_sse; 255 1.1 mrg 256 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse)); 257 1.1 mrg excepts |= cw_sse; 258 1.1 mrg } 259 1.1 mrg 260 1.1 mrg excepts &= _FPU_EX_ALL; 261 1.1 mrg 262 1.1 mrg if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID; 263 1.1 mrg if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL; 264 1.1 mrg if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO; 265 1.1 mrg if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW; 266 1.1 mrg if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW; 267 1.1 mrg if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT; 268 1.1 mrg 269 1.1 mrg return res; 270 1.1 mrg } 271 1.1 mrg 272 1.1 mrg void 273 1.1 mrg set_fpu_except_flags (int set, int clear) 274 1.1 mrg { 275 1.1.1.3 mrg struct fenv temp; 276 1.1 mrg int exc_set = 0, exc_clr = 0; 277 1.1 mrg 278 1.1 mrg /* Translate from GFC_PE_* values to _FPU_MASK_* values. */ 279 1.1 mrg if (set & GFC_FPE_INVALID) 280 1.1 mrg exc_set |= _FPU_MASK_IM; 281 1.1 mrg if (clear & GFC_FPE_INVALID) 282 1.1 mrg exc_clr |= _FPU_MASK_IM; 283 1.1 mrg 284 1.1 mrg if (set & GFC_FPE_DENORMAL) 285 1.1 mrg exc_set |= _FPU_MASK_DM; 286 1.1 mrg if (clear & GFC_FPE_DENORMAL) 287 1.1 mrg exc_clr |= _FPU_MASK_DM; 288 1.1 mrg 289 1.1 mrg if (set & GFC_FPE_ZERO) 290 1.1 mrg exc_set |= _FPU_MASK_ZM; 291 1.1 mrg if (clear & GFC_FPE_ZERO) 292 1.1 mrg exc_clr |= _FPU_MASK_ZM; 293 1.1 mrg 294 1.1 mrg if (set & GFC_FPE_OVERFLOW) 295 1.1 mrg exc_set |= _FPU_MASK_OM; 296 1.1 mrg if (clear & GFC_FPE_OVERFLOW) 297 1.1 mrg exc_clr |= _FPU_MASK_OM; 298 1.1 mrg 299 1.1 mrg if (set & GFC_FPE_UNDERFLOW) 300 1.1 mrg exc_set |= _FPU_MASK_UM; 301 1.1 mrg if (clear & GFC_FPE_UNDERFLOW) 302 1.1 mrg exc_clr |= _FPU_MASK_UM; 303 1.1 mrg 304 1.1 mrg if (set & GFC_FPE_INEXACT) 305 1.1 mrg exc_set |= _FPU_MASK_PM; 306 1.1 mrg if (clear & GFC_FPE_INEXACT) 307 1.1 mrg exc_clr |= _FPU_MASK_PM; 308 1.1 mrg 309 1.1 mrg 310 1.1 mrg /* Change the flags. This is tricky on 387 (unlike SSE), because we have 311 1.1 mrg FNSTSW but no FLDSW instruction. */ 312 1.1 mrg __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp)); 313 1.1 mrg temp.__status_word &= ~exc_clr; 314 1.1 mrg __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp)); 315 1.1 mrg 316 1.1 mrg /* Change the flags on SSE. */ 317 1.1 mrg 318 1.1 mrg if (has_sse()) 319 1.1 mrg { 320 1.1 mrg unsigned int cw_sse; 321 1.1 mrg 322 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse)); 323 1.1 mrg cw_sse &= ~exc_clr; 324 1.1 mrg __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse)); 325 1.1 mrg } 326 1.1 mrg 327 1.1 mrg local_feraiseexcept (exc_set); 328 1.1 mrg } 329 1.1 mrg 330 1.1 mrg int 331 1.1 mrg support_fpu_flag (int flag __attribute__((unused))) 332 1.1 mrg { 333 1.1 mrg return 1; 334 1.1 mrg } 335 1.1 mrg 336 1.1 mrg void 337 1.1 mrg set_fpu_rounding_mode (int round) 338 1.1 mrg { 339 1.1 mrg int round_mode; 340 1.1 mrg unsigned short cw; 341 1.1 mrg 342 1.1 mrg switch (round) 343 1.1 mrg { 344 1.1 mrg case GFC_FPE_TONEAREST: 345 1.1 mrg round_mode = _FPU_RC_NEAREST; 346 1.1 mrg break; 347 1.1 mrg case GFC_FPE_UPWARD: 348 1.1 mrg round_mode = _FPU_RC_UP; 349 1.1 mrg break; 350 1.1 mrg case GFC_FPE_DOWNWARD: 351 1.1 mrg round_mode = _FPU_RC_DOWN; 352 1.1 mrg break; 353 1.1 mrg case GFC_FPE_TOWARDZERO: 354 1.1 mrg round_mode = _FPU_RC_ZERO; 355 1.1 mrg break; 356 1.1 mrg default: 357 1.1 mrg return; /* Should be unreachable. */ 358 1.1 mrg } 359 1.1 mrg 360 1.1 mrg __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw)); 361 1.1 mrg 362 1.1 mrg /* The x87 round control bits are shifted by 10 bits. */ 363 1.1 mrg cw &= ~(_FPU_RC_MASK << 10); 364 1.1 mrg cw |= round_mode << 10; 365 1.1 mrg 366 1.1 mrg __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw)); 367 1.1 mrg 368 1.1 mrg if (has_sse()) 369 1.1 mrg { 370 1.1 mrg unsigned int cw_sse; 371 1.1 mrg 372 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse)); 373 1.1 mrg 374 1.1 mrg /* The SSE round control bits are shifted by 13 bits. */ 375 1.1 mrg cw_sse &= ~(_FPU_RC_MASK << 13); 376 1.1 mrg cw_sse |= round_mode << 13; 377 1.1 mrg 378 1.1 mrg __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse)); 379 1.1 mrg } 380 1.1 mrg } 381 1.1 mrg 382 1.1 mrg int 383 1.1 mrg get_fpu_rounding_mode (void) 384 1.1 mrg { 385 1.1 mrg int round_mode; 386 1.1 mrg 387 1.1 mrg #ifdef __SSE_MATH__ 388 1.1 mrg unsigned int cw; 389 1.1 mrg 390 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw)); 391 1.1 mrg 392 1.1 mrg /* The SSE round control bits are shifted by 13 bits. */ 393 1.1 mrg round_mode = cw >> 13; 394 1.1 mrg #else 395 1.1 mrg unsigned short cw; 396 1.1 mrg 397 1.1 mrg __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw)); 398 1.1 mrg 399 1.1 mrg /* The x87 round control bits are shifted by 10 bits. */ 400 1.1 mrg round_mode = cw >> 10; 401 1.1 mrg #endif 402 1.1 mrg 403 1.1 mrg round_mode &= _FPU_RC_MASK; 404 1.1 mrg 405 1.1 mrg switch (round_mode) 406 1.1 mrg { 407 1.1 mrg case _FPU_RC_NEAREST: 408 1.1 mrg return GFC_FPE_TONEAREST; 409 1.1 mrg case _FPU_RC_UP: 410 1.1 mrg return GFC_FPE_UPWARD; 411 1.1 mrg case _FPU_RC_DOWN: 412 1.1 mrg return GFC_FPE_DOWNWARD; 413 1.1 mrg case _FPU_RC_ZERO: 414 1.1 mrg return GFC_FPE_TOWARDZERO; 415 1.1 mrg default: 416 1.1 mrg return 0; /* Should be unreachable. */ 417 1.1 mrg } 418 1.1 mrg } 419 1.1 mrg 420 1.1 mrg int 421 1.1 mrg support_fpu_rounding_mode (int mode __attribute__((unused))) 422 1.1 mrg { 423 1.1 mrg return 1; 424 1.1 mrg } 425 1.1 mrg 426 1.1 mrg void 427 1.1 mrg get_fpu_state (void *state) 428 1.1 mrg { 429 1.1.1.3 mrg struct fenv *envp = state; 430 1.1 mrg 431 1.1 mrg __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp)); 432 1.1 mrg 433 1.1 mrg /* fnstenv has the side effect of masking all exceptions, so we need 434 1.1 mrg to restore the control word after that. */ 435 1.1 mrg __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word)); 436 1.1 mrg 437 1.1 mrg if (has_sse()) 438 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr)); 439 1.1 mrg } 440 1.1 mrg 441 1.1 mrg void 442 1.1 mrg set_fpu_state (void *state) 443 1.1 mrg { 444 1.1.1.3 mrg struct fenv *envp = state; 445 1.1 mrg 446 1.1 mrg /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more 447 1.1 mrg complex than this, but I think it suffices in our case. */ 448 1.1 mrg __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp)); 449 1.1 mrg 450 1.1 mrg if (has_sse()) 451 1.1 mrg __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr)); 452 1.1 mrg } 453 1.1 mrg 454 1.1 mrg 455 1.1 mrg int 456 1.1 mrg support_fpu_underflow_control (int kind) 457 1.1 mrg { 458 1.1 mrg if (!has_sse()) 459 1.1 mrg return 0; 460 1.1 mrg 461 1.1 mrg return (kind == 4 || kind == 8) ? 1 : 0; 462 1.1 mrg } 463 1.1 mrg 464 1.1 mrg 465 1.1 mrg int 466 1.1 mrg get_fpu_underflow_mode (void) 467 1.1 mrg { 468 1.1 mrg unsigned int cw_sse; 469 1.1 mrg 470 1.1 mrg if (!has_sse()) 471 1.1 mrg return 1; 472 1.1 mrg 473 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse)); 474 1.1 mrg 475 1.1 mrg /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow. */ 476 1.1 mrg return (cw_sse & MXCSR_FTZ) ? 0 : 1; 477 1.1 mrg } 478 1.1 mrg 479 1.1 mrg 480 1.1 mrg void 481 1.1 mrg set_fpu_underflow_mode (int gradual) 482 1.1 mrg { 483 1.1 mrg unsigned int cw_sse; 484 1.1 mrg 485 1.1 mrg if (!has_sse()) 486 1.1 mrg return; 487 1.1 mrg 488 1.1 mrg __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse)); 489 1.1 mrg 490 1.1 mrg if (gradual) 491 1.1 mrg cw_sse &= ~MXCSR_FTZ; 492 1.1 mrg else 493 1.1 mrg cw_sse |= MXCSR_FTZ; 494 1.1 mrg 495 1.1 mrg __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse)); 496 1.1 mrg } 497 1.1 mrg 498