/* fpu-387.h revision 1.1 */
      1  1.1  mrg /* FPU-related code for x86 and x86_64 processors.
      2  1.1  mrg    Copyright (C) 2005-2019 Free Software Foundation, Inc.
      3  1.1  mrg    Contributed by Francois-Xavier Coudert <coudert (at) clipper.ens.fr>
      4  1.1  mrg 
      5  1.1  mrg This file is part of the GNU Fortran 95 runtime library (libgfortran).
      6  1.1  mrg 
      7  1.1  mrg Libgfortran is free software; you can redistribute it and/or
      8  1.1  mrg modify it under the terms of the GNU General Public
      9  1.1  mrg License as published by the Free Software Foundation; either
     10  1.1  mrg version 3 of the License, or (at your option) any later version.
     11  1.1  mrg 
     12  1.1  mrg Libgfortran is distributed in the hope that it will be useful,
     13  1.1  mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
     14  1.1  mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15  1.1  mrg GNU General Public License for more details.
     16  1.1  mrg 
     17  1.1  mrg Under Section 7 of GPL version 3, you are granted additional
     18  1.1  mrg permissions described in the GCC Runtime Library Exception, version
     19  1.1  mrg 3.1, as published by the Free Software Foundation.
     20  1.1  mrg 
     21  1.1  mrg You should have received a copy of the GNU General Public License and
     22  1.1  mrg a copy of the GCC Runtime Library Exception along with this program;
     23  1.1  mrg see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24  1.1  mrg <http://www.gnu.org/licenses/>.  */
     25  1.1  mrg 
     26  1.1  mrg #ifndef __SSE_MATH__
     27  1.1  mrg #include "cpuid.h"
     28  1.1  mrg #endif
     29  1.1  mrg 
/* Return nonzero if the processor supports SSE instructions.
   When the file is compiled with SSE math enabled (__SSE_MATH__ defined),
   SSE support is guaranteed at compile time and no runtime check is
   needed; otherwise query CPUID leaf 1 at runtime.  */
static int
has_sse (void)
{
#ifndef __SSE_MATH__
  unsigned int eax, ebx, ecx, edx;

  /* __get_cpuid returns 0 if CPUID leaf 1 is not supported.  */
  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return 0;

  /* bit_SSE (EDX bit 25 of leaf 1) indicates SSE support; the return
     value is nonzero-for-true, not necessarily 1.  */
  return edx & bit_SSE;
#else
  return 1;
#endif
}
     44  1.1  mrg 
/* i387 exceptions -- see linux <fpu_control.h> header file for details.
   These are the exception mask bits of the x87 control word; the same
   bit positions are used for the exception flags in the x87 status
   word and (low 6 bits of) MXCSR.  */
#define _FPU_MASK_IM  0x01	/* Invalid operation.  */
#define _FPU_MASK_DM  0x02	/* Denormalized operand.  */
#define _FPU_MASK_ZM  0x04	/* Division by zero.  */
#define _FPU_MASK_OM  0x08	/* Overflow.  */
#define _FPU_MASK_UM  0x10	/* Underflow.  */
#define _FPU_MASK_PM  0x20	/* Precision (inexact result).  */
#define _FPU_MASK_ALL 0x3f

/* All exception flag bits of the x87 status word / MXCSR.  */
#define _FPU_EX_ALL   0x3f

/* i387 rounding modes (bits 10-11 of the x87 control word;
   the same two-bit encoding sits at bits 13-14 of MXCSR).  */

#define _FPU_RC_NEAREST 0x0
#define _FPU_RC_DOWN    0x1
#define _FPU_RC_UP      0x2
#define _FPU_RC_ZERO    0x3

#define _FPU_RC_MASK    0x3

/* Enable flush to zero mode (MXCSR bit 15).  */

#define MXCSR_FTZ (1 << 15)
     68  1.1  mrg 
     69  1.1  mrg 
/* This structure corresponds to the layout of the block
   written by FSTENV (the 28-byte 32-bit protected-mode format),
   extended with a trailing slot for MXCSR so the SSE control/status
   state can be saved and restored alongside the x87 environment.
   Field order and sizes must not be changed.  */
typedef struct
{
  unsigned short int __control_word;   /* x87 control word (masks, RC).  */
  unsigned short int __unused1;
  unsigned short int __status_word;    /* x87 status word (exception flags).  */
  unsigned short int __unused2;
  unsigned short int __tags;           /* x87 tag word.  */
  unsigned short int __unused3;
  unsigned int __eip;                  /* FPU instruction pointer.  */
  unsigned short int __cs_selector;
  unsigned short int __opcode;
  unsigned int __data_offset;          /* FPU operand pointer.  */
  unsigned short int __data_selector;
  unsigned short int __unused5;
  unsigned int __mxcsr;                /* SSE state; not written by FSTENV.  */
}
my_fenv_t;

/* Check we can actually store the FPU state in the allocated size.  */
_Static_assert (sizeof(my_fenv_t) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
		"GFC_FPE_STATE_BUFFER_SIZE is too small");
     93  1.1  mrg 
     94  1.1  mrg 
/* Raise the supported floating-point exceptions from EXCEPTS.  Other
   bits in EXCEPTS are ignored.  Code originally borrowed from
   libatomic/config/x86/fenv.c.

   EXCEPTS is a mask of _FPU_MASK_* bits.  Invalid, zero-divide and
   inexact are raised by actually executing a faulting division;
   denormal, overflow and underflow are raised by setting the flag in
   a saved x87 environment, reloading it, and forcing delivery with
   fwait.  */

static void
local_feraiseexcept (int excepts)
{
  if (excepts & _FPU_MASK_IM)
    {
      float f = 0.0f;
#ifdef __SSE_MATH__
      volatile float r __attribute__ ((unused));
      /* 0.0f / 0.0f raises the invalid-operation exception.  */
      __asm__ __volatile__ ("%vdivss\t{%0, %d0|%d0, %0}" : "+x" (f));
      r = f; /* Needed to trigger exception.   */
#else
      __asm__ __volatile__ ("fdiv\t{%y0, %0|%0, %y0}" : "+t" (f));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
  if (excepts & _FPU_MASK_DM)
    {
      /* There is no faulting arithmetic for denormal; set the status
	 flag by round-tripping the environment, then fwait delivers
	 the (possibly unmasked) exception.  */
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_DM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_ZM)
    {
      float f = 1.0f, g = 0.0f;
#ifdef __SSE_MATH__
      volatile float r __attribute__ ((unused));
      /* 1.0f / 0.0f raises the zero-divide exception.  */
      __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
      r = f; /* Needed to trigger exception.   */
#else
      __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
  if (excepts & _FPU_MASK_OM)
    {
      /* Overflow: same environment round-trip technique as denormal.  */
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_OM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_UM)
    {
      /* Underflow: same environment round-trip technique as denormal.  */
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_UM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_PM)
    {
      /* 1.0f / 3.0f is inexact in binary floating point.  */
      float f = 1.0f, g = 3.0f;
#ifdef __SSE_MATH__
      volatile float r __attribute__ ((unused));
      __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
      r = f; /* Needed to trigger exception.   */
#else
      __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
}
    163  1.1  mrg 
    164  1.1  mrg 
    165  1.1  mrg void
    166  1.1  mrg set_fpu_trap_exceptions (int trap, int notrap)
    167  1.1  mrg {
    168  1.1  mrg   int exc_set = 0, exc_clr = 0;
    169  1.1  mrg   unsigned short cw;
    170  1.1  mrg 
    171  1.1  mrg   if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
    172  1.1  mrg   if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
    173  1.1  mrg   if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
    174  1.1  mrg   if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
    175  1.1  mrg   if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
    176  1.1  mrg   if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;
    177  1.1  mrg 
    178  1.1  mrg   if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
    179  1.1  mrg   if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
    180  1.1  mrg   if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
    181  1.1  mrg   if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
    182  1.1  mrg   if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
    183  1.1  mrg   if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;
    184  1.1  mrg 
    185  1.1  mrg   __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
    186  1.1  mrg 
    187  1.1  mrg   cw |= exc_clr;
    188  1.1  mrg   cw &= ~exc_set;
    189  1.1  mrg 
    190  1.1  mrg   __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));
    191  1.1  mrg 
    192  1.1  mrg   if (has_sse())
    193  1.1  mrg     {
    194  1.1  mrg       unsigned int cw_sse;
    195  1.1  mrg 
    196  1.1  mrg       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    197  1.1  mrg 
    198  1.1  mrg       /* The SSE exception masks are shifted by 7 bits.  */
    199  1.1  mrg       cw_sse |= (exc_clr << 7);
    200  1.1  mrg       cw_sse &= ~(exc_set << 7);
    201  1.1  mrg 
    202  1.1  mrg       /* Clear stalled exception flags.  */
    203  1.1  mrg       cw_sse &= ~_FPU_EX_ALL;
    204  1.1  mrg 
    205  1.1  mrg       __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    206  1.1  mrg     }
    207  1.1  mrg }
    208  1.1  mrg 
/* Install the trap settings requested at program start (the -ffpe-trap
   option, recorded in options.fpe), leaving all other traps alone.  */
void
set_fpu (void)
{
  set_fpu_trap_exceptions (options.fpe, 0);
}
    214  1.1  mrg 
    215  1.1  mrg int
    216  1.1  mrg get_fpu_trap_exceptions (void)
    217  1.1  mrg {
    218  1.1  mrg   unsigned short cw;
    219  1.1  mrg   int mask;
    220  1.1  mrg   int res = 0;
    221  1.1  mrg 
    222  1.1  mrg   __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
    223  1.1  mrg   mask = cw;
    224  1.1  mrg 
    225  1.1  mrg   if (has_sse())
    226  1.1  mrg     {
    227  1.1  mrg       unsigned int cw_sse;
    228  1.1  mrg 
    229  1.1  mrg       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    230  1.1  mrg 
    231  1.1  mrg       /* The SSE exception masks are shifted by 7 bits.  */
    232  1.1  mrg       mask |= (cw_sse >> 7);
    233  1.1  mrg     }
    234  1.1  mrg 
    235  1.1  mrg   mask = ~mask & _FPU_MASK_ALL;
    236  1.1  mrg 
    237  1.1  mrg   if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
    238  1.1  mrg   if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
    239  1.1  mrg   if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
    240  1.1  mrg   if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
    241  1.1  mrg   if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
    242  1.1  mrg   if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
    243  1.1  mrg 
    244  1.1  mrg   return res;
    245  1.1  mrg }
    246  1.1  mrg 
/* All six IEEE exceptions can be trapped on x87/SSE hardware, so
   report support unconditionally for any FLAG.  */
int
support_fpu_trap (int flag __attribute__((unused)))
{
  return 1;
}
    252  1.1  mrg 
    253  1.1  mrg int
    254  1.1  mrg get_fpu_except_flags (void)
    255  1.1  mrg {
    256  1.1  mrg   unsigned short cw;
    257  1.1  mrg   int excepts;
    258  1.1  mrg   int res = 0;
    259  1.1  mrg 
    260  1.1  mrg   __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
    261  1.1  mrg   excepts = cw;
    262  1.1  mrg 
    263  1.1  mrg   if (has_sse())
    264  1.1  mrg     {
    265  1.1  mrg       unsigned int cw_sse;
    266  1.1  mrg 
    267  1.1  mrg       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    268  1.1  mrg       excepts |= cw_sse;
    269  1.1  mrg     }
    270  1.1  mrg 
    271  1.1  mrg   excepts &= _FPU_EX_ALL;
    272  1.1  mrg 
    273  1.1  mrg   if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
    274  1.1  mrg   if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
    275  1.1  mrg   if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
    276  1.1  mrg   if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
    277  1.1  mrg   if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
    278  1.1  mrg   if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
    279  1.1  mrg 
    280  1.1  mrg   return res;
    281  1.1  mrg }
    282  1.1  mrg 
    283  1.1  mrg void
    284  1.1  mrg set_fpu_except_flags (int set, int clear)
    285  1.1  mrg {
    286  1.1  mrg   my_fenv_t temp;
    287  1.1  mrg   int exc_set = 0, exc_clr = 0;
    288  1.1  mrg 
    289  1.1  mrg   /* Translate from GFC_PE_* values to _FPU_MASK_* values.  */
    290  1.1  mrg   if (set & GFC_FPE_INVALID)
    291  1.1  mrg     exc_set |= _FPU_MASK_IM;
    292  1.1  mrg   if (clear & GFC_FPE_INVALID)
    293  1.1  mrg     exc_clr |= _FPU_MASK_IM;
    294  1.1  mrg 
    295  1.1  mrg   if (set & GFC_FPE_DENORMAL)
    296  1.1  mrg     exc_set |= _FPU_MASK_DM;
    297  1.1  mrg   if (clear & GFC_FPE_DENORMAL)
    298  1.1  mrg     exc_clr |= _FPU_MASK_DM;
    299  1.1  mrg 
    300  1.1  mrg   if (set & GFC_FPE_ZERO)
    301  1.1  mrg     exc_set |= _FPU_MASK_ZM;
    302  1.1  mrg   if (clear & GFC_FPE_ZERO)
    303  1.1  mrg     exc_clr |= _FPU_MASK_ZM;
    304  1.1  mrg 
    305  1.1  mrg   if (set & GFC_FPE_OVERFLOW)
    306  1.1  mrg     exc_set |= _FPU_MASK_OM;
    307  1.1  mrg   if (clear & GFC_FPE_OVERFLOW)
    308  1.1  mrg     exc_clr |= _FPU_MASK_OM;
    309  1.1  mrg 
    310  1.1  mrg   if (set & GFC_FPE_UNDERFLOW)
    311  1.1  mrg     exc_set |= _FPU_MASK_UM;
    312  1.1  mrg   if (clear & GFC_FPE_UNDERFLOW)
    313  1.1  mrg     exc_clr |= _FPU_MASK_UM;
    314  1.1  mrg 
    315  1.1  mrg   if (set & GFC_FPE_INEXACT)
    316  1.1  mrg     exc_set |= _FPU_MASK_PM;
    317  1.1  mrg   if (clear & GFC_FPE_INEXACT)
    318  1.1  mrg     exc_clr |= _FPU_MASK_PM;
    319  1.1  mrg 
    320  1.1  mrg 
    321  1.1  mrg   /* Change the flags. This is tricky on 387 (unlike SSE), because we have
    322  1.1  mrg      FNSTSW but no FLDSW instruction.  */
    323  1.1  mrg   __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
    324  1.1  mrg   temp.__status_word &= ~exc_clr;
    325  1.1  mrg   __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
    326  1.1  mrg 
    327  1.1  mrg   /* Change the flags on SSE.  */
    328  1.1  mrg 
    329  1.1  mrg   if (has_sse())
    330  1.1  mrg   {
    331  1.1  mrg     unsigned int cw_sse;
    332  1.1  mrg 
    333  1.1  mrg     __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    334  1.1  mrg     cw_sse &= ~exc_clr;
    335  1.1  mrg     __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    336  1.1  mrg   }
    337  1.1  mrg 
    338  1.1  mrg   local_feraiseexcept (exc_set);
    339  1.1  mrg }
    340  1.1  mrg 
/* All six IEEE exception flags can be queried and set on x87/SSE
   hardware, so report support unconditionally for any FLAG.  */
int
support_fpu_flag (int flag __attribute__((unused)))
{
  return 1;
}
    346  1.1  mrg 
    347  1.1  mrg void
    348  1.1  mrg set_fpu_rounding_mode (int round)
    349  1.1  mrg {
    350  1.1  mrg   int round_mode;
    351  1.1  mrg   unsigned short cw;
    352  1.1  mrg 
    353  1.1  mrg   switch (round)
    354  1.1  mrg     {
    355  1.1  mrg     case GFC_FPE_TONEAREST:
    356  1.1  mrg       round_mode = _FPU_RC_NEAREST;
    357  1.1  mrg       break;
    358  1.1  mrg     case GFC_FPE_UPWARD:
    359  1.1  mrg       round_mode = _FPU_RC_UP;
    360  1.1  mrg       break;
    361  1.1  mrg     case GFC_FPE_DOWNWARD:
    362  1.1  mrg       round_mode = _FPU_RC_DOWN;
    363  1.1  mrg       break;
    364  1.1  mrg     case GFC_FPE_TOWARDZERO:
    365  1.1  mrg       round_mode = _FPU_RC_ZERO;
    366  1.1  mrg       break;
    367  1.1  mrg     default:
    368  1.1  mrg       return; /* Should be unreachable.  */
    369  1.1  mrg     }
    370  1.1  mrg 
    371  1.1  mrg   __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
    372  1.1  mrg 
    373  1.1  mrg   /* The x87 round control bits are shifted by 10 bits.  */
    374  1.1  mrg   cw &= ~(_FPU_RC_MASK << 10);
    375  1.1  mrg   cw |= round_mode << 10;
    376  1.1  mrg 
    377  1.1  mrg   __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));
    378  1.1  mrg 
    379  1.1  mrg   if (has_sse())
    380  1.1  mrg     {
    381  1.1  mrg       unsigned int cw_sse;
    382  1.1  mrg 
    383  1.1  mrg       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    384  1.1  mrg 
    385  1.1  mrg       /* The SSE round control bits are shifted by 13 bits.  */
    386  1.1  mrg       cw_sse &= ~(_FPU_RC_MASK << 13);
    387  1.1  mrg       cw_sse |= round_mode << 13;
    388  1.1  mrg 
    389  1.1  mrg       __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    390  1.1  mrg     }
    391  1.1  mrg }
    392  1.1  mrg 
    393  1.1  mrg int
    394  1.1  mrg get_fpu_rounding_mode (void)
    395  1.1  mrg {
    396  1.1  mrg   int round_mode;
    397  1.1  mrg 
    398  1.1  mrg #ifdef __SSE_MATH__
    399  1.1  mrg   unsigned int cw;
    400  1.1  mrg 
    401  1.1  mrg   __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));
    402  1.1  mrg 
    403  1.1  mrg   /* The SSE round control bits are shifted by 13 bits.  */
    404  1.1  mrg   round_mode = cw >> 13;
    405  1.1  mrg #else
    406  1.1  mrg   unsigned short cw;
    407  1.1  mrg 
    408  1.1  mrg   __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
    409  1.1  mrg 
    410  1.1  mrg   /* The x87 round control bits are shifted by 10 bits.  */
    411  1.1  mrg   round_mode = cw >> 10;
    412  1.1  mrg #endif
    413  1.1  mrg 
    414  1.1  mrg   round_mode &= _FPU_RC_MASK;
    415  1.1  mrg 
    416  1.1  mrg   switch (round_mode)
    417  1.1  mrg     {
    418  1.1  mrg     case _FPU_RC_NEAREST:
    419  1.1  mrg       return GFC_FPE_TONEAREST;
    420  1.1  mrg     case _FPU_RC_UP:
    421  1.1  mrg       return GFC_FPE_UPWARD;
    422  1.1  mrg     case _FPU_RC_DOWN:
    423  1.1  mrg       return GFC_FPE_DOWNWARD;
    424  1.1  mrg     case _FPU_RC_ZERO:
    425  1.1  mrg       return GFC_FPE_TOWARDZERO;
    426  1.1  mrg     default:
    427  1.1  mrg       return 0; /* Should be unreachable.  */
    428  1.1  mrg     }
    429  1.1  mrg }
    430  1.1  mrg 
/* All four IEEE rounding directions are supported by the hardware, so
   report support unconditionally for any MODE.  */
int
support_fpu_rounding_mode (int mode __attribute__((unused)))
{
  return 1;
}
    436  1.1  mrg 
/* Save the full FPU state into STATE, which must point to a buffer of
   at least GFC_FPE_STATE_BUFFER_SIZE bytes (checked against my_fenv_t
   by the static assertion above).  */
void
get_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));

  /* fnstenv has the side effect of masking all exceptions, so we need
     to restore the control word after that.  */
  __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));

  /* Capture the SSE state too when available; __mxcsr is not written
     by fnstenv.  */
  if (has_sse())
    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
}
    451  1.1  mrg 
/* Restore an FPU state previously saved by get_fpu_state; STATE must
   point to a buffer filled in by that function.  */
void
set_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
     complex than this, but I think it suffices in our case.  */
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));

  /* Restore the SSE state when available.  */
  if (has_sse())
    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
}
    464  1.1  mrg 
    465  1.1  mrg 
/* Report whether abrupt-underflow (flush-to-zero) control is available
   for real kind KIND.  The FTZ bit lives in MXCSR, so SSE is required,
   and it only affects the SSE float/double types (kinds 4 and 8).  */
int
support_fpu_underflow_control (int kind)
{
  if (!has_sse())
    return 0;

  switch (kind)
    {
    case 4:
    case 8:
      return 1;
    default:
      return 0;
    }
}
    474  1.1  mrg 
    475  1.1  mrg 
    476  1.1  mrg int
    477  1.1  mrg get_fpu_underflow_mode (void)
    478  1.1  mrg {
    479  1.1  mrg   unsigned int cw_sse;
    480  1.1  mrg 
    481  1.1  mrg   if (!has_sse())
    482  1.1  mrg     return 1;
    483  1.1  mrg 
    484  1.1  mrg   __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    485  1.1  mrg 
    486  1.1  mrg   /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow.  */
    487  1.1  mrg   return (cw_sse & MXCSR_FTZ) ? 0 : 1;
    488  1.1  mrg }
    489  1.1  mrg 
    490  1.1  mrg 
    491  1.1  mrg void
    492  1.1  mrg set_fpu_underflow_mode (int gradual)
    493  1.1  mrg {
    494  1.1  mrg   unsigned int cw_sse;
    495  1.1  mrg 
    496  1.1  mrg   if (!has_sse())
    497  1.1  mrg     return;
    498  1.1  mrg 
    499  1.1  mrg   __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    500  1.1  mrg 
    501  1.1  mrg   if (gradual)
    502  1.1  mrg     cw_sse &= ~MXCSR_FTZ;
    503  1.1  mrg   else
    504  1.1  mrg     cw_sse |= MXCSR_FTZ;
    505  1.1  mrg 
    506  1.1  mrg   __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    507  1.1  mrg }
    508  1.1  mrg 
    509