Home | History | Annotate | Line # | Download | only in config
fpu-387.h revision 1.1.1.2
      1 /* FPU-related code for x86 and x86_64 processors.
      2    Copyright (C) 2005-2020 Free Software Foundation, Inc.
      3    Contributed by Francois-Xavier Coudert <coudert (at) clipper.ens.fr>
      4 
      5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
      6 
      7 Libgfortran is free software; you can redistribute it and/or
      8 modify it under the terms of the GNU General Public
      9 License as published by the Free Software Foundation; either
     10 version 3 of the License, or (at your option) any later version.
     11 
     12 Libgfortran is distributed in the hope that it will be useful,
     13 but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 GNU General Public License for more details.
     16 
     17 Under Section 7 of GPL version 3, you are granted additional
     18 permissions described in the GCC Runtime Library Exception, version
     19 3.1, as published by the Free Software Foundation.
     20 
     21 You should have received a copy of the GNU General Public License and
     22 a copy of the GCC Runtime Library Exception along with this program;
     23 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24 <http://www.gnu.org/licenses/>.  */
     25 
     26 #ifndef __SSE_MATH__
     27 #include "cpuid.h"
     28 #endif
     29 
/* Return nonzero if SSE is usable.  When this file is compiled with
   SSE math enabled, SSE support is implied; otherwise query CPUID
   leaf 1 and test the SSE feature bit in EDX.  */
static int
has_sse (void)
{
#ifdef __SSE_MATH__
  /* Compiled for SSE math, so the instruction set must be present.  */
  return 1;
#else
  unsigned int eax, ebx, ecx, edx;

  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0)
    return 0;

  return edx & bit_SSE;
#endif
}
     44 
/* i387 exceptions -- see linux <fpu_control.h> header file for details.
   Each bit serves both as an exception mask bit in the control word and
   as the matching exception flag bit in the status word.  */
#define _FPU_MASK_IM  0x01   /* Invalid operation.  */
#define _FPU_MASK_DM  0x02   /* Denormalized operand.  */
#define _FPU_MASK_ZM  0x04   /* Division by zero.  */
#define _FPU_MASK_OM  0x08   /* Overflow.  */
#define _FPU_MASK_UM  0x10   /* Underflow.  */
#define _FPU_MASK_PM  0x20   /* Precision (inexact result).  */
#define _FPU_MASK_ALL 0x3f   /* All six exception bits.  */

/* All exception flag bits in the x87 status word (and the low bits of
   MXCSR, which use the same layout).  */
#define _FPU_EX_ALL   0x3f

/* i387 rounding modes, stored in bits 10-11 of the x87 control word
   (and, shifted to bits 13-14, in MXCSR).  */

#define _FPU_RC_NEAREST 0x0
#define _FPU_RC_DOWN    0x1
#define _FPU_RC_UP      0x2
#define _FPU_RC_ZERO    0x3

#define _FPU_RC_MASK    0x3

/* Enable flush to zero mode (MXCSR bit 15).  */

#define MXCSR_FTZ (1 << 15)
     68 
     69 
/* This structure corresponds to the layout of the block
   written by FSTENV, with the SSE control/status register (MXCSR)
   appended at the end for our own use in get/set_fpu_state.  */
typedef struct
{
  unsigned short int __control_word;	/* Exception masks and rounding control.  */
  unsigned short int __unused1;
  unsigned short int __status_word;	/* Exception flags.  */
  unsigned short int __unused2;
  unsigned short int __tags;
  unsigned short int __unused3;
  unsigned int __eip;
  unsigned short int __cs_selector;
  unsigned short int __opcode;
  unsigned int __data_offset;
  unsigned short int __data_selector;
  unsigned short int __unused5;
  unsigned int __mxcsr;			/* Not written by FSTENV itself; filled
					   in by get_fpu_state when SSE is
					   available.  */
}
my_fenv_t;

/* Check we can actually store the FPU state in the allocated size.  */
_Static_assert (sizeof(my_fenv_t) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
		"GFC_FPE_STATE_BUFFER_SIZE is too small");
     93 
     94 
     95 /* Raise the supported floating-point exceptions from EXCEPTS.  Other
     96    bits in EXCEPTS are ignored.  Code originally borrowed from
     97    libatomic/config/x86/fenv.c.  */
     98 
     99 static void
    100 local_feraiseexcept (int excepts)
    101 {
    102   if (excepts & _FPU_MASK_IM)
    103     {
    104       float f = 0.0f;
    105 #ifdef __SSE_MATH__
    106       __asm__ __volatile__ ("%vdivss\t{%0, %d0|%d0, %0}" : "+x" (f));
    107 #else
    108       __asm__ __volatile__ ("fdiv\t{%y0, %0|%0, %y0}" : "+t" (f));
    109       /* No need for fwait, exception is triggered by emitted fstp.  */
    110 #endif
    111     }
    112   if (excepts & _FPU_MASK_DM)
    113     {
    114       my_fenv_t temp;
    115       __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
    116       temp.__status_word |= _FPU_MASK_DM;
    117       __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
    118       __asm__ __volatile__ ("fwait");
    119     }
    120   if (excepts & _FPU_MASK_ZM)
    121     {
    122       float f = 1.0f, g = 0.0f;
    123 #ifdef __SSE_MATH__
    124       __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
    125 #else
    126       __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
    127       /* No need for fwait, exception is triggered by emitted fstp.  */
    128 #endif
    129     }
    130   if (excepts & _FPU_MASK_OM)
    131     {
    132       my_fenv_t temp;
    133       __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
    134       temp.__status_word |= _FPU_MASK_OM;
    135       __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
    136       __asm__ __volatile__ ("fwait");
    137     }
    138   if (excepts & _FPU_MASK_UM)
    139     {
    140       my_fenv_t temp;
    141       __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
    142       temp.__status_word |= _FPU_MASK_UM;
    143       __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
    144       __asm__ __volatile__ ("fwait");
    145     }
    146   if (excepts & _FPU_MASK_PM)
    147     {
    148       float f = 1.0f, g = 3.0f;
    149 #ifdef __SSE_MATH__
    150       __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
    151 #else
    152       __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
    153       /* No need for fwait, exception is triggered by emitted fstp.  */
    154 #endif
    155     }
    156 }
    157 
    158 
    159 void
    160 set_fpu_trap_exceptions (int trap, int notrap)
    161 {
    162   int exc_set = 0, exc_clr = 0;
    163   unsigned short cw;
    164 
    165   if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
    166   if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
    167   if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
    168   if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
    169   if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
    170   if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;
    171 
    172   if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
    173   if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
    174   if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
    175   if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
    176   if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
    177   if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;
    178 
    179   __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
    180 
    181   cw |= exc_clr;
    182   cw &= ~exc_set;
    183 
    184   __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));
    185 
    186   if (has_sse())
    187     {
    188       unsigned int cw_sse;
    189 
    190       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    191 
    192       /* The SSE exception masks are shifted by 7 bits.  */
    193       cw_sse |= (exc_clr << 7);
    194       cw_sse &= ~(exc_set << 7);
    195 
    196       /* Clear stalled exception flags.  */
    197       cw_sse &= ~_FPU_EX_ALL;
    198 
    199       __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    200     }
    201 }
    202 
/* Initialize the FPU at program startup: enable trapping for the
   exceptions recorded in options.fpe, leaving all other masks as
   they are.  */
void
set_fpu (void)
{
  set_fpu_trap_exceptions (options.fpe, 0);
}
    208 
    209 int
    210 get_fpu_trap_exceptions (void)
    211 {
    212   unsigned short cw;
    213   int mask;
    214   int res = 0;
    215 
    216   __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
    217   mask = cw;
    218 
    219   if (has_sse())
    220     {
    221       unsigned int cw_sse;
    222 
    223       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    224 
    225       /* The SSE exception masks are shifted by 7 bits.  */
    226       mask |= (cw_sse >> 7);
    227     }
    228 
    229   mask = ~mask & _FPU_MASK_ALL;
    230 
    231   if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
    232   if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
    233   if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
    234   if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
    235   if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
    236   if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
    237 
    238   return res;
    239 }
    240 
/* Return nonzero if trapping of exception FLAG is supported.  On this
   target every GFC_FPE_* exception can be trapped, so always report
   success.  */
int
support_fpu_trap (int flag __attribute__((unused)))
{
  return 1;
}
    246 
    247 int
    248 get_fpu_except_flags (void)
    249 {
    250   unsigned short cw;
    251   int excepts;
    252   int res = 0;
    253 
    254   __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
    255   excepts = cw;
    256 
    257   if (has_sse())
    258     {
    259       unsigned int cw_sse;
    260 
    261       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    262       excepts |= cw_sse;
    263     }
    264 
    265   excepts &= _FPU_EX_ALL;
    266 
    267   if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
    268   if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
    269   if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
    270   if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
    271   if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
    272   if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
    273 
    274   return res;
    275 }
    276 
    277 void
    278 set_fpu_except_flags (int set, int clear)
    279 {
    280   my_fenv_t temp;
    281   int exc_set = 0, exc_clr = 0;
    282 
    283   /* Translate from GFC_PE_* values to _FPU_MASK_* values.  */
    284   if (set & GFC_FPE_INVALID)
    285     exc_set |= _FPU_MASK_IM;
    286   if (clear & GFC_FPE_INVALID)
    287     exc_clr |= _FPU_MASK_IM;
    288 
    289   if (set & GFC_FPE_DENORMAL)
    290     exc_set |= _FPU_MASK_DM;
    291   if (clear & GFC_FPE_DENORMAL)
    292     exc_clr |= _FPU_MASK_DM;
    293 
    294   if (set & GFC_FPE_ZERO)
    295     exc_set |= _FPU_MASK_ZM;
    296   if (clear & GFC_FPE_ZERO)
    297     exc_clr |= _FPU_MASK_ZM;
    298 
    299   if (set & GFC_FPE_OVERFLOW)
    300     exc_set |= _FPU_MASK_OM;
    301   if (clear & GFC_FPE_OVERFLOW)
    302     exc_clr |= _FPU_MASK_OM;
    303 
    304   if (set & GFC_FPE_UNDERFLOW)
    305     exc_set |= _FPU_MASK_UM;
    306   if (clear & GFC_FPE_UNDERFLOW)
    307     exc_clr |= _FPU_MASK_UM;
    308 
    309   if (set & GFC_FPE_INEXACT)
    310     exc_set |= _FPU_MASK_PM;
    311   if (clear & GFC_FPE_INEXACT)
    312     exc_clr |= _FPU_MASK_PM;
    313 
    314 
    315   /* Change the flags. This is tricky on 387 (unlike SSE), because we have
    316      FNSTSW but no FLDSW instruction.  */
    317   __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
    318   temp.__status_word &= ~exc_clr;
    319   __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
    320 
    321   /* Change the flags on SSE.  */
    322 
    323   if (has_sse())
    324   {
    325     unsigned int cw_sse;
    326 
    327     __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    328     cw_sse &= ~exc_clr;
    329     __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    330   }
    331 
    332   local_feraiseexcept (exc_set);
    333 }
    334 
/* Return nonzero if the exception flag FLAG can be queried and set.
   All GFC_FPE_* flags are supported on this target.  */
int
support_fpu_flag (int flag __attribute__((unused)))
{
  return 1;
}
    340 
    341 void
    342 set_fpu_rounding_mode (int round)
    343 {
    344   int round_mode;
    345   unsigned short cw;
    346 
    347   switch (round)
    348     {
    349     case GFC_FPE_TONEAREST:
    350       round_mode = _FPU_RC_NEAREST;
    351       break;
    352     case GFC_FPE_UPWARD:
    353       round_mode = _FPU_RC_UP;
    354       break;
    355     case GFC_FPE_DOWNWARD:
    356       round_mode = _FPU_RC_DOWN;
    357       break;
    358     case GFC_FPE_TOWARDZERO:
    359       round_mode = _FPU_RC_ZERO;
    360       break;
    361     default:
    362       return; /* Should be unreachable.  */
    363     }
    364 
    365   __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
    366 
    367   /* The x87 round control bits are shifted by 10 bits.  */
    368   cw &= ~(_FPU_RC_MASK << 10);
    369   cw |= round_mode << 10;
    370 
    371   __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));
    372 
    373   if (has_sse())
    374     {
    375       unsigned int cw_sse;
    376 
    377       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    378 
    379       /* The SSE round control bits are shifted by 13 bits.  */
    380       cw_sse &= ~(_FPU_RC_MASK << 13);
    381       cw_sse |= round_mode << 13;
    382 
    383       __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    384     }
    385 }
    386 
    387 int
    388 get_fpu_rounding_mode (void)
    389 {
    390   int round_mode;
    391 
    392 #ifdef __SSE_MATH__
    393   unsigned int cw;
    394 
    395   __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));
    396 
    397   /* The SSE round control bits are shifted by 13 bits.  */
    398   round_mode = cw >> 13;
    399 #else
    400   unsigned short cw;
    401 
    402   __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
    403 
    404   /* The x87 round control bits are shifted by 10 bits.  */
    405   round_mode = cw >> 10;
    406 #endif
    407 
    408   round_mode &= _FPU_RC_MASK;
    409 
    410   switch (round_mode)
    411     {
    412     case _FPU_RC_NEAREST:
    413       return GFC_FPE_TONEAREST;
    414     case _FPU_RC_UP:
    415       return GFC_FPE_UPWARD;
    416     case _FPU_RC_DOWN:
    417       return GFC_FPE_DOWNWARD;
    418     case _FPU_RC_ZERO:
    419       return GFC_FPE_TOWARDZERO;
    420     default:
    421       return 0; /* Should be unreachable.  */
    422     }
    423 }
    424 
/* Return nonzero if the rounding mode MODE can be set.  All four
   GFC_FPE_* rounding modes are supported on this target.  */
int
support_fpu_rounding_mode (int mode __attribute__((unused)))
{
  return 1;
}
    430 
/* Save the current FPU state into STATE, a caller-provided buffer of
   at least GFC_FPE_STATE_BUFFER_SIZE bytes interpreted as a my_fenv_t:
   the x87 environment block, plus MXCSR when SSE is available.  */
void
get_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));

  /* fnstenv has the side effect of masking all exceptions, so we need
     to restore the control word after that.  */
  __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));

  if (has_sse())
    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
}
    445 
/* Restore an FPU state previously saved by get_fpu_state from STATE:
   reload the x87 environment block and, when SSE is available, MXCSR.  */
void
set_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
     complex than this, but I think it suffices in our case.  */
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));

  if (has_sse())
    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
}
    458 
    459 
/* Return nonzero if underflow mode control is available for real kind
   KIND.  It requires SSE (the MXCSR FTZ bit) and is only offered for
   kinds 4 and 8.  */
int
support_fpu_underflow_control (int kind)
{
  return has_sse () && (kind == 4 || kind == 8);
}
    468 
    469 
    470 int
    471 get_fpu_underflow_mode (void)
    472 {
    473   unsigned int cw_sse;
    474 
    475   if (!has_sse())
    476     return 1;
    477 
    478   __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    479 
    480   /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow.  */
    481   return (cw_sse & MXCSR_FTZ) ? 0 : 1;
    482 }
    483 
    484 
    485 void
    486 set_fpu_underflow_mode (int gradual)
    487 {
    488   unsigned int cw_sse;
    489 
    490   if (!has_sse())
    491     return;
    492 
    493   __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    494 
    495   if (gradual)
    496     cw_sse &= ~MXCSR_FTZ;
    497   else
    498     cw_sse |= MXCSR_FTZ;
    499 
    500   __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    501 }
    502 
    503