/* fpu-387.h, revision 1.1.1.1 (libgfortran config directory).  */
      1 /* FPU-related code for x86 and x86_64 processors.
      2    Copyright (C) 2005-2019 Free Software Foundation, Inc.
      3    Contributed by Francois-Xavier Coudert <coudert (at) clipper.ens.fr>
      4 
      5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
      6 
      7 Libgfortran is free software; you can redistribute it and/or
      8 modify it under the terms of the GNU General Public
      9 License as published by the Free Software Foundation; either
     10 version 3 of the License, or (at your option) any later version.
     11 
     12 Libgfortran is distributed in the hope that it will be useful,
     13 but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 GNU General Public License for more details.
     16 
     17 Under Section 7 of GPL version 3, you are granted additional
     18 permissions described in the GCC Runtime Library Exception, version
     19 3.1, as published by the Free Software Foundation.
     20 
     21 You should have received a copy of the GNU General Public License and
     22 a copy of the GCC Runtime Library Exception along with this program;
     23 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24 <http://www.gnu.org/licenses/>.  */
     25 
     26 #ifndef __SSE_MATH__
     27 #include "cpuid.h"
     28 #endif
     29 
/* Return nonzero iff the running CPU supports SSE.  When the file is
   compiled with SSE math enabled, support is implied by the target and
   no runtime check is needed; otherwise query CPUID leaf 1, whose EDX
   register carries the SSE feature bit.  */
static int
has_sse (void)
{
#ifdef __SSE_MATH__
  return 1;
#else
  unsigned int eax, ebx, ecx, edx;

  return __get_cpuid (1, &eax, &ebx, &ecx, &edx) ? (int) (edx & bit_SSE) : 0;
#endif
}
     44 
/* i387 exceptions -- see linux <fpu_control.h> header file for details.  */
#define _FPU_MASK_IM  0x01	/* Invalid operation.  */
#define _FPU_MASK_DM  0x02	/* Denormalized operand.  */
#define _FPU_MASK_ZM  0x04	/* Division by zero.  */
#define _FPU_MASK_OM  0x08	/* Overflow.  */
#define _FPU_MASK_UM  0x10	/* Underflow.  */
#define _FPU_MASK_PM  0x20	/* Precision (inexact result).  */
#define _FPU_MASK_ALL 0x3f	/* All six exception mask bits.  */

/* The exception *flags* occupy the same low six bit positions in the
   x87 status word and in MXCSR.  */
#define _FPU_EX_ALL   0x3f

/* i387 rounding modes.  */

#define _FPU_RC_NEAREST 0x0	/* Round to nearest.  */
#define _FPU_RC_DOWN    0x1	/* Round toward -infinity.  */
#define _FPU_RC_UP      0x2	/* Round toward +infinity.  */
#define _FPU_RC_ZERO    0x3	/* Round toward zero.  */

#define _FPU_RC_MASK    0x3	/* Mask covering the two RC bits.  */

/* Enable flush to zero mode.  */

#define MXCSR_FTZ (1 << 15)
     69 
/* This structure corresponds to the layout of the block
   written by FSTENV.  */
typedef struct
{
  unsigned short int __control_word;	/* Exception masks + rounding control.  */
  unsigned short int __unused1;
  unsigned short int __status_word;	/* Exception flags in the low six bits.  */
  unsigned short int __unused2;
  unsigned short int __tags;		/* x87 register tag word.  */
  unsigned short int __unused3;
  unsigned int __eip;			/* Instruction pointer of last FP insn.  */
  unsigned short int __cs_selector;
  unsigned short int __opcode;
  unsigned int __data_offset;		/* Operand address of last FP insn.  */
  unsigned short int __data_selector;
  unsigned short int __unused5;
  unsigned int __mxcsr;			/* Not written by FSTENV: SSE control/
					   status, saved by get_fpu_state.  */
}
my_fenv_t;

/* Check we can actually store the FPU state in the allocated size.  */
_Static_assert (sizeof(my_fenv_t) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
		"GFC_FPE_STATE_BUFFER_SIZE is too small");
     93 
     94 
/* Raise the supported floating-point exceptions from EXCEPTS.  Other
   bits in EXCEPTS are ignored.  Code originally borrowed from
   libatomic/config/x86/fenv.c.

   Invalid, zero-divide and inexact are raised by actually performing a
   faulting division; denormal, overflow and underflow have no such easy
   arithmetic trigger, so their flag bits are set directly in a saved
   x87 environment which is then reloaded, with FWAIT delivering the
   (unmasked) exception.  */

static void
local_feraiseexcept (int excepts)
{
  /* Invalid operation: compute 0.0f / 0.0f.  */
  if (excepts & _FPU_MASK_IM)
    {
      float f = 0.0f;
#ifdef __SSE_MATH__
      volatile float r __attribute__ ((unused));
      __asm__ __volatile__ ("%vdivss\t{%0, %d0|%d0, %0}" : "+x" (f));
      r = f; /* Needed to trigger exception.   */
#else
      __asm__ __volatile__ ("fdiv\t{%y0, %0|%0, %y0}" : "+t" (f));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
  /* Denormal: set the status-word flag (same bit position as the
     _FPU_MASK_* constant) in a saved environment and reload it.  */
  if (excepts & _FPU_MASK_DM)
    {
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_DM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  /* Division by zero: compute 1.0f / 0.0f.  */
  if (excepts & _FPU_MASK_ZM)
    {
      float f = 1.0f, g = 0.0f;
#ifdef __SSE_MATH__
      volatile float r __attribute__ ((unused));
      __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
      r = f; /* Needed to trigger exception.   */
#else
      __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
  /* Overflow: flag set directly in the saved environment, as above.  */
  if (excepts & _FPU_MASK_OM)
    {
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_OM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  /* Underflow: flag set directly in the saved environment, as above.  */
  if (excepts & _FPU_MASK_UM)
    {
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_UM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  /* Inexact: compute 1.0f / 3.0f, which is not representable exactly.  */
  if (excepts & _FPU_MASK_PM)
    {
      float f = 1.0f, g = 3.0f;
#ifdef __SSE_MATH__
      volatile float r __attribute__ ((unused));
      __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
      r = f; /* Needed to trigger exception.   */
#else
      __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
}
    163 
    164 
    165 void
    166 set_fpu_trap_exceptions (int trap, int notrap)
    167 {
    168   int exc_set = 0, exc_clr = 0;
    169   unsigned short cw;
    170 
    171   if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
    172   if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
    173   if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
    174   if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
    175   if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
    176   if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;
    177 
    178   if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
    179   if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
    180   if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
    181   if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
    182   if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
    183   if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;
    184 
    185   __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
    186 
    187   cw |= exc_clr;
    188   cw &= ~exc_set;
    189 
    190   __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));
    191 
    192   if (has_sse())
    193     {
    194       unsigned int cw_sse;
    195 
    196       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    197 
    198       /* The SSE exception masks are shifted by 7 bits.  */
    199       cw_sse |= (exc_clr << 7);
    200       cw_sse &= ~(exc_set << 7);
    201 
    202       /* Clear stalled exception flags.  */
    203       cw_sse &= ~_FPU_EX_ALL;
    204 
    205       __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    206     }
    207 }
    208 
/* Initialize FPU trapping from the runtime options.  */
void
set_fpu (void)
{
  /* options.fpe holds the GFC_FPE_* bits requested at startup; nothing
     is explicitly un-trapped (notrap == 0).  */
  set_fpu_trap_exceptions (options.fpe, 0);
}
    214 
    215 int
    216 get_fpu_trap_exceptions (void)
    217 {
    218   unsigned short cw;
    219   int mask;
    220   int res = 0;
    221 
    222   __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
    223   mask = cw;
    224 
    225   if (has_sse())
    226     {
    227       unsigned int cw_sse;
    228 
    229       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    230 
    231       /* The SSE exception masks are shifted by 7 bits.  */
    232       mask |= (cw_sse >> 7);
    233     }
    234 
    235   mask = ~mask & _FPU_MASK_ALL;
    236 
    237   if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
    238   if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
    239   if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
    240   if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
    241   if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
    242   if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
    243 
    244   return res;
    245 }
    246 
/* Report whether trapping can be enabled for a given GFC_FPE_* flag.
   Every exception is trappable on x86, so this always succeeds.  */
int
support_fpu_trap (int flag __attribute__((unused)))
{
  return 1;
}
    252 
/* Return the GFC_FPE_* exceptions whose flags are currently raised on
   either the x87 or the SSE unit.  */
int
get_fpu_except_flags (void)
{
  unsigned short cw;
  int excepts;
  int res = 0;

  /* The x87 exception flags live in the low six bits of the status
     word; the "a" alternative lets the compiler use FNSTSW %ax.  */
  __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
  excepts = cw;

  if (has_sse())
    {
      unsigned int cw_sse;

      /* MXCSR keeps its exception flags in the same low six bits, so
	 the two flag sets can simply be OR-ed together.  */
      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
      excepts |= cw_sse;
    }

  excepts &= _FPU_EX_ALL;

  /* Translate from _FPU_MASK_* bit positions to GFC_FPE_* values.  */
  if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
  if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
  if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
  if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
  if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
  if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;

  return res;
}
    282 
/* Raise the exception flags in SET and clear those in CLEAR (both are
   GFC_FPE_* bit sets).  Clearing is done by rewriting the saved x87
   environment and MXCSR; raising is delegated to local_feraiseexcept,
   which also triggers a trap if the exception is unmasked.  */
void
set_fpu_except_flags (int set, int clear)
{
  my_fenv_t temp;
  int exc_set = 0, exc_clr = 0;

  /* Translate from GFC_FPE_* values to _FPU_MASK_* values.  */
  if (set & GFC_FPE_INVALID)
    exc_set |= _FPU_MASK_IM;
  if (clear & GFC_FPE_INVALID)
    exc_clr |= _FPU_MASK_IM;

  if (set & GFC_FPE_DENORMAL)
    exc_set |= _FPU_MASK_DM;
  if (clear & GFC_FPE_DENORMAL)
    exc_clr |= _FPU_MASK_DM;

  if (set & GFC_FPE_ZERO)
    exc_set |= _FPU_MASK_ZM;
  if (clear & GFC_FPE_ZERO)
    exc_clr |= _FPU_MASK_ZM;

  if (set & GFC_FPE_OVERFLOW)
    exc_set |= _FPU_MASK_OM;
  if (clear & GFC_FPE_OVERFLOW)
    exc_clr |= _FPU_MASK_OM;

  if (set & GFC_FPE_UNDERFLOW)
    exc_set |= _FPU_MASK_UM;
  if (clear & GFC_FPE_UNDERFLOW)
    exc_clr |= _FPU_MASK_UM;

  if (set & GFC_FPE_INEXACT)
    exc_set |= _FPU_MASK_PM;
  if (clear & GFC_FPE_INEXACT)
    exc_clr |= _FPU_MASK_PM;


  /* Change the flags. This is tricky on 387 (unlike SSE), because we have
     FNSTSW but no FLDSW instruction: go through the full environment.  */
  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
  temp.__status_word &= ~exc_clr;
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));

  /* Change the flags on SSE.  */

  if (has_sse())
  {
    unsigned int cw_sse;

    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    cw_sse &= ~exc_clr;
    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
  }

  /* Raise the requested flags on both units.  */
  local_feraiseexcept (exc_set);
}
    340 
/* Report whether a given GFC_FPE_* status flag can be queried/set.
   All six flags exist on x86, so this always succeeds.  */
int
support_fpu_flag (int flag __attribute__((unused)))
{
  return 1;
}
    346 
    347 void
    348 set_fpu_rounding_mode (int round)
    349 {
    350   int round_mode;
    351   unsigned short cw;
    352 
    353   switch (round)
    354     {
    355     case GFC_FPE_TONEAREST:
    356       round_mode = _FPU_RC_NEAREST;
    357       break;
    358     case GFC_FPE_UPWARD:
    359       round_mode = _FPU_RC_UP;
    360       break;
    361     case GFC_FPE_DOWNWARD:
    362       round_mode = _FPU_RC_DOWN;
    363       break;
    364     case GFC_FPE_TOWARDZERO:
    365       round_mode = _FPU_RC_ZERO;
    366       break;
    367     default:
    368       return; /* Should be unreachable.  */
    369     }
    370 
    371   __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
    372 
    373   /* The x87 round control bits are shifted by 10 bits.  */
    374   cw &= ~(_FPU_RC_MASK << 10);
    375   cw |= round_mode << 10;
    376 
    377   __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));
    378 
    379   if (has_sse())
    380     {
    381       unsigned int cw_sse;
    382 
    383       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    384 
    385       /* The SSE round control bits are shifted by 13 bits.  */
    386       cw_sse &= ~(_FPU_RC_MASK << 13);
    387       cw_sse |= round_mode << 13;
    388 
    389       __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    390     }
    391 }
    392 
    393 int
    394 get_fpu_rounding_mode (void)
    395 {
    396   int round_mode;
    397 
    398 #ifdef __SSE_MATH__
    399   unsigned int cw;
    400 
    401   __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));
    402 
    403   /* The SSE round control bits are shifted by 13 bits.  */
    404   round_mode = cw >> 13;
    405 #else
    406   unsigned short cw;
    407 
    408   __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
    409 
    410   /* The x87 round control bits are shifted by 10 bits.  */
    411   round_mode = cw >> 10;
    412 #endif
    413 
    414   round_mode &= _FPU_RC_MASK;
    415 
    416   switch (round_mode)
    417     {
    418     case _FPU_RC_NEAREST:
    419       return GFC_FPE_TONEAREST;
    420     case _FPU_RC_UP:
    421       return GFC_FPE_UPWARD;
    422     case _FPU_RC_DOWN:
    423       return GFC_FPE_DOWNWARD;
    424     case _FPU_RC_ZERO:
    425       return GFC_FPE_TOWARDZERO;
    426     default:
    427       return 0; /* Should be unreachable.  */
    428     }
    429 }
    430 
/* Report whether a given rounding mode is supported.  All four IEEE
   modes are available on x86, so this always succeeds.  */
int
support_fpu_rounding_mode (int mode __attribute__((unused)))
{
  return 1;
}
    436 
/* Save the current FPU state into *STATE, which must provide at least
   GFC_FPE_STATE_BUFFER_SIZE bytes (checked by the _Static_assert on
   my_fenv_t above).  The MXCSR field is filled only when SSE exists.  */
void
get_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));

  /* fnstenv has the side effect of masking all exceptions, so we need
     to restore the control word after that.  */
  __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));

  if (has_sse())
    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
}
    451 
/* Restore an FPU state previously saved by get_fpu_state.  STATE must
   point to a buffer filled by that function.  */
void
set_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
     complex than this, but I think it suffices in our case.  */
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));

  if (has_sse())
    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
}
    464 
    465 
/* Report whether underflow-mode control is available for a real kind.
   Flush-to-zero is an MXCSR feature, so SSE is required; only the 4-
   and 8-byte real kinds are handled.  */
int
support_fpu_underflow_control (int kind)
{
  return has_sse () && (kind == 4 || kind == 8);
}
    474 
    475 
    476 int
    477 get_fpu_underflow_mode (void)
    478 {
    479   unsigned int cw_sse;
    480 
    481   if (!has_sse())
    482     return 1;
    483 
    484   __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    485 
    486   /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow.  */
    487   return (cw_sse & MXCSR_FTZ) ? 0 : 1;
    488 }
    489 
    490 
    491 void
    492 set_fpu_underflow_mode (int gradual)
    493 {
    494   unsigned int cw_sse;
    495 
    496   if (!has_sse())
    497     return;
    498 
    499   __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    500 
    501   if (gradual)
    502     cw_sse &= ~MXCSR_FTZ;
    503   else
    504     cw_sse |= MXCSR_FTZ;
    505 
    506   __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    507 }
    508 
    509