Home | History | Annotate | Line # | Download | only in config
fpu-387.h revision 1.1.1.4
      1 /* FPU-related code for x86 and x86_64 processors.
      2    Copyright (C) 2005-2024 Free Software Foundation, Inc.
      3    Contributed by Francois-Xavier Coudert <coudert (at) clipper.ens.fr>
      4 
      5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
      6 
      7 Libgfortran is free software; you can redistribute it and/or
      8 modify it under the terms of the GNU General Public
      9 License as published by the Free Software Foundation; either
     10 version 3 of the License, or (at your option) any later version.
     11 
     12 Libgfortran is distributed in the hope that it will be useful,
     13 but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 GNU General Public License for more details.
     16 
     17 Under Section 7 of GPL version 3, you are granted additional
     18 permissions described in the GCC Runtime Library Exception, version
     19 3.1, as published by the Free Software Foundation.
     20 
     21 You should have received a copy of the GNU General Public License and
     22 a copy of the GCC Runtime Library Exception along with this program;
     23 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24 <http://www.gnu.org/licenses/>.  */
     25 
     26 #ifndef __SSE_MATH__
     27 #include "cpuid.h"
     28 #endif
     29 
/* Report whether SSE is available.

   When __SSE_MATH__ is defined the answer is fixed at compile time and
   1 is returned.  Otherwise CPUID leaf 1 is queried: the result is the
   bit_SSE bit of EDX (nonzero when set), or 0 when __get_cpuid reports
   that the leaf could not be read.  */
static int
has_sse (void)
{
#ifdef __SSE_MATH__
  return 1;
#else
  unsigned int eax, ebx, ecx, edx;

  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return edx & bit_SSE;

  return 0;
#endif
}
     44 
/* i387 exception bits, one per exception (invalid, denormal, zero-divide,
   overflow, underflow, precision).  This file uses them both as mask bits
   of the control word and as flag bits of the status word; see the linux
   <fpu_control.h> header file for details.  */
#define _FPU_MASK_IM  (1 << 0)
#define _FPU_MASK_DM  (1 << 1)
#define _FPU_MASK_ZM  (1 << 2)
#define _FPU_MASK_OM  (1 << 3)
#define _FPU_MASK_UM  (1 << 4)
#define _FPU_MASK_PM  (1 << 5)

/* All six exception mask bits.  */
#define _FPU_MASK_ALL 0x3f

/* All six exception flag bits.  */
#define _FPU_EX_ALL   0x3f

/* i387 rounding modes, as the value of the two-bit rounding-control
   field before it is shifted into position.  */

#define _FPU_RC_NEAREST 0
#define _FPU_RC_DOWN    1
#define _FPU_RC_UP      2
#define _FPU_RC_ZERO    3

/* Width mask of the rounding-control field.  */
#define _FPU_RC_MASK    3

/* MXCSR bit that enables flush to zero mode.  */

#define MXCSR_FTZ (1 << 15)
     68 
     69 
/* Memory image of the block written by FSTENV, followed by one extra
   word in which this file keeps the SSE MXCSR value.  The field order
   and widths must not change: FSTENV/FLDENV operate directly on objects
   of this type.  */
struct fenv
{
  unsigned short __control_word;	/* x87 control word.  */
  unsigned short __unused1;
  unsigned short __status_word;		/* x87 status word.  */
  unsigned short __unused2;
  unsigned short __tags;
  unsigned short __unused3;
  unsigned int __eip;
  unsigned short __cs_selector;
  unsigned int __opcode : 11;
  unsigned int __unused4 : 5;
  unsigned int __data_offset;
  unsigned short __data_selector;
  unsigned short __unused5;
  unsigned int __mxcsr;			/* Not part of the FSTENV block.  */
} __attribute__ ((gcc_struct));
     89 
     90 /* Check we can actually store the FPU state in the allocated size.  */
     91 _Static_assert (sizeof(struct fenv) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
     92 		"GFC_FPE_STATE_BUFFER_SIZE is too small");
     93 
/* Force the division X / Y to be evaluated and throw the result away.

   The first, empty asm lists X as a read-write operand, so the compiler
   has to assume its value changed.  The second, volatile asm takes the
   quotient as an input operand, so the quotient has to be computed.
   X must be an lvalue.  The operand constraints name x87 registers
   ("t", "f") unless __SSE_MATH__ is defined, in which case they name an
   SSE register ("x").  */
#ifndef __SSE_MATH__
# define __math_force_eval_div(x, y)					\
  do									\
    {									\
      __asm__ ("" : "+t" (x));						\
      __asm__ __volatile__ ("" : : "f" (x / y));			\
    }									\
  while (0)
#else
# define __math_force_eval_div(x, y)					\
  do									\
    {									\
      __asm__ ("" : "+x" (x));						\
      __asm__ __volatile__ ("" : : "x" (x / y));			\
    }									\
  while (0)
#endif
    105 
    106 /* Raise the supported floating-point exceptions from EXCEPTS.  Other
    107    bits in EXCEPTS are ignored.  Code originally borrowed from
    108    libatomic/config/x86/fenv.c.  */
    109 
    110 static void
    111 local_feraiseexcept (int excepts)
    112 {
    113   struct fenv temp;
    114 
    115   if (excepts & _FPU_MASK_IM)
    116     {
    117       float f = 0.0f;
    118       __math_force_eval_div (f, f);
    119     }
    120   if (excepts & _FPU_MASK_DM)
    121     {
    122       __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
    123       temp.__status_word |= _FPU_MASK_DM;
    124       __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
    125       __asm__ __volatile__ ("fwait");
    126     }
    127   if (excepts & _FPU_MASK_ZM)
    128     {
    129       float f = 1.0f, g = 0.0f;
    130       __math_force_eval_div (f, g);
    131     }
    132   if (excepts & _FPU_MASK_OM)
    133     {
    134       __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
    135       temp.__status_word |= _FPU_MASK_OM;
    136       __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
    137       __asm__ __volatile__ ("fwait");
    138     }
    139   if (excepts & _FPU_MASK_UM)
    140     {
    141       __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
    142       temp.__status_word |= _FPU_MASK_UM;
    143       __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
    144       __asm__ __volatile__ ("fwait");
    145     }
    146   if (excepts & _FPU_MASK_PM)
    147     {
    148       float f = 1.0f, g = 3.0f;
    149       __math_force_eval_div (f, g);
    150     }
    151 }
    152 
    153 
    154 void
    155 set_fpu_trap_exceptions (int trap, int notrap)
    156 {
    157   int exc_set = 0, exc_clr = 0;
    158   unsigned short cw;
    159 
    160   if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
    161   if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
    162   if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
    163   if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
    164   if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
    165   if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;
    166 
    167   if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
    168   if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
    169   if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
    170   if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
    171   if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
    172   if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;
    173 
    174   __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
    175 
    176   cw |= exc_clr;
    177   cw &= ~exc_set;
    178 
    179   __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));
    180 
    181   if (has_sse())
    182     {
    183       unsigned int cw_sse;
    184 
    185       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    186 
    187       /* The SSE exception masks are shifted by 7 bits.  */
    188       cw_sse |= (exc_clr << 7);
    189       cw_sse &= ~(exc_set << 7);
    190 
    191       /* Clear stalled exception flags.  */
    192       cw_sse &= ~_FPU_EX_ALL;
    193 
    194       __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    195     }
    196 }
    197 
    198 void
    199 set_fpu (void)
    200 {
    201   set_fpu_trap_exceptions (options.fpe, 0);
    202 }
    203 
    204 int
    205 get_fpu_trap_exceptions (void)
    206 {
    207   unsigned short cw;
    208   int mask;
    209   int res = 0;
    210 
    211   __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
    212   mask = cw;
    213 
    214   if (has_sse())
    215     {
    216       unsigned int cw_sse;
    217 
    218       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    219 
    220       /* The SSE exception masks are shifted by 7 bits.  */
    221       mask |= (cw_sse >> 7);
    222     }
    223 
    224   mask = ~mask & _FPU_MASK_ALL;
    225 
    226   if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
    227   if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
    228   if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
    229   if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
    230   if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
    231   if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
    232 
    233   return res;
    234 }
    235 
/* Report whether trapping is supported for FLAG.  FLAG is not examined:
   the answer is 1 for every value.  */
int
support_fpu_trap (int flag __attribute__((unused)))
{
  const int supported = 1;

  return supported;
}
    241 
    242 int
    243 get_fpu_except_flags (void)
    244 {
    245   unsigned short cw;
    246   int excepts;
    247   int res = 0;
    248 
    249   __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
    250   excepts = cw;
    251 
    252   if (has_sse())
    253     {
    254       unsigned int cw_sse;
    255 
    256       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    257       excepts |= cw_sse;
    258     }
    259 
    260   excepts &= _FPU_EX_ALL;
    261 
    262   if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
    263   if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
    264   if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
    265   if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
    266   if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
    267   if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
    268 
    269   return res;
    270 }
    271 
    272 void
    273 set_fpu_except_flags (int set, int clear)
    274 {
    275   struct fenv temp;
    276   int exc_set = 0, exc_clr = 0;
    277 
    278   /* Translate from GFC_PE_* values to _FPU_MASK_* values.  */
    279   if (set & GFC_FPE_INVALID)
    280     exc_set |= _FPU_MASK_IM;
    281   if (clear & GFC_FPE_INVALID)
    282     exc_clr |= _FPU_MASK_IM;
    283 
    284   if (set & GFC_FPE_DENORMAL)
    285     exc_set |= _FPU_MASK_DM;
    286   if (clear & GFC_FPE_DENORMAL)
    287     exc_clr |= _FPU_MASK_DM;
    288 
    289   if (set & GFC_FPE_ZERO)
    290     exc_set |= _FPU_MASK_ZM;
    291   if (clear & GFC_FPE_ZERO)
    292     exc_clr |= _FPU_MASK_ZM;
    293 
    294   if (set & GFC_FPE_OVERFLOW)
    295     exc_set |= _FPU_MASK_OM;
    296   if (clear & GFC_FPE_OVERFLOW)
    297     exc_clr |= _FPU_MASK_OM;
    298 
    299   if (set & GFC_FPE_UNDERFLOW)
    300     exc_set |= _FPU_MASK_UM;
    301   if (clear & GFC_FPE_UNDERFLOW)
    302     exc_clr |= _FPU_MASK_UM;
    303 
    304   if (set & GFC_FPE_INEXACT)
    305     exc_set |= _FPU_MASK_PM;
    306   if (clear & GFC_FPE_INEXACT)
    307     exc_clr |= _FPU_MASK_PM;
    308 
    309 
    310   /* Change the flags. This is tricky on 387 (unlike SSE), because we have
    311      FNSTSW but no FLDSW instruction.  */
    312   __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
    313   temp.__status_word &= ~exc_clr;
    314   __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
    315 
    316   /* Change the flags on SSE.  */
    317 
    318   if (has_sse())
    319   {
    320     unsigned int cw_sse;
    321 
    322     __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    323     cw_sse &= ~exc_clr;
    324     __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    325   }
    326 
    327   local_feraiseexcept (exc_set);
    328 }
    329 
/* Report whether the exception flag FLAG is supported.  FLAG is not
   examined: the answer is 1 for every value.  */
int
support_fpu_flag (int flag __attribute__((unused)))
{
  const int supported = 1;

  return supported;
}
    335 
    336 void
    337 set_fpu_rounding_mode (int round)
    338 {
    339   int round_mode;
    340   unsigned short cw;
    341 
    342   switch (round)
    343     {
    344     case GFC_FPE_TONEAREST:
    345       round_mode = _FPU_RC_NEAREST;
    346       break;
    347     case GFC_FPE_UPWARD:
    348       round_mode = _FPU_RC_UP;
    349       break;
    350     case GFC_FPE_DOWNWARD:
    351       round_mode = _FPU_RC_DOWN;
    352       break;
    353     case GFC_FPE_TOWARDZERO:
    354       round_mode = _FPU_RC_ZERO;
    355       break;
    356     default:
    357       return; /* Should be unreachable.  */
    358     }
    359 
    360   __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
    361 
    362   /* The x87 round control bits are shifted by 10 bits.  */
    363   cw &= ~(_FPU_RC_MASK << 10);
    364   cw |= round_mode << 10;
    365 
    366   __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));
    367 
    368   if (has_sse())
    369     {
    370       unsigned int cw_sse;
    371 
    372       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    373 
    374       /* The SSE round control bits are shifted by 13 bits.  */
    375       cw_sse &= ~(_FPU_RC_MASK << 13);
    376       cw_sse |= round_mode << 13;
    377 
    378       __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    379     }
    380 }
    381 
    382 int
    383 get_fpu_rounding_mode (void)
    384 {
    385   int round_mode;
    386 
    387 #ifdef __SSE_MATH__
    388   unsigned int cw;
    389 
    390   __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));
    391 
    392   /* The SSE round control bits are shifted by 13 bits.  */
    393   round_mode = cw >> 13;
    394 #else
    395   unsigned short cw;
    396 
    397   __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
    398 
    399   /* The x87 round control bits are shifted by 10 bits.  */
    400   round_mode = cw >> 10;
    401 #endif
    402 
    403   round_mode &= _FPU_RC_MASK;
    404 
    405   switch (round_mode)
    406     {
    407     case _FPU_RC_NEAREST:
    408       return GFC_FPE_TONEAREST;
    409     case _FPU_RC_UP:
    410       return GFC_FPE_UPWARD;
    411     case _FPU_RC_DOWN:
    412       return GFC_FPE_DOWNWARD;
    413     case _FPU_RC_ZERO:
    414       return GFC_FPE_TOWARDZERO;
    415     default:
    416       return 0; /* Should be unreachable.  */
    417     }
    418 }
    419 
    420 int
    421 support_fpu_rounding_mode (int mode)
    422 {
    423   if (mode == GFC_FPE_AWAY)
    424     return 0;
    425   else
    426     return 1;
    427 }
    428 
    429 void
    430 get_fpu_state (void *state)
    431 {
    432   struct fenv *envp = state;
    433 
    434   __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));
    435 
    436   /* fnstenv has the side effect of masking all exceptions, so we need
    437      to restore the control word after that.  */
    438   __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));
    439 
    440   if (has_sse())
    441     __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
    442 }
    443 
    444 void
    445 set_fpu_state (void *state)
    446 {
    447   struct fenv *envp = state;
    448 
    449   /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
    450      complex than this, but I think it suffices in our case.  */
    451   __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));
    452 
    453   if (has_sse())
    454     __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
    455 }
    456 
    457 
/* Report whether the underflow mode can be controlled for KIND.
   Returns 1 only when has_sse () is nonzero and KIND is 4 or 8;
   otherwise 0.  */
int
support_fpu_underflow_control (int kind)
{
  if (has_sse ())
    return kind == 4 || kind == 8;

  return 0;
}
    466 
    467 
    468 int
    469 get_fpu_underflow_mode (void)
    470 {
    471   unsigned int cw_sse;
    472 
    473   if (!has_sse())
    474     return 1;
    475 
    476   __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    477 
    478   /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow.  */
    479   return (cw_sse & MXCSR_FTZ) ? 0 : 1;
    480 }
    481 
    482 
    483 void
    484 set_fpu_underflow_mode (int gradual)
    485 {
    486   unsigned int cw_sse;
    487 
    488   if (!has_sse())
    489     return;
    490 
    491   __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    492 
    493   if (gradual)
    494     cw_sse &= ~MXCSR_FTZ;
    495   else
    496     cw_sse |= MXCSR_FTZ;
    497 
    498   __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    499 }
    500 
    501