/* FPU-related code for x86 and x86_64 processors.
   Copyright (C) 2005-2022 Free Software Foundation, Inc.
   Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>

This file is part of the GNU Fortran 95 runtime library (libgfortran).

Libgfortran is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

Libgfortran is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
     25      1.1  mrg 
     26      1.1  mrg #ifndef __SSE_MATH__
     27      1.1  mrg #include "cpuid.h"
     28      1.1  mrg #endif
     29      1.1  mrg 
/* Return nonzero iff the processor supports SSE instructions.  When the
   compiler already targets SSE math there is nothing to probe; otherwise
   query CPUID leaf 1 and test the SSE feature bit in EDX.  */
static int
has_sse (void)
{
#ifdef __SSE_MATH__
  return 1;
#else
  unsigned int eax, ebx, ecx, edx;

  return __get_cpuid (1, &eax, &ebx, &ecx, &edx) ? (edx & bit_SSE) : 0;
#endif
}
     44      1.1  mrg 
/* i387 exceptions -- see linux <fpu_control.h> header file for details.
   These are the exception mask bits of the x87 control word; the same bit
   positions hold the corresponding exception flags in the status word.  */
#define _FPU_MASK_IM  0x01	/* Invalid operation.  */
#define _FPU_MASK_DM  0x02	/* Denormalized operand.  */
#define _FPU_MASK_ZM  0x04	/* Division by zero.  */
#define _FPU_MASK_OM  0x08	/* Overflow.  */
#define _FPU_MASK_UM  0x10	/* Underflow.  */
#define _FPU_MASK_PM  0x20	/* Precision (inexact result).  */
#define _FPU_MASK_ALL 0x3f	/* All six exceptions.  */

/* Mask covering all exception flag bits of the x87 status word (the SSE
   MXCSR register uses the same low six bit positions for its flags).  */
#define _FPU_EX_ALL   0x3f

/* i387 rounding modes, as encoded in the RC field of the x87 control
   word (bits 10-11) and of MXCSR (bits 13-14).  */

#define _FPU_RC_NEAREST 0x0
#define _FPU_RC_DOWN    0x1
#define _FPU_RC_UP      0x2
#define _FPU_RC_ZERO    0x3

#define _FPU_RC_MASK    0x3

/* Enable flush to zero mode.  */

#define MXCSR_FTZ (1 << 15)
     68      1.1  mrg 
     69      1.1  mrg 
/* This structure corresponds to the layout of the block
   written by FSTENV.  The SSE control/status register is appended at the
   end so that a single buffer can hold the complete FPU state.  */
struct fenv
{
  unsigned short int __control_word;	/* x87 control word (masks, RC).  */
  unsigned short int __unused1;
  unsigned short int __status_word;	/* x87 status word (exception flags).  */
  unsigned short int __unused2;
  unsigned short int __tags;
  unsigned short int __unused3;
  unsigned int __eip;
  unsigned short int __cs_selector;
  unsigned int __opcode:11;
  unsigned int __unused4:5;
  unsigned int __data_offset;
  unsigned short int __data_selector;
  unsigned short int __unused5;
  unsigned int __mxcsr;			/* SSE control/status register.  */
} __attribute__ ((gcc_struct));		/* Force the GCC struct layout even on
					   targets defaulting to ms_struct.  */

/* Check we can actually store the FPU state in the allocated size.  */
_Static_assert (sizeof(struct fenv) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
		"GFC_FPE_STATE_BUFFER_SIZE is too small");
     93      1.1  mrg 
/* Force evaluation of the division X / Y at run time so that any
   floating-point exception it raises is actually signalled, even under
   optimization.  The first asm makes X opaque to the optimizer; the
   second makes the quotient appear used so it cannot be folded away.
   The operands live in SSE registers ("x" constraint) when compiling
   for SSE math, and on the x87 stack ("t"/"f" constraints) otherwise,
   so the exception is raised in the unit whose flags we manage.  */
#ifdef __SSE_MATH__
# define __math_force_eval_div(x, y)					\
  do {									\
    __asm__ ("" : "+x" (x)); __asm__ __volatile__ ("" : : "x" (x / y));	\
  } while (0)
#else
# define __math_force_eval_div(x, y)					\
  do {									\
    __asm__ ("" : "+t" (x)); __asm__ __volatile__ ("" : : "f" (x / y));	\
  } while (0)
#endif
    105      1.1  mrg 
/* Raise the supported floating-point exceptions from EXCEPTS.  Other
   bits in EXCEPTS are ignored.  Code originally borrowed from
   libatomic/config/x86/fenv.c.  */

static void
local_feraiseexcept (int excepts)
{
  struct fenv temp;

  if (excepts & _FPU_MASK_IM)
    {
      /* 0.0 / 0.0 raises an invalid-operation exception.  */
      float f = 0.0f;
      __math_force_eval_div (f, f);
    }
  if (excepts & _FPU_MASK_DM)
    {
      /* No simple arithmetic raises only the denormal exception, so set
	 the flag directly in the saved x87 environment, reload it, and
	 let the trailing fwait deliver any now-pending exception.  */
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_DM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_ZM)
    {
      /* 1.0 / 0.0 raises a zero-divide exception.  */
      float f = 1.0f, g = 0.0f;
      __math_force_eval_div (f, g);
    }
  if (excepts & _FPU_MASK_OM)
    {
      /* Overflow: set the status flag via the environment, as above.  */
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_OM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_UM)
    {
      /* Underflow: set the status flag via the environment, as above.  */
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_UM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_PM)
    {
      /* 1.0 / 3.0 is inexact and raises the precision exception.  */
      float f = 1.0f, g = 3.0f;
      __math_force_eval_div (f, g);
    }
}
    152      1.1  mrg 
    153      1.1  mrg 
    154      1.1  mrg void
    155      1.1  mrg set_fpu_trap_exceptions (int trap, int notrap)
    156      1.1  mrg {
    157      1.1  mrg   int exc_set = 0, exc_clr = 0;
    158      1.1  mrg   unsigned short cw;
    159      1.1  mrg 
    160      1.1  mrg   if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
    161      1.1  mrg   if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
    162      1.1  mrg   if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
    163      1.1  mrg   if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
    164      1.1  mrg   if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
    165      1.1  mrg   if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;
    166      1.1  mrg 
    167      1.1  mrg   if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
    168      1.1  mrg   if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
    169      1.1  mrg   if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
    170      1.1  mrg   if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
    171      1.1  mrg   if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
    172      1.1  mrg   if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;
    173      1.1  mrg 
    174      1.1  mrg   __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
    175      1.1  mrg 
    176      1.1  mrg   cw |= exc_clr;
    177      1.1  mrg   cw &= ~exc_set;
    178      1.1  mrg 
    179      1.1  mrg   __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));
    180      1.1  mrg 
    181      1.1  mrg   if (has_sse())
    182      1.1  mrg     {
    183      1.1  mrg       unsigned int cw_sse;
    184      1.1  mrg 
    185      1.1  mrg       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    186      1.1  mrg 
    187      1.1  mrg       /* The SSE exception masks are shifted by 7 bits.  */
    188      1.1  mrg       cw_sse |= (exc_clr << 7);
    189      1.1  mrg       cw_sse &= ~(exc_set << 7);
    190      1.1  mrg 
    191      1.1  mrg       /* Clear stalled exception flags.  */
    192      1.1  mrg       cw_sse &= ~_FPU_EX_ALL;
    193      1.1  mrg 
    194      1.1  mrg       __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    195      1.1  mrg     }
    196      1.1  mrg }
    197      1.1  mrg 
/* Install the FPU trapping behavior requested by the program's runtime
   options (the -ffpe-trap= setting recorded in options.fpe); no traps
   are explicitly disabled.  */
void
set_fpu (void)
{
  set_fpu_trap_exceptions (options.fpe, 0);
}
    203      1.1  mrg 
    204      1.1  mrg int
    205      1.1  mrg get_fpu_trap_exceptions (void)
    206      1.1  mrg {
    207      1.1  mrg   unsigned short cw;
    208      1.1  mrg   int mask;
    209      1.1  mrg   int res = 0;
    210      1.1  mrg 
    211      1.1  mrg   __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
    212      1.1  mrg   mask = cw;
    213      1.1  mrg 
    214      1.1  mrg   if (has_sse())
    215      1.1  mrg     {
    216      1.1  mrg       unsigned int cw_sse;
    217      1.1  mrg 
    218      1.1  mrg       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    219      1.1  mrg 
    220      1.1  mrg       /* The SSE exception masks are shifted by 7 bits.  */
    221      1.1  mrg       mask |= (cw_sse >> 7);
    222      1.1  mrg     }
    223      1.1  mrg 
    224      1.1  mrg   mask = ~mask & _FPU_MASK_ALL;
    225      1.1  mrg 
    226      1.1  mrg   if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
    227      1.1  mrg   if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
    228      1.1  mrg   if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
    229      1.1  mrg   if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
    230      1.1  mrg   if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
    231      1.1  mrg   if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
    232      1.1  mrg 
    233      1.1  mrg   return res;
    234      1.1  mrg }
    235      1.1  mrg 
/* Report whether trapping is supported for the exception FLAG.  All six
   exceptions can be trapped on x86, so this is unconditionally true.  */
int
support_fpu_trap (int flag __attribute__((unused)))
{
  return 1;
}
    241      1.1  mrg 
    242      1.1  mrg int
    243      1.1  mrg get_fpu_except_flags (void)
    244      1.1  mrg {
    245      1.1  mrg   unsigned short cw;
    246      1.1  mrg   int excepts;
    247      1.1  mrg   int res = 0;
    248      1.1  mrg 
    249      1.1  mrg   __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
    250      1.1  mrg   excepts = cw;
    251      1.1  mrg 
    252      1.1  mrg   if (has_sse())
    253      1.1  mrg     {
    254      1.1  mrg       unsigned int cw_sse;
    255      1.1  mrg 
    256      1.1  mrg       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    257      1.1  mrg       excepts |= cw_sse;
    258      1.1  mrg     }
    259      1.1  mrg 
    260      1.1  mrg   excepts &= _FPU_EX_ALL;
    261      1.1  mrg 
    262      1.1  mrg   if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
    263      1.1  mrg   if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
    264      1.1  mrg   if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
    265      1.1  mrg   if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
    266      1.1  mrg   if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
    267      1.1  mrg   if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
    268      1.1  mrg 
    269      1.1  mrg   return res;
    270      1.1  mrg }
    271      1.1  mrg 
    272      1.1  mrg void
    273      1.1  mrg set_fpu_except_flags (int set, int clear)
    274      1.1  mrg {
    275  1.1.1.3  mrg   struct fenv temp;
    276      1.1  mrg   int exc_set = 0, exc_clr = 0;
    277      1.1  mrg 
    278      1.1  mrg   /* Translate from GFC_PE_* values to _FPU_MASK_* values.  */
    279      1.1  mrg   if (set & GFC_FPE_INVALID)
    280      1.1  mrg     exc_set |= _FPU_MASK_IM;
    281      1.1  mrg   if (clear & GFC_FPE_INVALID)
    282      1.1  mrg     exc_clr |= _FPU_MASK_IM;
    283      1.1  mrg 
    284      1.1  mrg   if (set & GFC_FPE_DENORMAL)
    285      1.1  mrg     exc_set |= _FPU_MASK_DM;
    286      1.1  mrg   if (clear & GFC_FPE_DENORMAL)
    287      1.1  mrg     exc_clr |= _FPU_MASK_DM;
    288      1.1  mrg 
    289      1.1  mrg   if (set & GFC_FPE_ZERO)
    290      1.1  mrg     exc_set |= _FPU_MASK_ZM;
    291      1.1  mrg   if (clear & GFC_FPE_ZERO)
    292      1.1  mrg     exc_clr |= _FPU_MASK_ZM;
    293      1.1  mrg 
    294      1.1  mrg   if (set & GFC_FPE_OVERFLOW)
    295      1.1  mrg     exc_set |= _FPU_MASK_OM;
    296      1.1  mrg   if (clear & GFC_FPE_OVERFLOW)
    297      1.1  mrg     exc_clr |= _FPU_MASK_OM;
    298      1.1  mrg 
    299      1.1  mrg   if (set & GFC_FPE_UNDERFLOW)
    300      1.1  mrg     exc_set |= _FPU_MASK_UM;
    301      1.1  mrg   if (clear & GFC_FPE_UNDERFLOW)
    302      1.1  mrg     exc_clr |= _FPU_MASK_UM;
    303      1.1  mrg 
    304      1.1  mrg   if (set & GFC_FPE_INEXACT)
    305      1.1  mrg     exc_set |= _FPU_MASK_PM;
    306      1.1  mrg   if (clear & GFC_FPE_INEXACT)
    307      1.1  mrg     exc_clr |= _FPU_MASK_PM;
    308      1.1  mrg 
    309      1.1  mrg 
    310      1.1  mrg   /* Change the flags. This is tricky on 387 (unlike SSE), because we have
    311      1.1  mrg      FNSTSW but no FLDSW instruction.  */
    312      1.1  mrg   __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
    313      1.1  mrg   temp.__status_word &= ~exc_clr;
    314      1.1  mrg   __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
    315      1.1  mrg 
    316      1.1  mrg   /* Change the flags on SSE.  */
    317      1.1  mrg 
    318      1.1  mrg   if (has_sse())
    319      1.1  mrg   {
    320      1.1  mrg     unsigned int cw_sse;
    321      1.1  mrg 
    322      1.1  mrg     __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    323      1.1  mrg     cw_sse &= ~exc_clr;
    324      1.1  mrg     __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    325      1.1  mrg   }
    326      1.1  mrg 
    327      1.1  mrg   local_feraiseexcept (exc_set);
    328      1.1  mrg }
    329      1.1  mrg 
/* Report whether the exception flag FLAG can be queried and set.  All
   six flags are supported on x86, so this is unconditionally true.  */
int
support_fpu_flag (int flag __attribute__((unused)))
{
  return 1;
}
    335      1.1  mrg 
    336      1.1  mrg void
    337      1.1  mrg set_fpu_rounding_mode (int round)
    338      1.1  mrg {
    339      1.1  mrg   int round_mode;
    340      1.1  mrg   unsigned short cw;
    341      1.1  mrg 
    342      1.1  mrg   switch (round)
    343      1.1  mrg     {
    344      1.1  mrg     case GFC_FPE_TONEAREST:
    345      1.1  mrg       round_mode = _FPU_RC_NEAREST;
    346      1.1  mrg       break;
    347      1.1  mrg     case GFC_FPE_UPWARD:
    348      1.1  mrg       round_mode = _FPU_RC_UP;
    349      1.1  mrg       break;
    350      1.1  mrg     case GFC_FPE_DOWNWARD:
    351      1.1  mrg       round_mode = _FPU_RC_DOWN;
    352      1.1  mrg       break;
    353      1.1  mrg     case GFC_FPE_TOWARDZERO:
    354      1.1  mrg       round_mode = _FPU_RC_ZERO;
    355      1.1  mrg       break;
    356      1.1  mrg     default:
    357      1.1  mrg       return; /* Should be unreachable.  */
    358      1.1  mrg     }
    359      1.1  mrg 
    360      1.1  mrg   __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
    361      1.1  mrg 
    362      1.1  mrg   /* The x87 round control bits are shifted by 10 bits.  */
    363      1.1  mrg   cw &= ~(_FPU_RC_MASK << 10);
    364      1.1  mrg   cw |= round_mode << 10;
    365      1.1  mrg 
    366      1.1  mrg   __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));
    367      1.1  mrg 
    368      1.1  mrg   if (has_sse())
    369      1.1  mrg     {
    370      1.1  mrg       unsigned int cw_sse;
    371      1.1  mrg 
    372      1.1  mrg       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    373      1.1  mrg 
    374      1.1  mrg       /* The SSE round control bits are shifted by 13 bits.  */
    375      1.1  mrg       cw_sse &= ~(_FPU_RC_MASK << 13);
    376      1.1  mrg       cw_sse |= round_mode << 13;
    377      1.1  mrg 
    378      1.1  mrg       __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    379      1.1  mrg     }
    380      1.1  mrg }
    381      1.1  mrg 
    382      1.1  mrg int
    383      1.1  mrg get_fpu_rounding_mode (void)
    384      1.1  mrg {
    385      1.1  mrg   int round_mode;
    386      1.1  mrg 
    387      1.1  mrg #ifdef __SSE_MATH__
    388      1.1  mrg   unsigned int cw;
    389      1.1  mrg 
    390      1.1  mrg   __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));
    391      1.1  mrg 
    392      1.1  mrg   /* The SSE round control bits are shifted by 13 bits.  */
    393      1.1  mrg   round_mode = cw >> 13;
    394      1.1  mrg #else
    395      1.1  mrg   unsigned short cw;
    396      1.1  mrg 
    397      1.1  mrg   __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
    398      1.1  mrg 
    399      1.1  mrg   /* The x87 round control bits are shifted by 10 bits.  */
    400      1.1  mrg   round_mode = cw >> 10;
    401      1.1  mrg #endif
    402      1.1  mrg 
    403      1.1  mrg   round_mode &= _FPU_RC_MASK;
    404      1.1  mrg 
    405      1.1  mrg   switch (round_mode)
    406      1.1  mrg     {
    407      1.1  mrg     case _FPU_RC_NEAREST:
    408      1.1  mrg       return GFC_FPE_TONEAREST;
    409      1.1  mrg     case _FPU_RC_UP:
    410      1.1  mrg       return GFC_FPE_UPWARD;
    411      1.1  mrg     case _FPU_RC_DOWN:
    412      1.1  mrg       return GFC_FPE_DOWNWARD;
    413      1.1  mrg     case _FPU_RC_ZERO:
    414      1.1  mrg       return GFC_FPE_TOWARDZERO;
    415      1.1  mrg     default:
    416      1.1  mrg       return 0; /* Should be unreachable.  */
    417      1.1  mrg     }
    418      1.1  mrg }
    419      1.1  mrg 
/* Report whether the rounding mode MODE can be set.  All four IEEE
   rounding modes are supported on x86, so this is always true.  */
int
support_fpu_rounding_mode (int mode __attribute__((unused)))
{
  return 1;
}
    425      1.1  mrg 
/* Save the complete FPU state (x87 environment and, when available, the
   SSE MXCSR register) into the buffer STATE, which must be at least
   GFC_FPE_STATE_BUFFER_SIZE bytes (checked by the static assertion on
   struct fenv above).  */
void
get_fpu_state (void *state)
{
  struct fenv *envp = state;

  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));

  /* fnstenv has the side effect of masking all exceptions, so we need
     to restore the control word after that.  */
  __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));

  if (has_sse())
    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
}
    440      1.1  mrg 
/* Restore an FPU state previously saved by get_fpu_state from the
   buffer STATE: reload the x87 environment and, when available, the SSE
   MXCSR register.  */
void
set_fpu_state (void *state)
{
  struct fenv *envp = state;

  /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
     complex than this, but I think it suffices in our case.  */
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));

  if (has_sse())
    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
}
    453      1.1  mrg 
    454      1.1  mrg 
/* Report whether abrupt underflow (flush-to-zero) can be controlled for
   real kind KIND.  FTZ lives in the MXCSR register, so it requires SSE
   and only affects the SSE kinds, real(4) and real(8).  */
int
support_fpu_underflow_control (int kind)
{
  if (has_sse ())
    switch (kind)
      {
      case 4:
      case 8:
	return 1;
      default:
	break;
      }

  return 0;
}
    463      1.1  mrg 
    464      1.1  mrg 
    465      1.1  mrg int
    466      1.1  mrg get_fpu_underflow_mode (void)
    467      1.1  mrg {
    468      1.1  mrg   unsigned int cw_sse;
    469      1.1  mrg 
    470      1.1  mrg   if (!has_sse())
    471      1.1  mrg     return 1;
    472      1.1  mrg 
    473      1.1  mrg   __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    474      1.1  mrg 
    475      1.1  mrg   /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow.  */
    476      1.1  mrg   return (cw_sse & MXCSR_FTZ) ? 0 : 1;
    477      1.1  mrg }
    478      1.1  mrg 
    479      1.1  mrg 
    480      1.1  mrg void
    481      1.1  mrg set_fpu_underflow_mode (int gradual)
    482      1.1  mrg {
    483      1.1  mrg   unsigned int cw_sse;
    484      1.1  mrg 
    485      1.1  mrg   if (!has_sse())
    486      1.1  mrg     return;
    487      1.1  mrg 
    488      1.1  mrg   __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    489      1.1  mrg 
    490      1.1  mrg   if (gradual)
    491      1.1  mrg     cw_sse &= ~MXCSR_FTZ;
    492      1.1  mrg   else
    493      1.1  mrg     cw_sse |= MXCSR_FTZ;
    494      1.1  mrg 
    495      1.1  mrg   __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    496      1.1  mrg }
    497      1.1  mrg 
    498