Home | History | Annotate | Line # | Download | only in i386
      1  1.12  mrg /* Copyright (C) 2007-2022 Free Software Foundation, Inc.
      2   1.1  mrg 
      3   1.1  mrg    This file is part of GCC.
      4   1.1  mrg 
      5   1.1  mrg    GCC is free software; you can redistribute it and/or modify
      6   1.1  mrg    it under the terms of the GNU General Public License as published by
      7   1.1  mrg    the Free Software Foundation; either version 3, or (at your option)
      8   1.1  mrg    any later version.
      9   1.1  mrg 
     10   1.1  mrg    GCC is distributed in the hope that it will be useful,
     11   1.1  mrg    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12   1.1  mrg    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13   1.1  mrg    GNU General Public License for more details.
     14   1.1  mrg 
     15   1.1  mrg    Under Section 7 of GPL version 3, you are granted additional
     16   1.1  mrg    permissions described in the GCC Runtime Library Exception, version
     17   1.1  mrg    3.1, as published by the Free Software Foundation.
     18   1.1  mrg 
     19   1.1  mrg    You should have received a copy of the GNU General Public License and
     20   1.1  mrg    a copy of the GCC Runtime Library Exception along with this program;
     21   1.1  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     22   1.1  mrg    <http://www.gnu.org/licenses/>.  */
     23   1.1  mrg 
     24   1.1  mrg #ifndef _X86INTRIN_H_INCLUDED
     25   1.1  mrg # error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
     26   1.1  mrg #endif
     27   1.1  mrg 
     28   1.1  mrg #ifndef _FMA4INTRIN_H_INCLUDED
     29   1.1  mrg #define _FMA4INTRIN_H_INCLUDED
     30   1.1  mrg 
     31   1.1  mrg /* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files.  */
     32   1.1  mrg #include <ammintrin.h>
     33   1.1  mrg 
     34   1.5  mrg #ifndef __FMA4__
                   /* __FMA4__ is not defined at this point: save the current option
                      state, select the "fma4" target for the code that follows, and
                      define __DISABLE_FMA4__ as a marker so the pop_options block at
                      the end of this file restores the saved state.  */
     35   1.5  mrg #pragma GCC push_options
     36   1.5  mrg #pragma GCC target("fma4")
     37   1.5  mrg #define __DISABLE_FMA4__
     38   1.5  mrg #endif /* __FMA4__ */
     39   1.5  mrg 
     40   1.1  mrg /* 128b Floating point multiply/add type instructions.  */
     41   1.1  mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     42   1.1  mrg _mm_macc_ps (__m128 __A, __m128 __B, __m128 __C)
     43   1.1  mrg {
                     /* Pass __A, __B and __C, viewed as __v4sf, unchanged to the
                        vfmaddps builtin and return its result as __m128.  Presumably
                        this is __A * __B + __C per element; the builtin's semantics
                        are defined by the compiler, not in this file.  */
     44   1.1  mrg   return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
     45   1.1  mrg }
     46   1.1  mrg 
     47   1.1  mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     48   1.1  mrg _mm_macc_pd (__m128d __A, __m128d __B, __m128d __C)
     49   1.1  mrg {
                     /* Pass __A, __B and __C, viewed as __v2df, unchanged to the
                        vfmaddpd builtin and return its result as __m128d.  Presumably
                        this is __A * __B + __C per element; the builtin's semantics
                        are defined by the compiler, not in this file.  */
     50   1.1  mrg   return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
     51   1.1  mrg }
     52   1.1  mrg 
     53   1.1  mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     54   1.1  mrg _mm_macc_ss (__m128 __A, __m128 __B, __m128 __C)
     55   1.1  mrg {
                     /* Pass __A, __B and __C, viewed as __v4sf, unchanged to the
                        vfmaddss builtin and return its result as __m128.  Presumably
                        only the lowest element is computed as __A * __B + __C; how
                        the remaining elements are filled is up to the builtin --
                        TODO confirm against the compiler documentation.  */
     56   1.1  mrg   return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
     57   1.1  mrg }
     58   1.1  mrg 
     59   1.1  mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     60   1.1  mrg _mm_macc_sd (__m128d __A, __m128d __B, __m128d __C)
     61   1.1  mrg {
                     /* Pass __A, __B and __C, viewed as __v2df, unchanged to the
                        vfmaddsd builtin and return its result as __m128d.  Presumably
                        only the lowest element is computed as __A * __B + __C; how
                        the upper element is filled is up to the builtin -- TODO
                        confirm against the compiler documentation.  */
     62   1.1  mrg   return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
     63   1.1  mrg }
     64   1.1  mrg 
     65   1.1  mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     66   1.1  mrg _mm_msub_ps (__m128 __A, __m128 __B, __m128 __C)
     67   1.1  mrg 
     68   1.1  mrg {
                     /* Reuse the vfmaddps builtin with __C negated, so no separate
                        subtract builtin is needed.  Presumably the result is
                        __A * __B - __C per element.  */
     69   1.3  mrg   return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
     70   1.1  mrg }
     71   1.1  mrg 
     72   1.1  mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     73   1.1  mrg _mm_msub_pd (__m128d __A, __m128d __B, __m128d __C)
     74   1.1  mrg {
                     /* Reuse the vfmaddpd builtin with __C negated, so no separate
                        subtract builtin is needed.  Presumably the result is
                        __A * __B - __C per element.  */
     75   1.3  mrg   return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
     76   1.1  mrg }
     77   1.1  mrg 
     78   1.1  mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     79   1.1  mrg _mm_msub_ss (__m128 __A, __m128 __B, __m128 __C)
     80   1.1  mrg {
                     /* Reuse the scalar vfmaddss builtin with the whole __C vector
                        negated.  Presumably the lowest element of the result is
                        __A * __B - __C; the other elements are whatever the builtin
                        produces -- TODO confirm.  */
     81   1.3  mrg   return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
     82   1.1  mrg }
     83   1.1  mrg 
     84   1.1  mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     85   1.1  mrg _mm_msub_sd (__m128d __A, __m128d __B, __m128d __C)
     86   1.1  mrg {
                     /* Reuse the scalar vfmaddsd builtin with the whole __C vector
                        negated.  Presumably the lowest element of the result is
                        __A * __B - __C; the upper element is whatever the builtin
                        produces -- TODO confirm.  */
     87   1.3  mrg   return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
     88   1.1  mrg }
     89   1.1  mrg 
     90   1.1  mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     91   1.1  mrg _mm_nmacc_ps (__m128 __A, __m128 __B, __m128 __C)
     92   1.1  mrg {
                     /* Reuse the vfmaddps builtin with the first multiplicand __A
                        negated; __B and __C are passed through.  Presumably the
                        result is -(__A * __B) + __C per element.  */
     93   1.3  mrg   return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
     94   1.1  mrg }
     95   1.1  mrg 
     96   1.1  mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     97   1.1  mrg _mm_nmacc_pd (__m128d __A, __m128d __B, __m128d __C)
     98   1.1  mrg {
                     /* Reuse the vfmaddpd builtin with the first multiplicand __A
                        negated; __B and __C are passed through.  Presumably the
                        result is -(__A * __B) + __C per element.  */
     99   1.3  mrg   return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
    100   1.1  mrg }
    101   1.1  mrg 
    102   1.1  mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    103   1.1  mrg _mm_nmacc_ss (__m128 __A, __m128 __B, __m128 __C)
    104   1.1  mrg {
                     /* Reuse the scalar vfmaddss builtin with the whole __A vector
                        negated.  Presumably the lowest element of the result is
                        -(__A * __B) + __C; the other elements are whatever the
                        builtin produces -- TODO confirm.  */
    105   1.3  mrg   return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
    106   1.1  mrg }
    107   1.1  mrg 
    108   1.1  mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    109   1.1  mrg _mm_nmacc_sd (__m128d __A, __m128d __B, __m128d __C)
    110   1.1  mrg {
                     /* Reuse the scalar vfmaddsd builtin with the whole __A vector
                        negated.  Presumably the lowest element of the result is
                        -(__A * __B) + __C; the upper element is whatever the
                        builtin produces -- TODO confirm.  */
    111   1.3  mrg   return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
    112   1.1  mrg }
    113   1.1  mrg 
    114   1.1  mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    115   1.1  mrg _mm_nmsub_ps (__m128 __A, __m128 __B, __m128 __C)
    116   1.1  mrg {
                     /* Reuse the vfmaddps builtin with both __A and __C negated;
                        __B is passed through.  Presumably the result is
                        -(__A * __B) - __C per element.  */
    117   1.3  mrg   return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
    118   1.1  mrg }
    119   1.1  mrg 
    120   1.1  mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    121   1.1  mrg _mm_nmsub_pd (__m128d __A, __m128d __B, __m128d __C)
    122   1.1  mrg {
                     /* Reuse the vfmaddpd builtin with both __A and __C negated;
                        __B is passed through.  Presumably the result is
                        -(__A * __B) - __C per element.  */
    123   1.3  mrg   return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
    124   1.1  mrg }
    125   1.1  mrg 
    126   1.1  mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    127   1.1  mrg _mm_nmsub_ss (__m128 __A, __m128 __B, __m128 __C)
    128   1.1  mrg {
                     /* Reuse the scalar vfmaddss builtin with the whole __A and __C
                        vectors negated.  Presumably the lowest element of the
                        result is -(__A * __B) - __C; the other elements are
                        whatever the builtin produces -- TODO confirm.  */
    129   1.3  mrg   return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
    130   1.1  mrg }
    131   1.1  mrg 
    132   1.1  mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    133   1.1  mrg _mm_nmsub_sd (__m128d __A, __m128d __B, __m128d __C)
    134   1.1  mrg {
                     /* Reuse the scalar vfmaddsd builtin with the whole __A and __C
                        vectors negated.  Presumably the lowest element of the
                        result is -(__A * __B) - __C; the upper element is whatever
                        the builtin produces -- TODO confirm.  */
    135   1.3  mrg   return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
    136   1.1  mrg }
    137   1.1  mrg 
    138   1.1  mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    139   1.1  mrg _mm_maddsub_ps (__m128 __A, __m128 __B, __m128 __C)
    140   1.1  mrg {
                     /* Pass __A, __B and __C, viewed as __v4sf, unchanged to the
                        vfmaddsubps builtin and return its result as __m128.
                        Presumably the builtin alternates between adding and
                        subtracting __C across elements; which elements add and
                        which subtract is not visible here -- TODO confirm.  */
    141   1.1  mrg   return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
    142   1.1  mrg }
    143   1.1  mrg 
    144   1.1  mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    145   1.1  mrg _mm_maddsub_pd (__m128d __A, __m128d __B, __m128d __C)
    146   1.1  mrg {
                     /* Pass __A, __B and __C, viewed as __v2df, unchanged to the
                        vfmaddsubpd builtin and return its result as __m128d.
                        Presumably the builtin alternates between adding and
                        subtracting __C across elements; which element adds and
                        which subtracts is not visible here -- TODO confirm.  */
    147   1.1  mrg   return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
    148   1.1  mrg }
    149   1.1  mrg 
    150   1.1  mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    151   1.1  mrg _mm_msubadd_ps (__m128 __A, __m128 __B, __m128 __C)
    152   1.1  mrg {
                     /* Reuse the vfmaddsubps builtin with __C negated.  Presumably
                        this swaps which elements add and which subtract __C
                        relative to passing __C unchanged to the same builtin.  */
    153   1.3  mrg   return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
    154   1.1  mrg }
    155   1.1  mrg 
    156   1.1  mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    157   1.1  mrg _mm_msubadd_pd (__m128d __A, __m128d __B, __m128d __C)
    158   1.1  mrg {
                     /* Reuse the vfmaddsubpd builtin with __C negated.  Presumably
                        this swaps which element adds and which subtracts __C
                        relative to passing __C unchanged to the same builtin.  */
    159   1.3  mrg   return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
    160   1.1  mrg }
    161   1.1  mrg 
    162   1.1  mrg /* 256b Floating point multiply/add type instructions.  */
    163   1.1  mrg extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    164   1.1  mrg _mm256_macc_ps (__m256 __A, __m256 __B, __m256 __C)
    165   1.1  mrg {
                     /* Pass __A, __B and __C, viewed as __v8sf, unchanged to the
                        vfmaddps256 builtin and return its result as __m256.
                        Presumably this is __A * __B + __C per element; the
                        builtin's semantics are defined by the compiler, not in
                        this file.  */
    166   1.1  mrg   return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
    167   1.1  mrg }
    168   1.1  mrg 
    169   1.1  mrg extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    170   1.1  mrg _mm256_macc_pd (__m256d __A, __m256d __B, __m256d __C)
    171   1.1  mrg {
                     /* Pass __A, __B and __C, viewed as __v4df, unchanged to the
                        vfmaddpd256 builtin and return its result as __m256d.
                        Presumably this is __A * __B + __C per element; the
                        builtin's semantics are defined by the compiler, not in
                        this file.  */
    172   1.1  mrg   return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
    173   1.1  mrg }
    174   1.1  mrg 
    175   1.1  mrg extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    176   1.1  mrg _mm256_msub_ps (__m256 __A, __m256 __B, __m256 __C)
    177   1.1  mrg 
    178   1.1  mrg {
                     /* Reuse the vfmaddps256 builtin with __C negated, so no
                        separate subtract builtin is needed.  Presumably the result
                        is __A * __B - __C per element.  */
    179   1.3  mrg   return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
    180   1.1  mrg }
    181   1.1  mrg 
    182   1.1  mrg extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    183   1.1  mrg _mm256_msub_pd (__m256d __A, __m256d __B, __m256d __C)
    184   1.1  mrg {
                     /* Reuse the vfmaddpd256 builtin with __C negated, so no
                        separate subtract builtin is needed.  Presumably the result
                        is __A * __B - __C per element.  */
    185   1.3  mrg   return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
    186   1.1  mrg }
    187   1.1  mrg 
    188   1.1  mrg extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    189   1.1  mrg _mm256_nmacc_ps (__m256 __A, __m256 __B, __m256 __C)
    190   1.1  mrg {
                     /* Reuse the vfmaddps256 builtin with the first multiplicand
                        __A negated; __B and __C are passed through.  Presumably
                        the result is -(__A * __B) + __C per element.  */
    191   1.3  mrg   return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
    192   1.1  mrg }
    193   1.1  mrg 
    194   1.1  mrg extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    195   1.1  mrg _mm256_nmacc_pd (__m256d __A, __m256d __B, __m256d __C)
    196   1.1  mrg {
                     /* Reuse the vfmaddpd256 builtin with the first multiplicand
                        __A negated; __B and __C are passed through.  Presumably
                        the result is -(__A * __B) + __C per element.  */
    197   1.3  mrg   return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, (__v4df)__C);
    198   1.1  mrg }
    199   1.1  mrg 
    200   1.1  mrg extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    201   1.1  mrg _mm256_nmsub_ps (__m256 __A, __m256 __B, __m256 __C)
    202   1.1  mrg {
                     /* Reuse the vfmaddps256 builtin with both __A and __C negated;
                        __B is passed through.  Presumably the result is
                        -(__A * __B) - __C per element.  */
    203   1.3  mrg   return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
    204   1.1  mrg }
    205   1.1  mrg 
    206   1.1  mrg extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    207   1.1  mrg _mm256_nmsub_pd (__m256d __A, __m256d __B, __m256d __C)
    208   1.1  mrg {
                     /* Reuse the vfmaddpd256 builtin with both __A and __C negated;
                        __B is passed through.  Presumably the result is
                        -(__A * __B) - __C per element.  */
    209   1.3  mrg   return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
    210   1.1  mrg }
    211   1.1  mrg 
    212   1.1  mrg extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    213   1.1  mrg _mm256_maddsub_ps (__m256 __A, __m256 __B, __m256 __C)
    214   1.1  mrg {
                     /* Pass __A, __B and __C, viewed as __v8sf, unchanged to the
                        vfmaddsubps256 builtin and return its result as __m256.
                        Presumably the builtin alternates between adding and
                        subtracting __C across elements; which elements add and
                        which subtract is not visible here -- TODO confirm.  */
    215   1.1  mrg   return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
    216   1.1  mrg }
    217   1.1  mrg 
    218   1.1  mrg extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    219   1.1  mrg _mm256_maddsub_pd (__m256d __A, __m256d __B, __m256d __C)
    220   1.1  mrg {
                     /* Pass __A, __B and __C, viewed as __v4df, unchanged to the
                        vfmaddsubpd256 builtin and return its result as __m256d.
                        Presumably the builtin alternates between adding and
                        subtracting __C across elements; which elements add and
                        which subtract is not visible here -- TODO confirm.  */
    221   1.1  mrg   return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
    222   1.1  mrg }
    223   1.1  mrg 
    224   1.1  mrg extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    225   1.1  mrg _mm256_msubadd_ps (__m256 __A, __m256 __B, __m256 __C)
    226   1.1  mrg {
                     /* Reuse the vfmaddsubps256 builtin with __C negated.
                        Presumably this swaps which elements add and which subtract
                        __C relative to passing __C unchanged to the same builtin.  */
    227   1.3  mrg   return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
    228   1.1  mrg }
    229   1.1  mrg 
    230   1.1  mrg extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    231   1.1  mrg _mm256_msubadd_pd (__m256d __A, __m256d __B, __m256d __C)
    232   1.1  mrg {
                     /* Reuse the vfmaddsubpd256 builtin with __C negated.
                        Presumably this swaps which elements add and which subtract
                        __C relative to passing __C unchanged to the same builtin.  */
    233   1.3  mrg   return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
    234   1.1  mrg }
    235   1.1  mrg 
    236   1.5  mrg #ifdef __DISABLE_FMA4__
                   /* __DISABLE_FMA4__ is only defined by the push_options block near
                      the top of this header, i.e. when __FMA4__ was not already
                      defined.  Drop the marker and pop the option state that block
                      saved.  */
    237   1.5  mrg #undef __DISABLE_FMA4__
    238   1.5  mrg #pragma GCC pop_options
    239   1.5  mrg #endif /* __DISABLE_FMA4__ */
    240   1.1  mrg 
    241   1.1  mrg #endif
    242