Home | History | Annotate | Line # | Download | only in i386
      1  1.7  mrg /* Copyright (C) 2013-2022 Free Software Foundation, Inc.
      2  1.1  mrg 
      3  1.1  mrg    This file is part of GCC.
      4  1.1  mrg 
      5  1.1  mrg    GCC is free software; you can redistribute it and/or modify
      6  1.1  mrg    it under the terms of the GNU General Public License as published by
      7  1.1  mrg    the Free Software Foundation; either version 3, or (at your option)
      8  1.1  mrg    any later version.
      9  1.1  mrg 
     10  1.1  mrg    GCC is distributed in the hope that it will be useful,
     11  1.1  mrg    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12  1.1  mrg    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13  1.1  mrg    GNU General Public License for more details.
     14  1.1  mrg 
     15  1.1  mrg    Under Section 7 of GPL version 3, you are granted additional
     16  1.1  mrg    permissions described in the GCC Runtime Library Exception, version
     17  1.1  mrg    3.1, as published by the Free Software Foundation.
     18  1.1  mrg 
     19  1.1  mrg    You should have received a copy of the GNU General Public License and
     20  1.1  mrg    a copy of the GCC Runtime Library Exception along with this program;
     21  1.1  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     22  1.1  mrg    <http://www.gnu.org/licenses/>.  */
     23  1.1  mrg 
     24  1.1  mrg #ifndef _IMMINTRIN_H_INCLUDED
     25  1.1  mrg #error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
     26  1.1  mrg #endif
     27  1.1  mrg 
     28  1.1  mrg #ifndef _AVX512ERINTRIN_H_INCLUDED
     29  1.1  mrg #define _AVX512ERINTRIN_H_INCLUDED
     30  1.1  mrg 
     31  1.1  mrg #ifndef __AVX512ER__
     32  1.1  mrg #pragma GCC push_options
     33  1.1  mrg #pragma GCC target("avx512er")
     34  1.1  mrg #define __DISABLE_AVX512ER__
     35  1.1  mrg #endif /* __AVX512ER__ */
     36  1.1  mrg 
/* Internal data types for implementing the intrinsics.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

/* Write-mask types: one bit per vector element (8 doubles / 16 floats).  */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
     48  1.1  mrg 
     49  1.1  mrg #ifdef __OPTIMIZE__
/* Approximate 2^x ("exp2a23": 23-bit accuracy per the mnemonic) of each
   double in __A, with rounding/SAE control __R.  Unmasked: the all-ones
   mask means the _mm512_undefined_pd () merge source is ignored.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_exp2a23_round_pd (__m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
					       (__v8df) _mm512_undefined_pd (),
					       (__mmask8) -1, __R);
}
     58  1.1  mrg 
/* Masked 2^x: elements whose bit in __U is clear are copied from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
					       (__v8df) __W,
					       (__mmask8) __U, __R);
}
     67  1.1  mrg 
/* Zero-masked 2^x: elements whose bit in __U is clear are zeroed
   (merge source is _mm512_setzero_pd).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
					       (__v8df) _mm512_setzero_pd (),
					       (__mmask8) __U, __R);
}
     76  1.1  mrg 
/* Approximate 2^x of each of the 16 floats in __A with rounding/SAE
   control __R.  Unmasked; the undefined merge source is ignored.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_exp2a23_round_ps (__m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
					      (__v16sf) _mm512_undefined_ps (),
					      (__mmask16) -1, __R);
}
     85  1.1  mrg 
/* Masked single-precision 2^x: unselected elements come from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
					      (__v16sf) __W,
					      (__mmask16) __U, __R);
}
     94  1.1  mrg 
/* Zero-masked single-precision 2^x: unselected elements are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
					      (__v16sf) _mm512_setzero_ps (),
					      (__mmask16) __U, __R);
}
    103  1.1  mrg 
/* Approximate reciprocal ("rcp28": 28-bit accuracy per the mnemonic) of
   each double in __A, SAE control in __R.  Unmasked.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_pd (__m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
						(__v8df) _mm512_undefined_pd (),
						(__mmask8) -1, __R);
}
    112  1.1  mrg 
/* Masked rcp28: unselected elements come from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
						(__v8df) __W,
						(__mmask8) __U, __R);
}
    121  1.1  mrg 
/* Zero-masked rcp28: unselected elements are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
						(__v8df) _mm512_setzero_pd (),
						(__mmask8) __U, __R);
}
    130  1.1  mrg 
/* Approximate reciprocal of each of the 16 floats in __A.  Unmasked.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_ps (__m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
					       (__v16sf) _mm512_undefined_ps (),
					       (__mmask16) -1, __R);
}
    139  1.1  mrg 
/* Masked single-precision rcp28: unselected elements come from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
					       (__v16sf) __W,
					       (__mmask16) __U, __R);
}
    148  1.1  mrg 
/* Zero-masked single-precision rcp28: unselected elements are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
					       (__v16sf) _mm512_setzero_ps (),
					       (__mmask16) __U, __R);
}
    157  1.1  mrg 
/* Scalar rcp28 of the low double of __B; upper element copied from __A.
   Note the swap: the builtin takes the processed operand (__B) first
   and the upper-element source (__A) second.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
						 (__v2df) __A,
						 __R);
}
    166  1.1  mrg 
/* Masked scalar rcp28: low result from __B (or __W's low element when
   bit 0 of __U is clear); upper element from __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
			 __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
						      (__v2df) __A,
						      (__v2df) __W,
						      __U,
						      __R);
}
    178  1.7  mrg 
/* Zero-masked scalar rcp28: low result zeroed when bit 0 of __U is
   clear; upper element from __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
						      (__v2df) __A,
						      (__v2df)
						      _mm_setzero_pd (),
						      __U,
						      __R);
}
    190  1.7  mrg 
/* Scalar single-precision rcp28 of __B's low float; upper three
   elements copied from __A (builtin takes __B first).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
						(__v4sf) __A,
						__R);
}
    199  1.1  mrg 
/* Masked scalar single-precision rcp28; merge source is __W.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
			 __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
						     (__v4sf) __A,
						     (__v4sf) __W,
						     __U,
						     __R);
}
    211  1.7  mrg 
/* Zero-masked scalar single-precision rcp28.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
						     (__v4sf) __A,
						     (__v4sf)
						     _mm_setzero_ps (),
						     __U,
						     __R);
}
    223  1.7  mrg 
/* Approximate reciprocal square root ("rsqrt28": 28-bit accuracy) of
   each double in __A, SAE control in __R.  Unmasked.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_pd (__m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
						  (__v8df) _mm512_undefined_pd (),
						  (__mmask8) -1, __R);
}
    232  1.1  mrg 
/* Masked rsqrt28: unselected elements come from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
						  (__v8df) __W,
						  (__mmask8) __U, __R);
}
    241  1.1  mrg 
/* Zero-masked rsqrt28: unselected elements are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
						  (__v8df) _mm512_setzero_pd (),
						  (__mmask8) __U, __R);
}
    250  1.1  mrg 
/* Approximate reciprocal square root of each of the 16 floats in __A.
   Unmasked.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_ps (__m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
						 (__v16sf) _mm512_undefined_ps (),
						 (__mmask16) -1, __R);
}
    259  1.1  mrg 
/* Masked single-precision rsqrt28: unselected elements come from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
						 (__v16sf) __W,
						 (__mmask16) __U, __R);
}
    268  1.1  mrg 
/* Zero-masked single-precision rsqrt28: unselected elements zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
						 (__v16sf) _mm512_setzero_ps (),
						 (__mmask16) __U, __R);
}
    277  1.1  mrg 
/* Scalar rsqrt28 of __B's low double; upper element copied from __A
   (builtin takes the processed operand __B first).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
						   (__v2df) __A,
						   __R);
}
    286  1.1  mrg 
/* Masked scalar rsqrt28; merge source is __W.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
			   __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
							(__v2df) __A,
							(__v2df) __W,
							__U,
							__R);
}
    298  1.7  mrg 
/* Zero-masked scalar rsqrt28.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
							(__v2df) __A,
							(__v2df)
							_mm_setzero_pd (),
							__U,
							__R);
}
    310  1.7  mrg 
/* Scalar single-precision rsqrt28 of __B's low float; upper three
   elements copied from __A.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
						  (__v4sf) __A,
						  __R);
}
    319  1.1  mrg 
/* Masked scalar single-precision rsqrt28; merge source is __W.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
			   __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
						       (__v4sf) __A,
						       (__v4sf) __W,
						       __U,
						       __R);
}
    331  1.7  mrg 
/* Zero-masked scalar single-precision rsqrt28.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
						       (__v4sf) __A,
						       (__v4sf)
						       _mm_setzero_ps (),
						       __U,
						       __R);
}
    343  1.7  mrg 
    344  1.1  mrg #else
/* Macro (!__OPTIMIZE__) forms of the 512-bit exp2a23/rcp28/rsqrt28
   intrinsics.  The unmasked forms pass an all-ones mask, so the
   _mm512_setzero_*() merge source is effectively ignored.  All macro
   parameters are parenthesized so that expression arguments expand
   safely (the originals expanded A/W/U/C unparenthesized).  */
#define _mm512_exp2a23_round_pd(A, C)            \
    __builtin_ia32_exp2pd_mask ((A), (__v8df) _mm512_setzero_pd (), -1, (C))

#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
    __builtin_ia32_exp2pd_mask ((A), (W), (U), (C))

#define _mm512_maskz_exp2a23_round_pd(U, A, C)   \
    __builtin_ia32_exp2pd_mask ((A), (__v8df) _mm512_setzero_pd (), (U), (C))

#define _mm512_exp2a23_round_ps(A, C)            \
    __builtin_ia32_exp2ps_mask ((A), (__v16sf) _mm512_setzero_ps (), -1, (C))

#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
    __builtin_ia32_exp2ps_mask ((A), (W), (U), (C))

#define _mm512_maskz_exp2a23_round_ps(U, A, C)   \
    __builtin_ia32_exp2ps_mask ((A), (__v16sf) _mm512_setzero_ps (), (U), (C))

#define _mm512_rcp28_round_pd(A, C)            \
    __builtin_ia32_rcp28pd_mask ((A), (__v8df) _mm512_setzero_pd (), -1, (C))

#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
    __builtin_ia32_rcp28pd_mask ((A), (W), (U), (C))

#define _mm512_maskz_rcp28_round_pd(U, A, C)   \
    __builtin_ia32_rcp28pd_mask ((A), (__v8df) _mm512_setzero_pd (), (U), (C))

#define _mm512_rcp28_round_ps(A, C)            \
    __builtin_ia32_rcp28ps_mask ((A), (__v16sf) _mm512_setzero_ps (), -1, (C))

#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
    __builtin_ia32_rcp28ps_mask ((A), (W), (U), (C))

#define _mm512_maskz_rcp28_round_ps(U, A, C)   \
    __builtin_ia32_rcp28ps_mask ((A), (__v16sf) _mm512_setzero_ps (), (U), (C))

#define _mm512_rsqrt28_round_pd(A, C)            \
    __builtin_ia32_rsqrt28pd_mask ((A), (__v8df) _mm512_setzero_pd (), -1, (C))

#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
    __builtin_ia32_rsqrt28pd_mask ((A), (W), (U), (C))

#define _mm512_maskz_rsqrt28_round_pd(U, A, C)   \
    __builtin_ia32_rsqrt28pd_mask ((A), (__v8df) _mm512_setzero_pd (), (U), (C))

#define _mm512_rsqrt28_round_ps(A, C)            \
    __builtin_ia32_rsqrt28ps_mask ((A), (__v16sf) _mm512_setzero_ps (), -1, (C))

#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
    __builtin_ia32_rsqrt28ps_mask ((A), (W), (U), (C))

#define _mm512_maskz_rsqrt28_round_ps(U, A, C)   \
    __builtin_ia32_rsqrt28ps_mask ((A), (__v16sf) _mm512_setzero_ps (), (U), (C))
    398  1.1  mrg 
/* Scalar rcp28/rsqrt28 round intrinsics, macro (!__OPTIMIZE__) path.

   BUG FIX: the builtins take the operand whose low element is processed
   FIRST and the source of the preserved upper element SECOND — the
   inline (__OPTIMIZE__) definitions above pass (__B, __A), and the
   no-round _mm_rcp28_sd/_mm_rsqrt28_sd macros below pass (B, A).  These
   macros previously passed (A, B) unswapped, so at -O0 they computed
   the approximation of A's low element instead of B's.  Swap the
   operands to match the inline path and the Intel specification, and
   parenthesize all arguments.  */
#define _mm_rcp28_round_sd(A, B, R)	\
    __builtin_ia32_rcp28sd_round ((B), (A), (R))

#define _mm_mask_rcp28_round_sd(W, U, A, B, R)	\
    __builtin_ia32_rcp28sd_mask_round ((B), (A), (W), (U), (R))

#define _mm_maskz_rcp28_round_sd(U, A, B, R)	\
    __builtin_ia32_rcp28sd_mask_round ((B), (A), (__v2df) _mm_setzero_pd (), \
				       (U), (R))

#define _mm_rcp28_round_ss(A, B, R)	\
    __builtin_ia32_rcp28ss_round ((B), (A), (R))

#define _mm_mask_rcp28_round_ss(W, U, A, B, R)	\
    __builtin_ia32_rcp28ss_mask_round ((B), (A), (W), (U), (R))

#define _mm_maskz_rcp28_round_ss(U, A, B, R)	\
    __builtin_ia32_rcp28ss_mask_round ((B), (A), (__v4sf) _mm_setzero_ps (), \
				       (U), (R))

#define _mm_rsqrt28_round_sd(A, B, R)	\
    __builtin_ia32_rsqrt28sd_round ((B), (A), (R))

#define _mm_mask_rsqrt28_round_sd(W, U, A, B, R)	\
    __builtin_ia32_rsqrt28sd_mask_round ((B), (A), (W), (U), (R))

#define _mm_maskz_rsqrt28_round_sd(U, A, B, R)	\
    __builtin_ia32_rsqrt28sd_mask_round ((B), (A), (__v2df) _mm_setzero_pd (),\
					 (U), (R))

#define _mm_rsqrt28_round_ss(A, B, R)	\
    __builtin_ia32_rsqrt28ss_round ((B), (A), (R))

#define _mm_mask_rsqrt28_round_ss(W, U, A, B, R)	\
    __builtin_ia32_rsqrt28ss_mask_round ((B), (A), (W), (U), (R))

#define _mm_maskz_rsqrt28_round_ss(U, A, B, R)	\
    __builtin_ia32_rsqrt28ss_mask_round ((B), (A), (__v4sf) _mm_setzero_ps (),\
					 (U), (R))
    438  1.7  mrg 
    439  1.1  mrg #endif
    440  1.1  mrg 
/* Scalar masked forms without an explicit rounding argument: delegate
   to the *_round_* forms with _MM_FROUND_CUR_DIRECTION.  */
#define _mm_mask_rcp28_sd(W, U, A, B)\
    _mm_mask_rcp28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_sd(U, A, B)\
    _mm_maskz_rcp28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_ss(W, U, A, B)\
    _mm_mask_rcp28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_ss(U, A, B)\
    _mm_maskz_rcp28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_sd(W, U, A, B)\
    _mm_mask_rsqrt28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_sd(U, A, B)\
    _mm_maskz_rsqrt28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_ss(W, U, A, B)\
    _mm_mask_rsqrt28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_ss(U, A, B)\
    _mm_maskz_rsqrt28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
    464  1.7  mrg 
/* Packed forms without an explicit rounding argument: delegate to the
   *_round_* versions with _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_exp2a23_pd(A)                    \
    _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(W, U, A)   \
    _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(U, A)     \
    _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_ps(A)                    \
    _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(W, U, A)   \
    _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(U, A)     \
    _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_pd(A)                    \
    _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(W, U, A)   \
    _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(U, A)     \
    _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_ps(A)                    \
    _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(W, U, A)   \
    _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(U, A)     \
    _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_pd(A)                    \
    _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(W, U, A)   \
    _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(U, A)     \
    _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_ps(A)                    \
    _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(W, U, A)   \
    _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(U, A)     \
    _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
    518  1.1  mrg 
/* Scalar unmasked forms without a rounding argument.  These call the
   builtins directly in both compilation modes; note the operand order:
   the builtin processes the low element of its FIRST operand (B) and
   copies the upper element from its SECOND (A), matching the inline
   *_round definitions above.  */
#define _mm_rcp28_sd(A, B)	\
    __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_ss(A, B)	\
    __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_sd(A, B)	\
    __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_ss(A, B)	\
    __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
    530  1.1  mrg 
    531  1.1  mrg #ifdef __DISABLE_AVX512ER__
    532  1.1  mrg #undef __DISABLE_AVX512ER__
    533  1.1  mrg #pragma GCC pop_options
    534  1.1  mrg #endif /* __DISABLE_AVX512ER__ */
    535  1.1  mrg 
    536  1.1  mrg #endif /* _AVX512ERINTRIN_H_INCLUDED */
    537