/* avx512erintrin.h -- imported source, revision 1.1.1.7 (i386).  */
      1 /* Copyright (C) 2013-2022 Free Software Foundation, Inc.
      2 
      3    This file is part of GCC.
      4 
      5    GCC is free software; you can redistribute it and/or modify
      6    it under the terms of the GNU General Public License as published by
      7    the Free Software Foundation; either version 3, or (at your option)
      8    any later version.
      9 
     10    GCC is distributed in the hope that it will be useful,
     11    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13    GNU General Public License for more details.
     14 
     15    Under Section 7 of GPL version 3, you are granted additional
     16    permissions described in the GCC Runtime Library Exception, version
     17    3.1, as published by the Free Software Foundation.
     18 
     19    You should have received a copy of the GNU General Public License and
     20    a copy of the GCC Runtime Library Exception along with this program;
     21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     22    <http://www.gnu.org/licenses/>.  */
     23 
     24 #ifndef _IMMINTRIN_H_INCLUDED
     25 #error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
     26 #endif
     27 
     28 #ifndef _AVX512ERINTRIN_H_INCLUDED
     29 #define _AVX512ERINTRIN_H_INCLUDED
     30 
     31 #ifndef __AVX512ER__
     32 #pragma GCC push_options
     33 #pragma GCC target("avx512er")
     34 #define __DISABLE_AVX512ER__
     35 #endif /* __AVX512ER__ */
     36 
     37 /* Internal data types for implementing the intrinsics.  */
/* Internal data types for implementing the intrinsics.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));  /* 8 x double, 512 bits */
typedef float __v16sf __attribute__ ((__vector_size__ (64)));  /* 16 x float, 512 bits */

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

/* Write masks: one bit per vector element (8 doubles / 16 floats).  */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
     48 
     49 #ifdef __OPTIMIZE__
     50 extern __inline __m512d
     51 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     52 _mm512_exp2a23_round_pd (__m512d __A, int __R)
     53 {
     54   return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
     55 					       (__v8df) _mm512_undefined_pd (),
     56 					       (__mmask8) -1, __R);
     57 }
     58 
     59 extern __inline __m512d
     60 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     61 _mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
     62 {
     63   return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
     64 					       (__v8df) __W,
     65 					       (__mmask8) __U, __R);
     66 }
     67 
     68 extern __inline __m512d
     69 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     70 _mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
     71 {
     72   return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
     73 					       (__v8df) _mm512_setzero_pd (),
     74 					       (__mmask8) __U, __R);
     75 }
     76 
     77 extern __inline __m512
     78 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     79 _mm512_exp2a23_round_ps (__m512 __A, int __R)
     80 {
     81   return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
     82 					      (__v16sf) _mm512_undefined_ps (),
     83 					      (__mmask16) -1, __R);
     84 }
     85 
     86 extern __inline __m512
     87 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     88 _mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
     89 {
     90   return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
     91 					      (__v16sf) __W,
     92 					      (__mmask16) __U, __R);
     93 }
     94 
     95 extern __inline __m512
     96 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     97 _mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
     98 {
     99   return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
    100 					      (__v16sf) _mm512_setzero_ps (),
    101 					      (__mmask16) __U, __R);
    102 }
    103 
    104 extern __inline __m512d
    105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    106 _mm512_rcp28_round_pd (__m512d __A, int __R)
    107 {
    108   return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
    109 						(__v8df) _mm512_undefined_pd (),
    110 						(__mmask8) -1, __R);
    111 }
    112 
    113 extern __inline __m512d
    114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    115 _mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
    116 {
    117   return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
    118 						(__v8df) __W,
    119 						(__mmask8) __U, __R);
    120 }
    121 
    122 extern __inline __m512d
    123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    124 _mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
    125 {
    126   return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
    127 						(__v8df) _mm512_setzero_pd (),
    128 						(__mmask8) __U, __R);
    129 }
    130 
    131 extern __inline __m512
    132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    133 _mm512_rcp28_round_ps (__m512 __A, int __R)
    134 {
    135   return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
    136 					       (__v16sf) _mm512_undefined_ps (),
    137 					       (__mmask16) -1, __R);
    138 }
    139 
    140 extern __inline __m512
    141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    142 _mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
    143 {
    144   return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
    145 					       (__v16sf) __W,
    146 					       (__mmask16) __U, __R);
    147 }
    148 
    149 extern __inline __m512
    150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    151 _mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
    152 {
    153   return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
    154 					       (__v16sf) _mm512_setzero_ps (),
    155 					       (__mmask16) __U, __R);
    156 }
    157 
    158 extern __inline __m128d
    159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    160 _mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
    161 {
    162   return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
    163 						 (__v2df) __A,
    164 						 __R);
    165 }
    166 
    167 extern __inline __m128d
    168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    169 _mm_mask_rcp28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
    170 			 __m128d __B, int __R)
    171 {
    172   return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
    173 						      (__v2df) __A,
    174 						      (__v2df) __W,
    175 						      __U,
    176 						      __R);
    177 }
    178 
    179 extern __inline __m128d
    180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    181 _mm_maskz_rcp28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
    182 {
    183   return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
    184 						      (__v2df) __A,
    185 						      (__v2df)
    186 						      _mm_setzero_pd (),
    187 						      __U,
    188 						      __R);
    189 }
    190 
    191 extern __inline __m128
    192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    193 _mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
    194 {
    195   return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
    196 						(__v4sf) __A,
    197 						__R);
    198 }
    199 
    200 extern __inline __m128
    201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    202 _mm_mask_rcp28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
    203 			 __m128 __B, int __R)
    204 {
    205   return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
    206 						     (__v4sf) __A,
    207 						     (__v4sf) __W,
    208 						     __U,
    209 						     __R);
    210 }
    211 
    212 extern __inline __m128
    213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    214 _mm_maskz_rcp28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
    215 {
    216   return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
    217 						     (__v4sf) __A,
    218 						     (__v4sf)
    219 						     _mm_setzero_ps (),
    220 						     __U,
    221 						     __R);
    222 }
    223 
    224 extern __inline __m512d
    225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    226 _mm512_rsqrt28_round_pd (__m512d __A, int __R)
    227 {
    228   return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
    229 						  (__v8df) _mm512_undefined_pd (),
    230 						  (__mmask8) -1, __R);
    231 }
    232 
    233 extern __inline __m512d
    234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    235 _mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
    236 {
    237   return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
    238 						  (__v8df) __W,
    239 						  (__mmask8) __U, __R);
    240 }
    241 
    242 extern __inline __m512d
    243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    244 _mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
    245 {
    246   return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
    247 						  (__v8df) _mm512_setzero_pd (),
    248 						  (__mmask8) __U, __R);
    249 }
    250 
    251 extern __inline __m512
    252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    253 _mm512_rsqrt28_round_ps (__m512 __A, int __R)
    254 {
    255   return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
    256 						 (__v16sf) _mm512_undefined_ps (),
    257 						 (__mmask16) -1, __R);
    258 }
    259 
    260 extern __inline __m512
    261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    262 _mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
    263 {
    264   return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
    265 						 (__v16sf) __W,
    266 						 (__mmask16) __U, __R);
    267 }
    268 
    269 extern __inline __m512
    270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    271 _mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
    272 {
    273   return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
    274 						 (__v16sf) _mm512_setzero_ps (),
    275 						 (__mmask16) __U, __R);
    276 }
    277 
    278 extern __inline __m128d
    279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    280 _mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
    281 {
    282   return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
    283 						   (__v2df) __A,
    284 						   __R);
    285 }
    286 
    287 extern __inline __m128d
    288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    289 _mm_mask_rsqrt28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
    290 			   __m128d __B, int __R)
    291 {
    292   return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
    293 							(__v2df) __A,
    294 							(__v2df) __W,
    295 							__U,
    296 							__R);
    297 }
    298 
    299 extern __inline __m128d
    300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    301 _mm_maskz_rsqrt28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
    302 {
    303   return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
    304 							(__v2df) __A,
    305 							(__v2df)
    306 							_mm_setzero_pd (),
    307 							__U,
    308 							__R);
    309 }
    310 
    311 extern __inline __m128
    312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    313 _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
    314 {
    315   return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
    316 						  (__v4sf) __A,
    317 						  __R);
    318 }
    319 
    320 extern __inline __m128
    321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    322 _mm_mask_rsqrt28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
    323 			   __m128 __B, int __R)
    324 {
    325   return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
    326 						       (__v4sf) __A,
    327 						       (__v4sf) __W,
    328 						       __U,
    329 						       __R);
    330 }
    331 
    332 extern __inline __m128
    333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    334 _mm_maskz_rsqrt28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
    335 {
    336   return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
    337 						       (__v4sf) __A,
    338 						       (__v4sf)
    339 						       _mm_setzero_ps (),
    340 						       __U,
    341 						       __R);
    342 }
    343 
    344 #else
/* Non-__OPTIMIZE__ forms: without optimization the always-inline
   wrappers above would not present the rounding-mode operand as a
   compile-time constant to the builtin, so macros are used instead.
   NB: these pass _mm512_setzero_* where the inline forms pass
   _mm512_undefined_*; with an all-ones mask the merge source is
   ignored, so the results agree.  */
#define _mm512_exp2a23_round_pd(A, C)            \
    __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
    __builtin_ia32_exp2pd_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_pd(U, A, C)   \
    __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_exp2a23_round_ps(A, C)            \
    __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
    __builtin_ia32_exp2ps_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_ps(U, A, C)   \
    __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rcp28_round_pd(A, C)            \
    __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
    __builtin_ia32_rcp28pd_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_pd(U, A, C)   \
    __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rcp28_round_ps(A, C)            \
    __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
    __builtin_ia32_rcp28ps_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_ps(U, A, C)   \
    __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rsqrt28_round_pd(A, C)            \
    __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
    __builtin_ia32_rsqrt28pd_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_pd(U, A, C)   \
    __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rsqrt28_round_ps(A, C)            \
    __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
    __builtin_ia32_rsqrt28ps_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_ps(U, A, C)   \
    __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
    398 
/* Scalar RCP28 macro forms.  The builtins take the operand whose low
   element is approximated FIRST and the source of the untouched upper
   element SECOND -- the inline forms above pass (__B, __A), and the
   no-round macros below pass (B, A).  The macros here must use the
   same order so -O0 and optimized builds compute the same result
   (previously they expanded with (A, B), swapping the operands).  */
#define _mm_rcp28_round_sd(A, B, R)	\
    __builtin_ia32_rcp28sd_round ((B), (A), (R))

#define _mm_mask_rcp28_round_sd(W, U, A, B, R)	\
    __builtin_ia32_rcp28sd_mask_round ((B), (A), (W), (U), (R))

#define _mm_maskz_rcp28_round_sd(U, A, B, R)	\
    __builtin_ia32_rcp28sd_mask_round ((B), (A), (__v2df) _mm_setzero_pd (), \
				       (U), (R))

#define _mm_rcp28_round_ss(A, B, R)	\
    __builtin_ia32_rcp28ss_round ((B), (A), (R))

#define _mm_mask_rcp28_round_ss(W, U, A, B, R)	\
    __builtin_ia32_rcp28ss_mask_round ((B), (A), (W), (U), (R))

#define _mm_maskz_rcp28_round_ss(U, A, B, R)	\
    __builtin_ia32_rcp28ss_mask_round ((B), (A), (__v4sf) _mm_setzero_ps (), \
				       (U), (R))
    418 
/* Scalar RSQRT28 macro forms.  As with the RCP28 scalars, the builtins
   take the processed operand first and the upper-element source second;
   pass (B, A) to match the inline forms above and the no-round macros
   below (previously these expanded with (A, B), swapping the operands
   in -O0 builds).  */
#define _mm_rsqrt28_round_sd(A, B, R)	\
    __builtin_ia32_rsqrt28sd_round ((B), (A), (R))

#define _mm_mask_rsqrt28_round_sd(W, U, A, B, R)	\
    __builtin_ia32_rsqrt28sd_mask_round ((B), (A), (__v2df) _mm_setzero_pd (),\
					 (U), (R)) != 0 ? 0 : 0, \
    __builtin_ia32_rsqrt28sd_mask_round ((B), (A), (W), (U), (R))

#define _mm_maskz_rsqrt28_round_sd(U, A, B, R)	\
    __builtin_ia32_rsqrt28sd_mask_round ((B), (A), (__v2df) _mm_setzero_pd (),\
					 (U), (R))

#define _mm_rsqrt28_round_ss(A, B, R)	\
    __builtin_ia32_rsqrt28ss_round ((B), (A), (R))

#define _mm_mask_rsqrt28_round_ss(W, U, A, B, R)	\
    __builtin_ia32_rsqrt28ss_mask_round ((B), (A), (W), (U), (R))

#define _mm_maskz_rsqrt28_round_ss(U, A, B, R)	\
    __builtin_ia32_rsqrt28ss_mask_round ((B), (A), (__v4sf) _mm_setzero_ps (),\
					 (U), (R))
    438 
    439 #endif
    440 
/* Convenience forms without an explicit rounding argument: forward to
   the _round_ variants with _MM_FROUND_CUR_DIRECTION (use the current
   MXCSR rounding mode).  */
#define _mm_mask_rcp28_sd(W, U, A, B)\
    _mm_mask_rcp28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_sd(U, A, B)\
    _mm_maskz_rcp28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_ss(W, U, A, B)\
    _mm_mask_rcp28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_ss(U, A, B)\
    _mm_maskz_rcp28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_sd(W, U, A, B)\
    _mm_mask_rsqrt28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_sd(U, A, B)\
    _mm_maskz_rsqrt28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_ss(W, U, A, B)\
    _mm_mask_rsqrt28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_ss(U, A, B)\
    _mm_maskz_rsqrt28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_pd(A)                    \
    _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(W, U, A)   \
    _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(U, A)     \
    _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_ps(A)                    \
    _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(W, U, A)   \
    _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(U, A)     \
    _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_pd(A)                    \
    _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(W, U, A)   \
    _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(U, A)     \
    _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_ps(A)                    \
    _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(W, U, A)   \
    _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(U, A)     \
    _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_pd(A)                    \
    _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(W, U, A)   \
    _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(U, A)     \
    _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_ps(A)                    \
    _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(W, U, A)   \
    _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(U, A)     \
    _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

/* Note the (B, A) operand order below: the builtins process the low
   element of their first argument, matching the inline _round forms.  */
#define _mm_rcp28_sd(A, B)	\
    __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_ss(A, B)	\
    __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_sd(A, B)	\
    __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_ss(A, B)	\
    __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
    530 
    531 #ifdef __DISABLE_AVX512ER__
    532 #undef __DISABLE_AVX512ER__
    533 #pragma GCC pop_options
    534 #endif /* __DISABLE_AVX512ER__ */
    535 
    536 #endif /* _AVX512ERINTRIN_H_INCLUDED */
    537