1 1.7 mrg /* Copyright (C) 2013-2022 Free Software Foundation, Inc. 2 1.1 mrg 3 1.1 mrg This file is part of GCC. 4 1.1 mrg 5 1.1 mrg GCC is free software; you can redistribute it and/or modify 6 1.1 mrg it under the terms of the GNU General Public License as published by 7 1.1 mrg the Free Software Foundation; either version 3, or (at your option) 8 1.1 mrg any later version. 9 1.1 mrg 10 1.1 mrg GCC is distributed in the hope that it will be useful, 11 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of 12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 1.1 mrg GNU General Public License for more details. 14 1.1 mrg 15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional 16 1.1 mrg permissions described in the GCC Runtime Library Exception, version 17 1.1 mrg 3.1, as published by the Free Software Foundation. 18 1.1 mrg 19 1.1 mrg You should have received a copy of the GNU General Public License and 20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program; 21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 1.1 mrg <http://www.gnu.org/licenses/>. */ 23 1.1 mrg 24 1.1 mrg #ifndef _IMMINTRIN_H_INCLUDED 25 1.1 mrg #error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead." 26 1.1 mrg #endif 27 1.1 mrg 28 1.1 mrg #ifndef _AVX512ERINTRIN_H_INCLUDED 29 1.1 mrg #define _AVX512ERINTRIN_H_INCLUDED 30 1.1 mrg 31 1.1 mrg #ifndef __AVX512ER__ 32 1.1 mrg #pragma GCC push_options 33 1.1 mrg #pragma GCC target("avx512er") 34 1.1 mrg #define __DISABLE_AVX512ER__ 35 1.1 mrg #endif /* __AVX512ER__ */ 36 1.1 mrg 37 1.1 mrg /* Internal data types for implementing the intrinsics. 
*/ 38 1.1 mrg typedef double __v8df __attribute__ ((__vector_size__ (64))); 39 1.1 mrg typedef float __v16sf __attribute__ ((__vector_size__ (64))); 40 1.1 mrg 41 1.1 mrg /* The Intel API is flexible enough that we must allow aliasing with other 42 1.1 mrg vector types, and their scalar components. */ 43 1.1 mrg typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__)); 44 1.1 mrg typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); 45 1.1 mrg 46 1.1 mrg typedef unsigned char __mmask8; 47 1.1 mrg typedef unsigned short __mmask16; 48 1.1 mrg 49 1.1 mrg #ifdef __OPTIMIZE__ 50 1.1 mrg extern __inline __m512d 51 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 52 1.1 mrg _mm512_exp2a23_round_pd (__m512d __A, int __R) 53 1.1 mrg { 54 1.1 mrg return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A, 55 1.7 mrg (__v8df) _mm512_undefined_pd (), 56 1.1 mrg (__mmask8) -1, __R); 57 1.1 mrg } 58 1.1 mrg 59 1.1 mrg extern __inline __m512d 60 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 61 1.1 mrg _mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R) 62 1.1 mrg { 63 1.1 mrg return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A, 64 1.1 mrg (__v8df) __W, 65 1.1 mrg (__mmask8) __U, __R); 66 1.1 mrg } 67 1.1 mrg 68 1.1 mrg extern __inline __m512d 69 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 70 1.1 mrg _mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R) 71 1.1 mrg { 72 1.1 mrg return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A, 73 1.1 mrg (__v8df) _mm512_setzero_pd (), 74 1.1 mrg (__mmask8) __U, __R); 75 1.1 mrg } 76 1.1 mrg 77 1.1 mrg extern __inline __m512 78 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 79 1.1 mrg _mm512_exp2a23_round_ps (__m512 __A, int __R) 80 1.1 mrg { 81 1.1 mrg return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A, 82 1.7 mrg (__v16sf) 
_mm512_undefined_ps (), 83 1.1 mrg (__mmask16) -1, __R); 84 1.1 mrg } 85 1.1 mrg 86 1.1 mrg extern __inline __m512 87 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 88 1.1 mrg _mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R) 89 1.1 mrg { 90 1.1 mrg return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A, 91 1.1 mrg (__v16sf) __W, 92 1.1 mrg (__mmask16) __U, __R); 93 1.1 mrg } 94 1.1 mrg 95 1.1 mrg extern __inline __m512 96 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 97 1.1 mrg _mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R) 98 1.1 mrg { 99 1.1 mrg return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A, 100 1.1 mrg (__v16sf) _mm512_setzero_ps (), 101 1.1 mrg (__mmask16) __U, __R); 102 1.1 mrg } 103 1.1 mrg 104 1.1 mrg extern __inline __m512d 105 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 106 1.1 mrg _mm512_rcp28_round_pd (__m512d __A, int __R) 107 1.1 mrg { 108 1.1 mrg return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A, 109 1.7 mrg (__v8df) _mm512_undefined_pd (), 110 1.1 mrg (__mmask8) -1, __R); 111 1.1 mrg } 112 1.1 mrg 113 1.1 mrg extern __inline __m512d 114 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 115 1.1 mrg _mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R) 116 1.1 mrg { 117 1.1 mrg return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A, 118 1.1 mrg (__v8df) __W, 119 1.1 mrg (__mmask8) __U, __R); 120 1.1 mrg } 121 1.1 mrg 122 1.1 mrg extern __inline __m512d 123 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 124 1.1 mrg _mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R) 125 1.1 mrg { 126 1.1 mrg return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A, 127 1.1 mrg (__v8df) _mm512_setzero_pd (), 128 1.1 mrg (__mmask8) __U, __R); 129 1.1 mrg } 130 1.1 mrg 131 1.1 mrg extern __inline __m512 132 1.1 mrg 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 133 1.1 mrg _mm512_rcp28_round_ps (__m512 __A, int __R) 134 1.1 mrg { 135 1.1 mrg return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A, 136 1.7 mrg (__v16sf) _mm512_undefined_ps (), 137 1.1 mrg (__mmask16) -1, __R); 138 1.1 mrg } 139 1.1 mrg 140 1.1 mrg extern __inline __m512 141 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 142 1.1 mrg _mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R) 143 1.1 mrg { 144 1.1 mrg return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A, 145 1.1 mrg (__v16sf) __W, 146 1.1 mrg (__mmask16) __U, __R); 147 1.1 mrg } 148 1.1 mrg 149 1.1 mrg extern __inline __m512 150 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 151 1.1 mrg _mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R) 152 1.1 mrg { 153 1.1 mrg return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A, 154 1.1 mrg (__v16sf) _mm512_setzero_ps (), 155 1.1 mrg (__mmask16) __U, __R); 156 1.1 mrg } 157 1.1 mrg 158 1.1 mrg extern __inline __m128d 159 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 160 1.1 mrg _mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R) 161 1.1 mrg { 162 1.1 mrg return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B, 163 1.1 mrg (__v2df) __A, 164 1.1 mrg __R); 165 1.1 mrg } 166 1.1 mrg 167 1.7 mrg extern __inline __m128d 168 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 169 1.7 mrg _mm_mask_rcp28_round_sd (__m128d __W, __mmask8 __U, __m128d __A, 170 1.7 mrg __m128d __B, int __R) 171 1.7 mrg { 172 1.7 mrg return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B, 173 1.7 mrg (__v2df) __A, 174 1.7 mrg (__v2df) __W, 175 1.7 mrg __U, 176 1.7 mrg __R); 177 1.7 mrg } 178 1.7 mrg 179 1.7 mrg extern __inline __m128d 180 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 181 1.7 mrg 
_mm_maskz_rcp28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R) 182 1.7 mrg { 183 1.7 mrg return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B, 184 1.7 mrg (__v2df) __A, 185 1.7 mrg (__v2df) 186 1.7 mrg _mm_setzero_pd (), 187 1.7 mrg __U, 188 1.7 mrg __R); 189 1.7 mrg } 190 1.7 mrg 191 1.1 mrg extern __inline __m128 192 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 193 1.1 mrg _mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R) 194 1.1 mrg { 195 1.1 mrg return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B, 196 1.1 mrg (__v4sf) __A, 197 1.1 mrg __R); 198 1.1 mrg } 199 1.1 mrg 200 1.7 mrg extern __inline __m128 201 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 202 1.7 mrg _mm_mask_rcp28_round_ss (__m128 __W, __mmask8 __U, __m128 __A, 203 1.7 mrg __m128 __B, int __R) 204 1.7 mrg { 205 1.7 mrg return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B, 206 1.7 mrg (__v4sf) __A, 207 1.7 mrg (__v4sf) __W, 208 1.7 mrg __U, 209 1.7 mrg __R); 210 1.7 mrg } 211 1.7 mrg 212 1.7 mrg extern __inline __m128 213 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 214 1.7 mrg _mm_maskz_rcp28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R) 215 1.7 mrg { 216 1.7 mrg return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B, 217 1.7 mrg (__v4sf) __A, 218 1.7 mrg (__v4sf) 219 1.7 mrg _mm_setzero_ps (), 220 1.7 mrg __U, 221 1.7 mrg __R); 222 1.7 mrg } 223 1.7 mrg 224 1.1 mrg extern __inline __m512d 225 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 226 1.1 mrg _mm512_rsqrt28_round_pd (__m512d __A, int __R) 227 1.1 mrg { 228 1.1 mrg return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A, 229 1.7 mrg (__v8df) _mm512_undefined_pd (), 230 1.1 mrg (__mmask8) -1, __R); 231 1.1 mrg } 232 1.1 mrg 233 1.1 mrg extern __inline __m512d 234 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 235 1.1 mrg 
_mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R) 236 1.1 mrg { 237 1.1 mrg return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A, 238 1.1 mrg (__v8df) __W, 239 1.1 mrg (__mmask8) __U, __R); 240 1.1 mrg } 241 1.1 mrg 242 1.1 mrg extern __inline __m512d 243 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 244 1.1 mrg _mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R) 245 1.1 mrg { 246 1.1 mrg return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A, 247 1.1 mrg (__v8df) _mm512_setzero_pd (), 248 1.1 mrg (__mmask8) __U, __R); 249 1.1 mrg } 250 1.1 mrg 251 1.1 mrg extern __inline __m512 252 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 253 1.1 mrg _mm512_rsqrt28_round_ps (__m512 __A, int __R) 254 1.1 mrg { 255 1.1 mrg return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A, 256 1.7 mrg (__v16sf) _mm512_undefined_ps (), 257 1.1 mrg (__mmask16) -1, __R); 258 1.1 mrg } 259 1.1 mrg 260 1.1 mrg extern __inline __m512 261 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 262 1.1 mrg _mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R) 263 1.1 mrg { 264 1.1 mrg return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A, 265 1.1 mrg (__v16sf) __W, 266 1.1 mrg (__mmask16) __U, __R); 267 1.1 mrg } 268 1.1 mrg 269 1.1 mrg extern __inline __m512 270 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 271 1.1 mrg _mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R) 272 1.1 mrg { 273 1.1 mrg return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A, 274 1.1 mrg (__v16sf) _mm512_setzero_ps (), 275 1.1 mrg (__mmask16) __U, __R); 276 1.1 mrg } 277 1.1 mrg 278 1.1 mrg extern __inline __m128d 279 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 280 1.1 mrg _mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R) 281 1.1 mrg { 282 1.1 mrg return 
(__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B, 283 1.1 mrg (__v2df) __A, 284 1.1 mrg __R); 285 1.1 mrg } 286 1.1 mrg 287 1.7 mrg extern __inline __m128d 288 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 289 1.7 mrg _mm_mask_rsqrt28_round_sd (__m128d __W, __mmask8 __U, __m128d __A, 290 1.7 mrg __m128d __B, int __R) 291 1.7 mrg { 292 1.7 mrg return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B, 293 1.7 mrg (__v2df) __A, 294 1.7 mrg (__v2df) __W, 295 1.7 mrg __U, 296 1.7 mrg __R); 297 1.7 mrg } 298 1.7 mrg 299 1.7 mrg extern __inline __m128d 300 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 301 1.7 mrg _mm_maskz_rsqrt28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R) 302 1.7 mrg { 303 1.7 mrg return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B, 304 1.7 mrg (__v2df) __A, 305 1.7 mrg (__v2df) 306 1.7 mrg _mm_setzero_pd (), 307 1.7 mrg __U, 308 1.7 mrg __R); 309 1.7 mrg } 310 1.7 mrg 311 1.1 mrg extern __inline __m128 312 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 313 1.1 mrg _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R) 314 1.1 mrg { 315 1.1 mrg return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B, 316 1.1 mrg (__v4sf) __A, 317 1.1 mrg __R); 318 1.1 mrg } 319 1.1 mrg 320 1.7 mrg extern __inline __m128 321 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 322 1.7 mrg _mm_mask_rsqrt28_round_ss (__m128 __W, __mmask8 __U, __m128 __A, 323 1.7 mrg __m128 __B, int __R) 324 1.7 mrg { 325 1.7 mrg return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B, 326 1.7 mrg (__v4sf) __A, 327 1.7 mrg (__v4sf) __W, 328 1.7 mrg __U, 329 1.7 mrg __R); 330 1.7 mrg } 331 1.7 mrg 332 1.7 mrg extern __inline __m128 333 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 334 1.7 mrg _mm_maskz_rsqrt28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R) 335 1.7 mrg { 336 1.7 mrg 
return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B, 337 1.7 mrg (__v4sf) __A, 338 1.7 mrg (__v4sf) 339 1.7 mrg _mm_setzero_ps (), 340 1.7 mrg __U, 341 1.7 mrg __R); 342 1.7 mrg } 343 1.7 mrg 344 1.1 mrg #else 345 1.1 mrg #define _mm512_exp2a23_round_pd(A, C) \ 346 1.1 mrg __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C) 347 1.1 mrg 348 1.1 mrg #define _mm512_mask_exp2a23_round_pd(W, U, A, C) \ 349 1.1 mrg __builtin_ia32_exp2pd_mask(A, W, U, C) 350 1.1 mrg 351 1.1 mrg #define _mm512_maskz_exp2a23_round_pd(U, A, C) \ 352 1.1 mrg __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C) 353 1.1 mrg 354 1.1 mrg #define _mm512_exp2a23_round_ps(A, C) \ 355 1.1 mrg __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C) 356 1.1 mrg 357 1.1 mrg #define _mm512_mask_exp2a23_round_ps(W, U, A, C) \ 358 1.1 mrg __builtin_ia32_exp2ps_mask(A, W, U, C) 359 1.1 mrg 360 1.1 mrg #define _mm512_maskz_exp2a23_round_ps(U, A, C) \ 361 1.1 mrg __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C) 362 1.1 mrg 363 1.1 mrg #define _mm512_rcp28_round_pd(A, C) \ 364 1.1 mrg __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C) 365 1.1 mrg 366 1.1 mrg #define _mm512_mask_rcp28_round_pd(W, U, A, C) \ 367 1.1 mrg __builtin_ia32_rcp28pd_mask(A, W, U, C) 368 1.1 mrg 369 1.1 mrg #define _mm512_maskz_rcp28_round_pd(U, A, C) \ 370 1.1 mrg __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C) 371 1.1 mrg 372 1.1 mrg #define _mm512_rcp28_round_ps(A, C) \ 373 1.1 mrg __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C) 374 1.1 mrg 375 1.1 mrg #define _mm512_mask_rcp28_round_ps(W, U, A, C) \ 376 1.1 mrg __builtin_ia32_rcp28ps_mask(A, W, U, C) 377 1.1 mrg 378 1.1 mrg #define _mm512_maskz_rcp28_round_ps(U, A, C) \ 379 1.1 mrg __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C) 380 1.1 mrg 381 1.1 mrg #define _mm512_rsqrt28_round_pd(A, C) \ 382 1.1 mrg __builtin_ia32_rsqrt28pd_mask(A, 
(__v8df)_mm512_setzero_pd(), -1, C) 383 1.1 mrg 384 1.1 mrg #define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \ 385 1.1 mrg __builtin_ia32_rsqrt28pd_mask(A, W, U, C) 386 1.1 mrg 387 1.1 mrg #define _mm512_maskz_rsqrt28_round_pd(U, A, C) \ 388 1.1 mrg __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C) 389 1.1 mrg 390 1.1 mrg #define _mm512_rsqrt28_round_ps(A, C) \ 391 1.1 mrg __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C) 392 1.1 mrg 393 1.1 mrg #define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \ 394 1.1 mrg __builtin_ia32_rsqrt28ps_mask(A, W, U, C) 395 1.1 mrg 396 1.1 mrg #define _mm512_maskz_rsqrt28_round_ps(U, A, C) \ 397 1.1 mrg __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C) 398 1.1 mrg 399 1.1 mrg #define _mm_rcp28_round_sd(A, B, R) \ 400 1.1 mrg __builtin_ia32_rcp28sd_round(A, B, R) 401 1.1 mrg 402 1.7 mrg #define _mm_mask_rcp28_round_sd(W, U, A, B, R) \ 403 1.7 mrg __builtin_ia32_rcp28sd_mask_round ((A), (B), (W), (U), (R)) 404 1.7 mrg 405 1.7 mrg #define _mm_maskz_rcp28_round_sd(U, A, B, R) \ 406 1.7 mrg __builtin_ia32_rcp28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (), \ 407 1.7 mrg (U), (R)) 408 1.7 mrg 409 1.1 mrg #define _mm_rcp28_round_ss(A, B, R) \ 410 1.1 mrg __builtin_ia32_rcp28ss_round(A, B, R) 411 1.1 mrg 412 1.7 mrg #define _mm_mask_rcp28_round_ss(W, U, A, B, R) \ 413 1.7 mrg __builtin_ia32_rcp28ss_mask_round ((A), (B), (W), (U), (R)) 414 1.7 mrg 415 1.7 mrg #define _mm_maskz_rcp28_round_ss(U, A, B, R) \ 416 1.7 mrg __builtin_ia32_rcp28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (), \ 417 1.7 mrg (U), (R)) 418 1.7 mrg 419 1.1 mrg #define _mm_rsqrt28_round_sd(A, B, R) \ 420 1.1 mrg __builtin_ia32_rsqrt28sd_round(A, B, R) 421 1.1 mrg 422 1.7 mrg #define _mm_mask_rsqrt28_round_sd(W, U, A, B, R) \ 423 1.7 mrg __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (W), (U), (R)) 424 1.7 mrg 425 1.7 mrg #define _mm_maskz_rsqrt28_round_sd(U, A, B, R) \ 426 1.7 mrg 
__builtin_ia32_rsqrt28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (),\ 427 1.7 mrg (U), (R)) 428 1.7 mrg 429 1.1 mrg #define _mm_rsqrt28_round_ss(A, B, R) \ 430 1.1 mrg __builtin_ia32_rsqrt28ss_round(A, B, R) 431 1.1 mrg 432 1.7 mrg #define _mm_mask_rsqrt28_round_ss(W, U, A, B, R) \ 433 1.7 mrg __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (W), (U), (R)) 434 1.7 mrg 435 1.7 mrg #define _mm_maskz_rsqrt28_round_ss(U, A, B, R) \ 436 1.7 mrg __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (),\ 437 1.7 mrg (U), (R)) 438 1.7 mrg 439 1.1 mrg #endif 440 1.1 mrg 441 1.7 mrg #define _mm_mask_rcp28_sd(W, U, A, B)\ 442 1.7 mrg _mm_mask_rcp28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION) 443 1.7 mrg 444 1.7 mrg #define _mm_maskz_rcp28_sd(U, A, B)\ 445 1.7 mrg _mm_maskz_rcp28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION) 446 1.7 mrg 447 1.7 mrg #define _mm_mask_rcp28_ss(W, U, A, B)\ 448 1.7 mrg _mm_mask_rcp28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION) 449 1.7 mrg 450 1.7 mrg #define _mm_maskz_rcp28_ss(U, A, B)\ 451 1.7 mrg _mm_maskz_rcp28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION) 452 1.7 mrg 453 1.7 mrg #define _mm_mask_rsqrt28_sd(W, U, A, B)\ 454 1.7 mrg _mm_mask_rsqrt28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION) 455 1.7 mrg 456 1.7 mrg #define _mm_maskz_rsqrt28_sd(U, A, B)\ 457 1.7 mrg _mm_maskz_rsqrt28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION) 458 1.7 mrg 459 1.7 mrg #define _mm_mask_rsqrt28_ss(W, U, A, B)\ 460 1.7 mrg _mm_mask_rsqrt28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION) 461 1.7 mrg 462 1.7 mrg #define _mm_maskz_rsqrt28_ss(U, A, B)\ 463 1.7 mrg _mm_maskz_rsqrt28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION) 464 1.7 mrg 465 1.1 mrg #define _mm512_exp2a23_pd(A) \ 466 1.1 mrg _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION) 467 1.1 mrg 468 1.1 mrg #define _mm512_mask_exp2a23_pd(W, U, A) \ 469 1.1 mrg _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION) 

/* 512-bit shorthands that take no explicit last argument.  Each macro
   expands to the _round intrinsic of the same name with
   _MM_FROUND_CUR_DIRECTION appended; the remaining arguments are
   forwarded in their original order.  */

/* exp2a23.  */
#define _mm512_maskz_exp2a23_pd(U, A) \
  _mm512_maskz_exp2a23_round_pd ((U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_ps(A) \
  _mm512_exp2a23_round_ps ((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(W, U, A) \
  _mm512_mask_exp2a23_round_ps ((W), (U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(U, A) \
  _mm512_maskz_exp2a23_round_ps ((U), (A), _MM_FROUND_CUR_DIRECTION)

/* rcp28.  */
#define _mm512_rcp28_pd(A) \
  _mm512_rcp28_round_pd ((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(W, U, A) \
  _mm512_mask_rcp28_round_pd ((W), (U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(U, A) \
  _mm512_maskz_rcp28_round_pd ((U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_ps(A) \
  _mm512_rcp28_round_ps ((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(W, U, A) \
  _mm512_mask_rcp28_round_ps ((W), (U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(U, A) \
  _mm512_maskz_rcp28_round_ps ((U), (A), _MM_FROUND_CUR_DIRECTION)

/* rsqrt28.  */
#define _mm512_rsqrt28_pd(A) \
  _mm512_rsqrt28_round_pd ((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(W, U, A) \
  _mm512_mask_rsqrt28_round_pd ((W), (U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(U, A) \
  _mm512_maskz_rsqrt28_round_pd ((U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_ps(A) \
  _mm512_rsqrt28_round_ps ((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(W, U, A) \
  _mm512_mask_rsqrt28_round_ps ((W), (U), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(U, A) \ 517 1.1 mrg _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION) 518 1.1 mrg 519 1.1 mrg #define _mm_rcp28_sd(A, B) \ 520 1.1 mrg __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION) 521 1.1 mrg 522 1.1 mrg #define _mm_rcp28_ss(A, B) \ 523 1.1 mrg __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION) 524 1.1 mrg 525 1.1 mrg #define _mm_rsqrt28_sd(A, B) \ 526 1.1 mrg __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION) 527 1.1 mrg 528 1.1 mrg #define _mm_rsqrt28_ss(A, B) \ 529 1.1 mrg __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION) 530 1.1 mrg 531 1.1 mrg #ifdef __DISABLE_AVX512ER__ 532 1.1 mrg #undef __DISABLE_AVX512ER__ 533 1.1 mrg #pragma GCC pop_options 534 1.1 mrg #endif /* __DISABLE_AVX512ER__ */ 535 1.1 mrg 536 1.1 mrg #endif /* _AVX512ERINTRIN_H_INCLUDED */ 537