/* Copyright (C) 2013-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

24 1.1 mrg #ifndef _IMMINTRIN_H_INCLUDED
25 1.1 mrg #error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
26 1.1 mrg #endif
27 1.1 mrg
28 1.1 mrg #ifndef _AVX512ERINTRIN_H_INCLUDED
29 1.1 mrg #define _AVX512ERINTRIN_H_INCLUDED
30 1.1 mrg
31 1.1 mrg #ifndef __AVX512ER__
32 1.1 mrg #pragma GCC push_options
33 1.1 mrg #pragma GCC target("avx512er")
34 1.1 mrg #define __DISABLE_AVX512ER__
35 1.1 mrg #endif /* __AVX512ER__ */
36 1.1 mrg
37 1.1 mrg /* Internal data types for implementing the intrinsics. */
38 1.1 mrg typedef double __v8df __attribute__ ((__vector_size__ (64)));
39 1.1 mrg typedef float __v16sf __attribute__ ((__vector_size__ (64)));
40 1.1 mrg
41 1.1 mrg /* The Intel API is flexible enough that we must allow aliasing with other
42 1.1 mrg vector types, and their scalar components. */
43 1.1 mrg typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
44 1.1 mrg typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
45 1.1 mrg
46 1.1 mrg typedef unsigned char __mmask8;
47 1.1 mrg typedef unsigned short __mmask16;
48 1.1 mrg
49 1.1 mrg #ifdef __OPTIMIZE__
50 1.1 mrg extern __inline __m512d
51 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
52 1.1 mrg _mm512_exp2a23_round_pd (__m512d __A, int __R)
53 1.1 mrg {
54 1.1 mrg return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
55 1.7 mrg (__v8df) _mm512_undefined_pd (),
56 1.1 mrg (__mmask8) -1, __R);
57 1.1 mrg }
58 1.1 mrg
59 1.1 mrg extern __inline __m512d
60 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
61 1.1 mrg _mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
62 1.1 mrg {
63 1.1 mrg return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
64 1.1 mrg (__v8df) __W,
65 1.1 mrg (__mmask8) __U, __R);
66 1.1 mrg }
67 1.1 mrg
68 1.1 mrg extern __inline __m512d
69 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
70 1.1 mrg _mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
71 1.1 mrg {
72 1.1 mrg return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
73 1.1 mrg (__v8df) _mm512_setzero_pd (),
74 1.1 mrg (__mmask8) __U, __R);
75 1.1 mrg }
76 1.1 mrg
77 1.1 mrg extern __inline __m512
78 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79 1.1 mrg _mm512_exp2a23_round_ps (__m512 __A, int __R)
80 1.1 mrg {
81 1.1 mrg return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
82 1.7 mrg (__v16sf) _mm512_undefined_ps (),
83 1.1 mrg (__mmask16) -1, __R);
84 1.1 mrg }
85 1.1 mrg
86 1.1 mrg extern __inline __m512
87 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
88 1.1 mrg _mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
89 1.1 mrg {
90 1.1 mrg return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
91 1.1 mrg (__v16sf) __W,
92 1.1 mrg (__mmask16) __U, __R);
93 1.1 mrg }
94 1.1 mrg
95 1.1 mrg extern __inline __m512
96 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
97 1.1 mrg _mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
98 1.1 mrg {
99 1.1 mrg return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
100 1.1 mrg (__v16sf) _mm512_setzero_ps (),
101 1.1 mrg (__mmask16) __U, __R);
102 1.1 mrg }
103 1.1 mrg
104 1.1 mrg extern __inline __m512d
105 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
106 1.1 mrg _mm512_rcp28_round_pd (__m512d __A, int __R)
107 1.1 mrg {
108 1.1 mrg return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
109 1.7 mrg (__v8df) _mm512_undefined_pd (),
110 1.1 mrg (__mmask8) -1, __R);
111 1.1 mrg }
112 1.1 mrg
113 1.1 mrg extern __inline __m512d
114 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
115 1.1 mrg _mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
116 1.1 mrg {
117 1.1 mrg return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
118 1.1 mrg (__v8df) __W,
119 1.1 mrg (__mmask8) __U, __R);
120 1.1 mrg }
121 1.1 mrg
122 1.1 mrg extern __inline __m512d
123 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
124 1.1 mrg _mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
125 1.1 mrg {
126 1.1 mrg return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
127 1.1 mrg (__v8df) _mm512_setzero_pd (),
128 1.1 mrg (__mmask8) __U, __R);
129 1.1 mrg }
130 1.1 mrg
131 1.1 mrg extern __inline __m512
132 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
133 1.1 mrg _mm512_rcp28_round_ps (__m512 __A, int __R)
134 1.1 mrg {
135 1.1 mrg return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
136 1.7 mrg (__v16sf) _mm512_undefined_ps (),
137 1.1 mrg (__mmask16) -1, __R);
138 1.1 mrg }
139 1.1 mrg
140 1.1 mrg extern __inline __m512
141 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
142 1.1 mrg _mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
143 1.1 mrg {
144 1.1 mrg return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
145 1.1 mrg (__v16sf) __W,
146 1.1 mrg (__mmask16) __U, __R);
147 1.1 mrg }
148 1.1 mrg
149 1.1 mrg extern __inline __m512
150 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
151 1.1 mrg _mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
152 1.1 mrg {
153 1.1 mrg return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
154 1.1 mrg (__v16sf) _mm512_setzero_ps (),
155 1.1 mrg (__mmask16) __U, __R);
156 1.1 mrg }
157 1.1 mrg
158 1.1 mrg extern __inline __m128d
159 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
160 1.1 mrg _mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
161 1.1 mrg {
162 1.1 mrg return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
163 1.1 mrg (__v2df) __A,
164 1.1 mrg __R);
165 1.1 mrg }
166 1.1 mrg
167 1.7 mrg extern __inline __m128d
168 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
169 1.7 mrg _mm_mask_rcp28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
170 1.7 mrg __m128d __B, int __R)
171 1.7 mrg {
172 1.7 mrg return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
173 1.7 mrg (__v2df) __A,
174 1.7 mrg (__v2df) __W,
175 1.7 mrg __U,
176 1.7 mrg __R);
177 1.7 mrg }
178 1.7 mrg
179 1.7 mrg extern __inline __m128d
180 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
181 1.7 mrg _mm_maskz_rcp28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
182 1.7 mrg {
183 1.7 mrg return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
184 1.7 mrg (__v2df) __A,
185 1.7 mrg (__v2df)
186 1.7 mrg _mm_setzero_pd (),
187 1.7 mrg __U,
188 1.7 mrg __R);
189 1.7 mrg }
190 1.7 mrg
191 1.1 mrg extern __inline __m128
192 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
193 1.1 mrg _mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
194 1.1 mrg {
195 1.1 mrg return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
196 1.1 mrg (__v4sf) __A,
197 1.1 mrg __R);
198 1.1 mrg }
199 1.1 mrg
200 1.7 mrg extern __inline __m128
201 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
202 1.7 mrg _mm_mask_rcp28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
203 1.7 mrg __m128 __B, int __R)
204 1.7 mrg {
205 1.7 mrg return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
206 1.7 mrg (__v4sf) __A,
207 1.7 mrg (__v4sf) __W,
208 1.7 mrg __U,
209 1.7 mrg __R);
210 1.7 mrg }
211 1.7 mrg
212 1.7 mrg extern __inline __m128
213 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
214 1.7 mrg _mm_maskz_rcp28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
215 1.7 mrg {
216 1.7 mrg return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
217 1.7 mrg (__v4sf) __A,
218 1.7 mrg (__v4sf)
219 1.7 mrg _mm_setzero_ps (),
220 1.7 mrg __U,
221 1.7 mrg __R);
222 1.7 mrg }
223 1.7 mrg
224 1.1 mrg extern __inline __m512d
225 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
226 1.1 mrg _mm512_rsqrt28_round_pd (__m512d __A, int __R)
227 1.1 mrg {
228 1.1 mrg return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
229 1.7 mrg (__v8df) _mm512_undefined_pd (),
230 1.1 mrg (__mmask8) -1, __R);
231 1.1 mrg }
232 1.1 mrg
233 1.1 mrg extern __inline __m512d
234 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
235 1.1 mrg _mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
236 1.1 mrg {
237 1.1 mrg return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
238 1.1 mrg (__v8df) __W,
239 1.1 mrg (__mmask8) __U, __R);
240 1.1 mrg }
241 1.1 mrg
242 1.1 mrg extern __inline __m512d
243 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
244 1.1 mrg _mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
245 1.1 mrg {
246 1.1 mrg return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
247 1.1 mrg (__v8df) _mm512_setzero_pd (),
248 1.1 mrg (__mmask8) __U, __R);
249 1.1 mrg }
250 1.1 mrg
251 1.1 mrg extern __inline __m512
252 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
253 1.1 mrg _mm512_rsqrt28_round_ps (__m512 __A, int __R)
254 1.1 mrg {
255 1.1 mrg return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
256 1.7 mrg (__v16sf) _mm512_undefined_ps (),
257 1.1 mrg (__mmask16) -1, __R);
258 1.1 mrg }
259 1.1 mrg
260 1.1 mrg extern __inline __m512
261 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
262 1.1 mrg _mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
263 1.1 mrg {
264 1.1 mrg return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
265 1.1 mrg (__v16sf) __W,
266 1.1 mrg (__mmask16) __U, __R);
267 1.1 mrg }
268 1.1 mrg
269 1.1 mrg extern __inline __m512
270 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
271 1.1 mrg _mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
272 1.1 mrg {
273 1.1 mrg return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
274 1.1 mrg (__v16sf) _mm512_setzero_ps (),
275 1.1 mrg (__mmask16) __U, __R);
276 1.1 mrg }
277 1.1 mrg
278 1.1 mrg extern __inline __m128d
279 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
280 1.1 mrg _mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
281 1.1 mrg {
282 1.1 mrg return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
283 1.1 mrg (__v2df) __A,
284 1.1 mrg __R);
285 1.1 mrg }
286 1.1 mrg
287 1.7 mrg extern __inline __m128d
288 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
289 1.7 mrg _mm_mask_rsqrt28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
290 1.7 mrg __m128d __B, int __R)
291 1.7 mrg {
292 1.7 mrg return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
293 1.7 mrg (__v2df) __A,
294 1.7 mrg (__v2df) __W,
295 1.7 mrg __U,
296 1.7 mrg __R);
297 1.7 mrg }
298 1.7 mrg
299 1.7 mrg extern __inline __m128d
300 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
301 1.7 mrg _mm_maskz_rsqrt28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
302 1.7 mrg {
303 1.7 mrg return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
304 1.7 mrg (__v2df) __A,
305 1.7 mrg (__v2df)
306 1.7 mrg _mm_setzero_pd (),
307 1.7 mrg __U,
308 1.7 mrg __R);
309 1.7 mrg }
310 1.7 mrg
311 1.1 mrg extern __inline __m128
312 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
313 1.1 mrg _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
314 1.1 mrg {
315 1.1 mrg return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
316 1.1 mrg (__v4sf) __A,
317 1.1 mrg __R);
318 1.1 mrg }
319 1.1 mrg
320 1.7 mrg extern __inline __m128
321 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
322 1.7 mrg _mm_mask_rsqrt28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
323 1.7 mrg __m128 __B, int __R)
324 1.7 mrg {
325 1.7 mrg return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
326 1.7 mrg (__v4sf) __A,
327 1.7 mrg (__v4sf) __W,
328 1.7 mrg __U,
329 1.7 mrg __R);
330 1.7 mrg }
331 1.7 mrg
332 1.7 mrg extern __inline __m128
333 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
334 1.7 mrg _mm_maskz_rsqrt28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
335 1.7 mrg {
336 1.7 mrg return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
337 1.7 mrg (__v4sf) __A,
338 1.7 mrg (__v4sf)
339 1.7 mrg _mm_setzero_ps (),
340 1.7 mrg __U,
341 1.7 mrg __R);
342 1.7 mrg }
343 1.7 mrg
344 1.1 mrg #else
345 1.1 mrg #define _mm512_exp2a23_round_pd(A, C) \
346 1.1 mrg __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
347 1.1 mrg
348 1.1 mrg #define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
349 1.1 mrg __builtin_ia32_exp2pd_mask(A, W, U, C)
350 1.1 mrg
351 1.1 mrg #define _mm512_maskz_exp2a23_round_pd(U, A, C) \
352 1.1 mrg __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
353 1.1 mrg
354 1.1 mrg #define _mm512_exp2a23_round_ps(A, C) \
355 1.1 mrg __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
356 1.1 mrg
357 1.1 mrg #define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
358 1.1 mrg __builtin_ia32_exp2ps_mask(A, W, U, C)
359 1.1 mrg
360 1.1 mrg #define _mm512_maskz_exp2a23_round_ps(U, A, C) \
361 1.1 mrg __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
362 1.1 mrg
363 1.1 mrg #define _mm512_rcp28_round_pd(A, C) \
364 1.1 mrg __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
365 1.1 mrg
366 1.1 mrg #define _mm512_mask_rcp28_round_pd(W, U, A, C) \
367 1.1 mrg __builtin_ia32_rcp28pd_mask(A, W, U, C)
368 1.1 mrg
369 1.1 mrg #define _mm512_maskz_rcp28_round_pd(U, A, C) \
370 1.1 mrg __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
371 1.1 mrg
372 1.1 mrg #define _mm512_rcp28_round_ps(A, C) \
373 1.1 mrg __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
374 1.1 mrg
375 1.1 mrg #define _mm512_mask_rcp28_round_ps(W, U, A, C) \
376 1.1 mrg __builtin_ia32_rcp28ps_mask(A, W, U, C)
377 1.1 mrg
378 1.1 mrg #define _mm512_maskz_rcp28_round_ps(U, A, C) \
379 1.1 mrg __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
380 1.1 mrg
381 1.1 mrg #define _mm512_rsqrt28_round_pd(A, C) \
382 1.1 mrg __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
383 1.1 mrg
384 1.1 mrg #define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
385 1.1 mrg __builtin_ia32_rsqrt28pd_mask(A, W, U, C)
386 1.1 mrg
387 1.1 mrg #define _mm512_maskz_rsqrt28_round_pd(U, A, C) \
388 1.1 mrg __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
389 1.1 mrg
390 1.1 mrg #define _mm512_rsqrt28_round_ps(A, C) \
391 1.1 mrg __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
392 1.1 mrg
393 1.1 mrg #define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
394 1.1 mrg __builtin_ia32_rsqrt28ps_mask(A, W, U, C)
395 1.1 mrg
396 1.1 mrg #define _mm512_maskz_rsqrt28_round_ps(U, A, C) \
397 1.1 mrg __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
398 1.1 mrg
399 1.1 mrg #define _mm_rcp28_round_sd(A, B, R) \
400 1.1 mrg __builtin_ia32_rcp28sd_round(A, B, R)
401 1.1 mrg
402 1.7 mrg #define _mm_mask_rcp28_round_sd(W, U, A, B, R) \
403 1.7 mrg __builtin_ia32_rcp28sd_mask_round ((A), (B), (W), (U), (R))
404 1.7 mrg
405 1.7 mrg #define _mm_maskz_rcp28_round_sd(U, A, B, R) \
406 1.7 mrg __builtin_ia32_rcp28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (), \
407 1.7 mrg (U), (R))
408 1.7 mrg
409 1.1 mrg #define _mm_rcp28_round_ss(A, B, R) \
410 1.1 mrg __builtin_ia32_rcp28ss_round(A, B, R)
411 1.1 mrg
412 1.7 mrg #define _mm_mask_rcp28_round_ss(W, U, A, B, R) \
413 1.7 mrg __builtin_ia32_rcp28ss_mask_round ((A), (B), (W), (U), (R))
414 1.7 mrg
415 1.7 mrg #define _mm_maskz_rcp28_round_ss(U, A, B, R) \
416 1.7 mrg __builtin_ia32_rcp28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (), \
417 1.7 mrg (U), (R))
418 1.7 mrg
419 1.1 mrg #define _mm_rsqrt28_round_sd(A, B, R) \
420 1.1 mrg __builtin_ia32_rsqrt28sd_round(A, B, R)
421 1.1 mrg
422 1.7 mrg #define _mm_mask_rsqrt28_round_sd(W, U, A, B, R) \
423 1.7 mrg __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (W), (U), (R))
424 1.7 mrg
425 1.7 mrg #define _mm_maskz_rsqrt28_round_sd(U, A, B, R) \
426 1.7 mrg __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (),\
427 1.7 mrg (U), (R))
428 1.7 mrg
429 1.1 mrg #define _mm_rsqrt28_round_ss(A, B, R) \
430 1.1 mrg __builtin_ia32_rsqrt28ss_round(A, B, R)
431 1.1 mrg
432 1.7 mrg #define _mm_mask_rsqrt28_round_ss(W, U, A, B, R) \
433 1.7 mrg __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (W), (U), (R))
434 1.7 mrg
435 1.7 mrg #define _mm_maskz_rsqrt28_round_ss(U, A, B, R) \
436 1.7 mrg __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (),\
437 1.7 mrg (U), (R))
438 1.7 mrg
439 1.1 mrg #endif
440 1.1 mrg
/* Convenience forms of the masked scalar intrinsics that use the
   current rounding direction instead of an explicit mode.  */
#define _mm_mask_rcp28_sd(W, U, A, B)\
    _mm_mask_rcp28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_sd(U, A, B)\
    _mm_maskz_rcp28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_ss(W, U, A, B)\
    _mm_mask_rcp28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_ss(U, A, B)\
    _mm_maskz_rcp28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_sd(W, U, A, B)\
    _mm_mask_rsqrt28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_sd(U, A, B)\
    _mm_maskz_rsqrt28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_ss(W, U, A, B)\
    _mm_mask_rsqrt28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_ss(U, A, B)\
    _mm_maskz_rsqrt28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

/* Convenience forms of the packed intrinsics that use the current
   rounding direction instead of an explicit mode.  */
#define _mm512_exp2a23_pd(A) \
    _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(W, U, A) \
    _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(U, A) \
    _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_ps(A) \
    _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(W, U, A) \
    _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(U, A) \
    _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_pd(A) \
    _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(W, U, A) \
    _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(U, A) \
    _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_ps(A) \
    _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(W, U, A) \
    _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(U, A) \
    _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_pd(A) \
    _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(W, U, A) \
    _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(U, A) \
    _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_ps(A) \
    _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(W, U, A) \
    _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(U, A) \
    _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

/* Scalar current-direction forms; the builtins take the approximation
   source (B) first and the pass-through (A) second.  */
#define _mm_rcp28_sd(A, B) \
    __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_ss(A, B) \
    __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_sd(A, B) \
    __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_ss(A, B) \
    __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)

531 1.1 mrg #ifdef __DISABLE_AVX512ER__
532 1.1 mrg #undef __DISABLE_AVX512ER__
533 1.1 mrg #pragma GCC pop_options
534 1.1 mrg #endif /* __DISABLE_AVX512ER__ */
535 1.1 mrg
536 1.1 mrg #endif /* _AVX512ERINTRIN_H_INCLUDED */
537