avx512dqintrin.h revision 1.7 1 1.7 mrg /* Copyright (C) 2014-2022 Free Software Foundation, Inc.
2 1.1 mrg
3 1.1 mrg This file is part of GCC.
4 1.1 mrg
5 1.1 mrg GCC is free software; you can redistribute it and/or modify
6 1.1 mrg it under the terms of the GNU General Public License as published by
7 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
8 1.1 mrg any later version.
9 1.1 mrg
10 1.1 mrg GCC is distributed in the hope that it will be useful,
11 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 1.1 mrg GNU General Public License for more details.
14 1.1 mrg
15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
16 1.1 mrg permissions described in the GCC Runtime Library Exception, version
17 1.1 mrg 3.1, as published by the Free Software Foundation.
18 1.1 mrg
19 1.1 mrg You should have received a copy of the GNU General Public License and
20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 1.1 mrg <http://www.gnu.org/licenses/>. */
23 1.1 mrg
24 1.1 mrg #ifndef _IMMINTRIN_H_INCLUDED
25 1.1 mrg #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
26 1.1 mrg #endif
27 1.1 mrg
28 1.1 mrg #ifndef _AVX512DQINTRIN_H_INCLUDED
29 1.1 mrg #define _AVX512DQINTRIN_H_INCLUDED
30 1.1 mrg
31 1.1 mrg #ifndef __AVX512DQ__
32 1.1 mrg #pragma GCC push_options
33 1.1 mrg #pragma GCC target("avx512dq")
34 1.1 mrg #define __DISABLE_AVX512DQ__
35 1.1 mrg #endif /* __AVX512DQ__ */
36 1.1 mrg
37 1.3 mrg extern __inline unsigned char
38 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
39 1.3 mrg _ktest_mask8_u8 (__mmask8 __A, __mmask8 __B, unsigned char *__CF)
40 1.3 mrg {
41 1.3 mrg *__CF = (unsigned char) __builtin_ia32_ktestcqi (__A, __B);
42 1.3 mrg return (unsigned char) __builtin_ia32_ktestzqi (__A, __B);
43 1.3 mrg }
44 1.3 mrg
45 1.3 mrg extern __inline unsigned char
46 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
47 1.3 mrg _ktestz_mask8_u8 (__mmask8 __A, __mmask8 __B)
48 1.3 mrg {
49 1.3 mrg return (unsigned char) __builtin_ia32_ktestzqi (__A, __B);
50 1.3 mrg }
51 1.3 mrg
52 1.3 mrg extern __inline unsigned char
53 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
54 1.3 mrg _ktestc_mask8_u8 (__mmask8 __A, __mmask8 __B)
55 1.3 mrg {
56 1.3 mrg return (unsigned char) __builtin_ia32_ktestcqi (__A, __B);
57 1.3 mrg }
58 1.3 mrg
59 1.3 mrg extern __inline unsigned char
60 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
61 1.3 mrg _ktest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
62 1.3 mrg {
63 1.3 mrg *__CF = (unsigned char) __builtin_ia32_ktestchi (__A, __B);
64 1.3 mrg return (unsigned char) __builtin_ia32_ktestzhi (__A, __B);
65 1.3 mrg }
66 1.3 mrg
67 1.3 mrg extern __inline unsigned char
68 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
69 1.3 mrg _ktestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
70 1.3 mrg {
71 1.3 mrg return (unsigned char) __builtin_ia32_ktestzhi (__A, __B);
72 1.3 mrg }
73 1.3 mrg
74 1.3 mrg extern __inline unsigned char
75 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
76 1.3 mrg _ktestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
77 1.3 mrg {
78 1.3 mrg return (unsigned char) __builtin_ia32_ktestchi (__A, __B);
79 1.3 mrg }
80 1.3 mrg
81 1.3 mrg extern __inline unsigned char
82 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
83 1.3 mrg _kortest_mask8_u8 (__mmask8 __A, __mmask8 __B, unsigned char *__CF)
84 1.3 mrg {
85 1.3 mrg *__CF = (unsigned char) __builtin_ia32_kortestcqi (__A, __B);
86 1.3 mrg return (unsigned char) __builtin_ia32_kortestzqi (__A, __B);
87 1.3 mrg }
88 1.3 mrg
89 1.3 mrg extern __inline unsigned char
90 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
91 1.3 mrg _kortestz_mask8_u8 (__mmask8 __A, __mmask8 __B)
92 1.3 mrg {
93 1.3 mrg return (unsigned char) __builtin_ia32_kortestzqi (__A, __B);
94 1.3 mrg }
95 1.3 mrg
96 1.3 mrg extern __inline unsigned char
97 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
98 1.3 mrg _kortestc_mask8_u8 (__mmask8 __A, __mmask8 __B)
99 1.3 mrg {
100 1.3 mrg return (unsigned char) __builtin_ia32_kortestcqi (__A, __B);
101 1.3 mrg }
102 1.3 mrg
103 1.3 mrg extern __inline __mmask8
104 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
105 1.3 mrg _kadd_mask8 (__mmask8 __A, __mmask8 __B)
106 1.3 mrg {
107 1.3 mrg return (__mmask8) __builtin_ia32_kaddqi ((__mmask8) __A, (__mmask8) __B);
108 1.3 mrg }
109 1.3 mrg
110 1.3 mrg extern __inline __mmask16
111 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
112 1.3 mrg _kadd_mask16 (__mmask16 __A, __mmask16 __B)
113 1.3 mrg {
114 1.3 mrg return (__mmask16) __builtin_ia32_kaddhi ((__mmask16) __A, (__mmask16) __B);
115 1.3 mrg }
116 1.3 mrg
117 1.3 mrg extern __inline unsigned int
118 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
119 1.3 mrg _cvtmask8_u32 (__mmask8 __A)
120 1.3 mrg {
121 1.3 mrg return (unsigned int) __builtin_ia32_kmovb ((__mmask8 ) __A);
122 1.3 mrg }
123 1.3 mrg
124 1.3 mrg extern __inline __mmask8
125 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126 1.3 mrg _cvtu32_mask8 (unsigned int __A)
127 1.3 mrg {
128 1.3 mrg return (__mmask8) __builtin_ia32_kmovb ((__mmask8) __A);
129 1.3 mrg }
130 1.3 mrg
131 1.3 mrg extern __inline __mmask8
132 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
133 1.3 mrg _load_mask8 (__mmask8 *__A)
134 1.3 mrg {
135 1.3 mrg return (__mmask8) __builtin_ia32_kmovb (*(__mmask8 *) __A);
136 1.3 mrg }
137 1.3 mrg
138 1.3 mrg extern __inline void
139 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
140 1.3 mrg _store_mask8 (__mmask8 *__A, __mmask8 __B)
141 1.3 mrg {
142 1.3 mrg *(__mmask8 *) __A = __builtin_ia32_kmovb (__B);
143 1.3 mrg }
144 1.3 mrg
145 1.3 mrg extern __inline __mmask8
146 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
147 1.3 mrg _knot_mask8 (__mmask8 __A)
148 1.3 mrg {
149 1.3 mrg return (__mmask8) __builtin_ia32_knotqi ((__mmask8) __A);
150 1.3 mrg }
151 1.3 mrg
152 1.3 mrg extern __inline __mmask8
153 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
154 1.3 mrg _kor_mask8 (__mmask8 __A, __mmask8 __B)
155 1.3 mrg {
156 1.3 mrg return (__mmask8) __builtin_ia32_korqi ((__mmask8) __A, (__mmask8) __B);
157 1.3 mrg }
158 1.3 mrg
159 1.3 mrg extern __inline __mmask8
160 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
161 1.3 mrg _kxnor_mask8 (__mmask8 __A, __mmask8 __B)
162 1.3 mrg {
163 1.3 mrg return (__mmask8) __builtin_ia32_kxnorqi ((__mmask8) __A, (__mmask8) __B);
164 1.3 mrg }
165 1.3 mrg
166 1.3 mrg extern __inline __mmask8
167 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
168 1.3 mrg _kxor_mask8 (__mmask8 __A, __mmask8 __B)
169 1.3 mrg {
170 1.3 mrg return (__mmask8) __builtin_ia32_kxorqi ((__mmask8) __A, (__mmask8) __B);
171 1.3 mrg }
172 1.3 mrg
173 1.3 mrg extern __inline __mmask8
174 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
175 1.3 mrg _kand_mask8 (__mmask8 __A, __mmask8 __B)
176 1.3 mrg {
177 1.3 mrg return (__mmask8) __builtin_ia32_kandqi ((__mmask8) __A, (__mmask8) __B);
178 1.3 mrg }
179 1.3 mrg
180 1.3 mrg extern __inline __mmask8
181 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
182 1.3 mrg _kandn_mask8 (__mmask8 __A, __mmask8 __B)
183 1.3 mrg {
184 1.3 mrg return (__mmask8) __builtin_ia32_kandnqi ((__mmask8) __A, (__mmask8) __B);
185 1.3 mrg }
186 1.3 mrg
187 1.1 mrg extern __inline __m512d
188 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
189 1.1 mrg _mm512_broadcast_f64x2 (__m128d __A)
190 1.1 mrg {
191 1.3 mrg return (__m512d)
192 1.3 mrg __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
193 1.3 mrg _mm512_undefined_pd (),
194 1.3 mrg (__mmask8) -1);
195 1.1 mrg }
196 1.1 mrg
197 1.1 mrg extern __inline __m512d
198 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
199 1.1 mrg _mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A)
200 1.1 mrg {
201 1.1 mrg return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
202 1.1 mrg __A,
203 1.1 mrg (__v8df)
204 1.1 mrg __O, __M);
205 1.1 mrg }
206 1.1 mrg
207 1.1 mrg extern __inline __m512d
208 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
209 1.1 mrg _mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
210 1.1 mrg {
211 1.1 mrg return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
212 1.1 mrg __A,
213 1.1 mrg (__v8df)
214 1.1 mrg _mm512_setzero_ps (),
215 1.1 mrg __M);
216 1.1 mrg }
217 1.1 mrg
218 1.1 mrg extern __inline __m512i
219 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
220 1.1 mrg _mm512_broadcast_i64x2 (__m128i __A)
221 1.1 mrg {
222 1.3 mrg return (__m512i)
223 1.3 mrg __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
224 1.3 mrg _mm512_undefined_epi32 (),
225 1.3 mrg (__mmask8) -1);
226 1.1 mrg }
227 1.1 mrg
228 1.1 mrg extern __inline __m512i
229 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
230 1.1 mrg _mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A)
231 1.1 mrg {
232 1.1 mrg return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
233 1.1 mrg __A,
234 1.1 mrg (__v8di)
235 1.1 mrg __O, __M);
236 1.1 mrg }
237 1.1 mrg
238 1.1 mrg extern __inline __m512i
239 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
240 1.1 mrg _mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
241 1.1 mrg {
242 1.1 mrg return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
243 1.1 mrg __A,
244 1.1 mrg (__v8di)
245 1.1 mrg _mm512_setzero_si512 (),
246 1.1 mrg __M);
247 1.1 mrg }
248 1.1 mrg
249 1.1 mrg extern __inline __m512
250 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
251 1.1 mrg _mm512_broadcast_f32x2 (__m128 __A)
252 1.1 mrg {
253 1.3 mrg return (__m512)
254 1.3 mrg __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
255 1.3 mrg (__v16sf)_mm512_undefined_ps (),
256 1.3 mrg (__mmask16) -1);
257 1.1 mrg }
258 1.1 mrg
259 1.1 mrg extern __inline __m512
260 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
261 1.1 mrg _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
262 1.1 mrg {
263 1.1 mrg return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
264 1.1 mrg (__v16sf)
265 1.1 mrg __O, __M);
266 1.1 mrg }
267 1.1 mrg
268 1.1 mrg extern __inline __m512
269 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
270 1.1 mrg _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
271 1.1 mrg {
272 1.1 mrg return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
273 1.1 mrg (__v16sf)
274 1.1 mrg _mm512_setzero_ps (),
275 1.1 mrg __M);
276 1.1 mrg }
277 1.1 mrg
278 1.1 mrg extern __inline __m512i
279 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
280 1.1 mrg _mm512_broadcast_i32x2 (__m128i __A)
281 1.1 mrg {
282 1.3 mrg return (__m512i)
283 1.3 mrg __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
284 1.3 mrg (__v16si)
285 1.3 mrg _mm512_undefined_epi32 (),
286 1.3 mrg (__mmask16) -1);
287 1.1 mrg }
288 1.1 mrg
289 1.1 mrg extern __inline __m512i
290 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
291 1.1 mrg _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
292 1.1 mrg {
293 1.1 mrg return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
294 1.1 mrg __A,
295 1.1 mrg (__v16si)
296 1.1 mrg __O, __M);
297 1.1 mrg }
298 1.1 mrg
299 1.1 mrg extern __inline __m512i
300 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
301 1.1 mrg _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
302 1.1 mrg {
303 1.1 mrg return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
304 1.1 mrg __A,
305 1.1 mrg (__v16si)
306 1.1 mrg _mm512_setzero_si512 (),
307 1.1 mrg __M);
308 1.1 mrg }
309 1.1 mrg
310 1.1 mrg extern __inline __m512
311 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
312 1.1 mrg _mm512_broadcast_f32x8 (__m256 __A)
313 1.1 mrg {
314 1.3 mrg return (__m512)
315 1.3 mrg __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
316 1.3 mrg _mm512_undefined_ps (),
317 1.3 mrg (__mmask16) -1);
318 1.1 mrg }
319 1.1 mrg
320 1.1 mrg extern __inline __m512
321 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
322 1.1 mrg _mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A)
323 1.1 mrg {
324 1.1 mrg return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
325 1.1 mrg (__v16sf)__O,
326 1.1 mrg __M);
327 1.1 mrg }
328 1.1 mrg
329 1.1 mrg extern __inline __m512
330 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
331 1.1 mrg _mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A)
332 1.1 mrg {
333 1.1 mrg return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
334 1.1 mrg (__v16sf)
335 1.1 mrg _mm512_setzero_ps (),
336 1.1 mrg __M);
337 1.1 mrg }
338 1.1 mrg
339 1.1 mrg extern __inline __m512i
340 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
341 1.1 mrg _mm512_broadcast_i32x8 (__m256i __A)
342 1.1 mrg {
343 1.3 mrg return (__m512i)
344 1.3 mrg __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
345 1.3 mrg (__v16si)
346 1.3 mrg _mm512_undefined_epi32 (),
347 1.3 mrg (__mmask16) -1);
348 1.1 mrg }
349 1.1 mrg
350 1.1 mrg extern __inline __m512i
351 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
352 1.1 mrg _mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A)
353 1.1 mrg {
354 1.1 mrg return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
355 1.1 mrg __A,
356 1.1 mrg (__v16si)__O,
357 1.1 mrg __M);
358 1.1 mrg }
359 1.1 mrg
360 1.1 mrg extern __inline __m512i
361 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
362 1.1 mrg _mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A)
363 1.1 mrg {
364 1.1 mrg return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
365 1.1 mrg __A,
366 1.1 mrg (__v16si)
367 1.1 mrg _mm512_setzero_si512 (),
368 1.1 mrg __M);
369 1.1 mrg }
370 1.1 mrg
371 1.1 mrg extern __inline __m512i
372 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
373 1.1 mrg _mm512_mullo_epi64 (__m512i __A, __m512i __B)
374 1.1 mrg {
375 1.1 mrg return (__m512i) ((__v8du) __A * (__v8du) __B);
376 1.1 mrg }
377 1.1 mrg
378 1.1 mrg extern __inline __m512i
379 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
380 1.1 mrg _mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
381 1.1 mrg __m512i __B)
382 1.1 mrg {
383 1.1 mrg return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
384 1.1 mrg (__v8di) __B,
385 1.1 mrg (__v8di) __W,
386 1.1 mrg (__mmask8) __U);
387 1.1 mrg }
388 1.1 mrg
389 1.1 mrg extern __inline __m512i
390 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
391 1.1 mrg _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
392 1.1 mrg {
393 1.1 mrg return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
394 1.1 mrg (__v8di) __B,
395 1.1 mrg (__v8di)
396 1.1 mrg _mm512_setzero_si512 (),
397 1.1 mrg (__mmask8) __U);
398 1.1 mrg }
399 1.1 mrg
400 1.1 mrg extern __inline __m512d
401 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
402 1.1 mrg _mm512_xor_pd (__m512d __A, __m512d __B)
403 1.1 mrg {
404 1.1 mrg return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
405 1.1 mrg (__v8df) __B,
406 1.1 mrg (__v8df)
407 1.1 mrg _mm512_setzero_pd (),
408 1.1 mrg (__mmask8) -1);
409 1.1 mrg }
410 1.1 mrg
411 1.1 mrg extern __inline __m512d
412 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
413 1.1 mrg _mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A,
414 1.1 mrg __m512d __B)
415 1.1 mrg {
416 1.1 mrg return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
417 1.1 mrg (__v8df) __B,
418 1.1 mrg (__v8df) __W,
419 1.1 mrg (__mmask8) __U);
420 1.1 mrg }
421 1.1 mrg
422 1.1 mrg extern __inline __m512d
423 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
424 1.1 mrg _mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B)
425 1.1 mrg {
426 1.1 mrg return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
427 1.1 mrg (__v8df) __B,
428 1.1 mrg (__v8df)
429 1.1 mrg _mm512_setzero_pd (),
430 1.1 mrg (__mmask8) __U);
431 1.1 mrg }
432 1.1 mrg
433 1.1 mrg extern __inline __m512
434 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
435 1.1 mrg _mm512_xor_ps (__m512 __A, __m512 __B)
436 1.1 mrg {
437 1.1 mrg return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
438 1.1 mrg (__v16sf) __B,
439 1.1 mrg (__v16sf)
440 1.1 mrg _mm512_setzero_ps (),
441 1.1 mrg (__mmask16) -1);
442 1.1 mrg }
443 1.1 mrg
444 1.1 mrg extern __inline __m512
445 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
446 1.1 mrg _mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
447 1.1 mrg {
448 1.1 mrg return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
449 1.1 mrg (__v16sf) __B,
450 1.1 mrg (__v16sf) __W,
451 1.1 mrg (__mmask16) __U);
452 1.1 mrg }
453 1.1 mrg
454 1.1 mrg extern __inline __m512
455 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
456 1.1 mrg _mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B)
457 1.1 mrg {
458 1.1 mrg return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
459 1.1 mrg (__v16sf) __B,
460 1.1 mrg (__v16sf)
461 1.1 mrg _mm512_setzero_ps (),
462 1.1 mrg (__mmask16) __U);
463 1.1 mrg }
464 1.1 mrg
465 1.1 mrg extern __inline __m512d
466 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
467 1.1 mrg _mm512_or_pd (__m512d __A, __m512d __B)
468 1.1 mrg {
469 1.1 mrg return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
470 1.1 mrg (__v8df) __B,
471 1.1 mrg (__v8df)
472 1.1 mrg _mm512_setzero_pd (),
473 1.1 mrg (__mmask8) -1);
474 1.1 mrg }
475 1.1 mrg
476 1.1 mrg extern __inline __m512d
477 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
478 1.1 mrg _mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
479 1.1 mrg {
480 1.1 mrg return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
481 1.1 mrg (__v8df) __B,
482 1.1 mrg (__v8df) __W,
483 1.1 mrg (__mmask8) __U);
484 1.1 mrg }
485 1.1 mrg
486 1.1 mrg extern __inline __m512d
487 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
488 1.1 mrg _mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B)
489 1.1 mrg {
490 1.1 mrg return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
491 1.1 mrg (__v8df) __B,
492 1.1 mrg (__v8df)
493 1.1 mrg _mm512_setzero_pd (),
494 1.1 mrg (__mmask8) __U);
495 1.1 mrg }
496 1.1 mrg
497 1.1 mrg extern __inline __m512
498 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
499 1.1 mrg _mm512_or_ps (__m512 __A, __m512 __B)
500 1.1 mrg {
501 1.1 mrg return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
502 1.1 mrg (__v16sf) __B,
503 1.1 mrg (__v16sf)
504 1.1 mrg _mm512_setzero_ps (),
505 1.1 mrg (__mmask16) -1);
506 1.1 mrg }
507 1.1 mrg
508 1.1 mrg extern __inline __m512
509 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
510 1.1 mrg _mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
511 1.1 mrg {
512 1.1 mrg return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
513 1.1 mrg (__v16sf) __B,
514 1.1 mrg (__v16sf) __W,
515 1.1 mrg (__mmask16) __U);
516 1.1 mrg }
517 1.1 mrg
518 1.1 mrg extern __inline __m512
519 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
520 1.1 mrg _mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B)
521 1.1 mrg {
522 1.1 mrg return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
523 1.1 mrg (__v16sf) __B,
524 1.1 mrg (__v16sf)
525 1.1 mrg _mm512_setzero_ps (),
526 1.1 mrg (__mmask16) __U);
527 1.1 mrg }
528 1.1 mrg
529 1.1 mrg extern __inline __m512d
530 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
531 1.1 mrg _mm512_and_pd (__m512d __A, __m512d __B)
532 1.1 mrg {
533 1.1 mrg return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
534 1.1 mrg (__v8df) __B,
535 1.1 mrg (__v8df)
536 1.1 mrg _mm512_setzero_pd (),
537 1.1 mrg (__mmask8) -1);
538 1.1 mrg }
539 1.1 mrg
540 1.1 mrg extern __inline __m512d
541 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
542 1.1 mrg _mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A,
543 1.1 mrg __m512d __B)
544 1.1 mrg {
545 1.1 mrg return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
546 1.1 mrg (__v8df) __B,
547 1.1 mrg (__v8df) __W,
548 1.1 mrg (__mmask8) __U);
549 1.1 mrg }
550 1.1 mrg
551 1.1 mrg extern __inline __m512d
552 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
553 1.1 mrg _mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B)
554 1.1 mrg {
555 1.1 mrg return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
556 1.1 mrg (__v8df) __B,
557 1.1 mrg (__v8df)
558 1.1 mrg _mm512_setzero_pd (),
559 1.1 mrg (__mmask8) __U);
560 1.1 mrg }
561 1.1 mrg
562 1.1 mrg extern __inline __m512
563 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
564 1.1 mrg _mm512_and_ps (__m512 __A, __m512 __B)
565 1.1 mrg {
566 1.1 mrg return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
567 1.1 mrg (__v16sf) __B,
568 1.1 mrg (__v16sf)
569 1.1 mrg _mm512_setzero_ps (),
570 1.1 mrg (__mmask16) -1);
571 1.1 mrg }
572 1.1 mrg
573 1.1 mrg extern __inline __m512
574 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
575 1.1 mrg _mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
576 1.1 mrg {
577 1.1 mrg return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
578 1.1 mrg (__v16sf) __B,
579 1.1 mrg (__v16sf) __W,
580 1.1 mrg (__mmask16) __U);
581 1.1 mrg }
582 1.1 mrg
583 1.1 mrg extern __inline __m512
584 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
585 1.1 mrg _mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B)
586 1.1 mrg {
587 1.1 mrg return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
588 1.1 mrg (__v16sf) __B,
589 1.1 mrg (__v16sf)
590 1.1 mrg _mm512_setzero_ps (),
591 1.1 mrg (__mmask16) __U);
592 1.1 mrg }
593 1.1 mrg
594 1.1 mrg extern __inline __m512d
595 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
596 1.1 mrg _mm512_andnot_pd (__m512d __A, __m512d __B)
597 1.1 mrg {
598 1.1 mrg return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
599 1.1 mrg (__v8df) __B,
600 1.1 mrg (__v8df)
601 1.1 mrg _mm512_setzero_pd (),
602 1.1 mrg (__mmask8) -1);
603 1.1 mrg }
604 1.1 mrg
605 1.1 mrg extern __inline __m512d
606 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
607 1.1 mrg _mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A,
608 1.1 mrg __m512d __B)
609 1.1 mrg {
610 1.1 mrg return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
611 1.1 mrg (__v8df) __B,
612 1.1 mrg (__v8df) __W,
613 1.1 mrg (__mmask8) __U);
614 1.1 mrg }
615 1.1 mrg
616 1.1 mrg extern __inline __m512d
617 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
618 1.1 mrg _mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B)
619 1.1 mrg {
620 1.1 mrg return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
621 1.1 mrg (__v8df) __B,
622 1.1 mrg (__v8df)
623 1.1 mrg _mm512_setzero_pd (),
624 1.1 mrg (__mmask8) __U);
625 1.1 mrg }
626 1.1 mrg
627 1.1 mrg extern __inline __m512
628 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
629 1.1 mrg _mm512_andnot_ps (__m512 __A, __m512 __B)
630 1.1 mrg {
631 1.1 mrg return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
632 1.1 mrg (__v16sf) __B,
633 1.1 mrg (__v16sf)
634 1.1 mrg _mm512_setzero_ps (),
635 1.1 mrg (__mmask16) -1);
636 1.1 mrg }
637 1.1 mrg
638 1.1 mrg extern __inline __m512
639 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
640 1.1 mrg _mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A,
641 1.1 mrg __m512 __B)
642 1.1 mrg {
643 1.1 mrg return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
644 1.1 mrg (__v16sf) __B,
645 1.1 mrg (__v16sf) __W,
646 1.1 mrg (__mmask16) __U);
647 1.1 mrg }
648 1.1 mrg
649 1.1 mrg extern __inline __m512
650 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
651 1.1 mrg _mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B)
652 1.1 mrg {
653 1.1 mrg return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
654 1.1 mrg (__v16sf) __B,
655 1.1 mrg (__v16sf)
656 1.1 mrg _mm512_setzero_ps (),
657 1.1 mrg (__mmask16) __U);
658 1.1 mrg }
659 1.1 mrg
660 1.1 mrg extern __inline __mmask16
661 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
662 1.1 mrg _mm512_movepi32_mask (__m512i __A)
663 1.1 mrg {
664 1.1 mrg return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
665 1.1 mrg }
666 1.1 mrg
667 1.1 mrg extern __inline __mmask8
668 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
669 1.1 mrg _mm512_movepi64_mask (__m512i __A)
670 1.1 mrg {
671 1.1 mrg return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
672 1.1 mrg }
673 1.1 mrg
674 1.1 mrg extern __inline __m512i
675 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
676 1.1 mrg _mm512_movm_epi32 (__mmask16 __A)
677 1.1 mrg {
678 1.1 mrg return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
679 1.1 mrg }
680 1.1 mrg
681 1.1 mrg extern __inline __m512i
682 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
683 1.1 mrg _mm512_movm_epi64 (__mmask8 __A)
684 1.1 mrg {
685 1.1 mrg return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
686 1.1 mrg }
687 1.1 mrg
688 1.1 mrg extern __inline __m512i
689 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
690 1.1 mrg _mm512_cvttpd_epi64 (__m512d __A)
691 1.1 mrg {
692 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
693 1.1 mrg (__v8di)
694 1.1 mrg _mm512_setzero_si512 (),
695 1.1 mrg (__mmask8) -1,
696 1.1 mrg _MM_FROUND_CUR_DIRECTION);
697 1.1 mrg }
698 1.1 mrg
699 1.1 mrg extern __inline __m512i
700 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
701 1.1 mrg _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
702 1.1 mrg {
703 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
704 1.1 mrg (__v8di) __W,
705 1.1 mrg (__mmask8) __U,
706 1.1 mrg _MM_FROUND_CUR_DIRECTION);
707 1.1 mrg }
708 1.1 mrg
709 1.1 mrg extern __inline __m512i
710 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711 1.1 mrg _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A)
712 1.1 mrg {
713 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
714 1.1 mrg (__v8di)
715 1.1 mrg _mm512_setzero_si512 (),
716 1.1 mrg (__mmask8) __U,
717 1.1 mrg _MM_FROUND_CUR_DIRECTION);
718 1.1 mrg }
719 1.1 mrg
720 1.1 mrg extern __inline __m512i
721 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722 1.1 mrg _mm512_cvttpd_epu64 (__m512d __A)
723 1.1 mrg {
724 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
725 1.1 mrg (__v8di)
726 1.1 mrg _mm512_setzero_si512 (),
727 1.1 mrg (__mmask8) -1,
728 1.1 mrg _MM_FROUND_CUR_DIRECTION);
729 1.1 mrg }
730 1.1 mrg
731 1.1 mrg extern __inline __m512i
732 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
733 1.1 mrg _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
734 1.1 mrg {
735 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
736 1.1 mrg (__v8di) __W,
737 1.1 mrg (__mmask8) __U,
738 1.1 mrg _MM_FROUND_CUR_DIRECTION);
739 1.1 mrg }
740 1.1 mrg
741 1.1 mrg extern __inline __m512i
742 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
743 1.1 mrg _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A)
744 1.1 mrg {
745 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
746 1.1 mrg (__v8di)
747 1.1 mrg _mm512_setzero_si512 (),
748 1.1 mrg (__mmask8) __U,
749 1.1 mrg _MM_FROUND_CUR_DIRECTION);
750 1.1 mrg }
751 1.1 mrg
752 1.1 mrg extern __inline __m512i
753 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
754 1.1 mrg _mm512_cvttps_epi64 (__m256 __A)
755 1.1 mrg {
756 1.1 mrg return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
757 1.1 mrg (__v8di)
758 1.1 mrg _mm512_setzero_si512 (),
759 1.1 mrg (__mmask8) -1,
760 1.1 mrg _MM_FROUND_CUR_DIRECTION);
761 1.1 mrg }
762 1.1 mrg
763 1.1 mrg extern __inline __m512i
764 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
765 1.1 mrg _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
766 1.1 mrg {
767 1.1 mrg return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
768 1.1 mrg (__v8di) __W,
769 1.1 mrg (__mmask8) __U,
770 1.1 mrg _MM_FROUND_CUR_DIRECTION);
771 1.1 mrg }
772 1.1 mrg
773 1.1 mrg extern __inline __m512i
774 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
775 1.1 mrg _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A)
776 1.1 mrg {
777 1.1 mrg return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
778 1.1 mrg (__v8di)
779 1.1 mrg _mm512_setzero_si512 (),
780 1.1 mrg (__mmask8) __U,
781 1.1 mrg _MM_FROUND_CUR_DIRECTION);
782 1.1 mrg }
783 1.1 mrg
784 1.1 mrg extern __inline __m512i
785 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786 1.1 mrg _mm512_cvttps_epu64 (__m256 __A)
787 1.1 mrg {
788 1.1 mrg return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
789 1.1 mrg (__v8di)
790 1.1 mrg _mm512_setzero_si512 (),
791 1.1 mrg (__mmask8) -1,
792 1.1 mrg _MM_FROUND_CUR_DIRECTION);
793 1.1 mrg }
794 1.1 mrg
795 1.1 mrg extern __inline __m512i
796 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797 1.1 mrg _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
798 1.1 mrg {
799 1.1 mrg return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
800 1.1 mrg (__v8di) __W,
801 1.1 mrg (__mmask8) __U,
802 1.1 mrg _MM_FROUND_CUR_DIRECTION);
803 1.1 mrg }
804 1.1 mrg
805 1.1 mrg extern __inline __m512i
806 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
807 1.1 mrg _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A)
808 1.1 mrg {
809 1.1 mrg return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
810 1.1 mrg (__v8di)
811 1.1 mrg _mm512_setzero_si512 (),
812 1.1 mrg (__mmask8) __U,
813 1.1 mrg _MM_FROUND_CUR_DIRECTION);
814 1.1 mrg }
815 1.1 mrg
816 1.1 mrg extern __inline __m512i
817 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
818 1.1 mrg _mm512_cvtpd_epi64 (__m512d __A)
819 1.1 mrg {
820 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
821 1.1 mrg (__v8di)
822 1.1 mrg _mm512_setzero_si512 (),
823 1.1 mrg (__mmask8) -1,
824 1.1 mrg _MM_FROUND_CUR_DIRECTION);
825 1.1 mrg }
826 1.1 mrg
/* Masked form: calls __builtin_ia32_cvtpd2qq512_mask on __A with __W as
   the second operand, __U as the mask and _MM_FROUND_CUR_DIRECTION.  */
827 1.1 mrg extern __inline __m512i
828 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
829 1.1 mrg _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
830 1.1 mrg {
831 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
832 1.1 mrg (__v8di) __W,
833 1.1 mrg (__mmask8) __U,
834 1.1 mrg _MM_FROUND_CUR_DIRECTION);
835 1.1 mrg }
836 1.1 mrg
/* Zero-operand masked form: calls __builtin_ia32_cvtpd2qq512_mask on
   __A with a zero vector as the second operand, __U as the mask and
   _MM_FROUND_CUR_DIRECTION.  */
837 1.1 mrg extern __inline __m512i
838 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
839 1.1 mrg _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A)
840 1.1 mrg {
841 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
842 1.1 mrg (__v8di)
843 1.1 mrg _mm512_setzero_si512 (),
844 1.1 mrg (__mmask8) __U,
845 1.1 mrg _MM_FROUND_CUR_DIRECTION);
846 1.1 mrg }
847 1.1 mrg
/* Unmasked form: passes __A (as __v8df) to
   __builtin_ia32_cvtpd2uqq512_mask with a zero vector as the second
   operand, an all-ones __mmask8 and _MM_FROUND_CUR_DIRECTION.  */
848 1.1 mrg extern __inline __m512i
849 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
850 1.1 mrg _mm512_cvtpd_epu64 (__m512d __A)
851 1.1 mrg {
852 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
853 1.1 mrg (__v8di)
854 1.1 mrg _mm512_setzero_si512 (),
855 1.1 mrg (__mmask8) -1,
856 1.1 mrg _MM_FROUND_CUR_DIRECTION);
857 1.1 mrg }
858 1.1 mrg
/* Masked form: calls __builtin_ia32_cvtpd2uqq512_mask on __A with __W
   as the second operand, __U as the mask and _MM_FROUND_CUR_DIRECTION.  */
859 1.1 mrg extern __inline __m512i
860 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
861 1.1 mrg _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
862 1.1 mrg {
863 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
864 1.1 mrg (__v8di) __W,
865 1.1 mrg (__mmask8) __U,
866 1.1 mrg _MM_FROUND_CUR_DIRECTION);
867 1.1 mrg }
868 1.1 mrg
/* Zero-operand masked form: calls __builtin_ia32_cvtpd2uqq512_mask on
   __A with a zero vector as the second operand, __U as the mask and
   _MM_FROUND_CUR_DIRECTION.  */
869 1.1 mrg extern __inline __m512i
870 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
871 1.1 mrg _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A)
872 1.1 mrg {
873 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
874 1.1 mrg (__v8di)
875 1.1 mrg _mm512_setzero_si512 (),
876 1.1 mrg (__mmask8) __U,
877 1.1 mrg _MM_FROUND_CUR_DIRECTION);
878 1.1 mrg }
879 1.1 mrg
/* Unmasked form: passes __A (as __v8sf) to
   __builtin_ia32_cvtps2qq512_mask with a zero vector as the second
   operand, an all-ones __mmask8 and _MM_FROUND_CUR_DIRECTION.  */
880 1.1 mrg extern __inline __m512i
881 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882 1.1 mrg _mm512_cvtps_epi64 (__m256 __A)
883 1.1 mrg {
884 1.1 mrg return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
885 1.1 mrg (__v8di)
886 1.1 mrg _mm512_setzero_si512 (),
887 1.1 mrg (__mmask8) -1,
888 1.1 mrg _MM_FROUND_CUR_DIRECTION);
889 1.1 mrg }
890 1.1 mrg
/* Masked form: calls __builtin_ia32_cvtps2qq512_mask on __A with __W as
   the second operand, __U as the mask and _MM_FROUND_CUR_DIRECTION.  */
891 1.1 mrg extern __inline __m512i
892 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
893 1.1 mrg _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
894 1.1 mrg {
895 1.1 mrg return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
896 1.1 mrg (__v8di) __W,
897 1.1 mrg (__mmask8) __U,
898 1.1 mrg _MM_FROUND_CUR_DIRECTION);
899 1.1 mrg }
900 1.1 mrg
/* Zero-operand masked form: calls __builtin_ia32_cvtps2qq512_mask on
   __A with a zero vector as the second operand, __U as the mask and
   _MM_FROUND_CUR_DIRECTION.  */
901 1.1 mrg extern __inline __m512i
902 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
903 1.1 mrg _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A)
904 1.1 mrg {
905 1.1 mrg return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
906 1.1 mrg (__v8di)
907 1.1 mrg _mm512_setzero_si512 (),
908 1.1 mrg (__mmask8) __U,
909 1.1 mrg _MM_FROUND_CUR_DIRECTION);
910 1.1 mrg }
911 1.1 mrg
/* Unmasked form: passes __A (as __v8sf) to
   __builtin_ia32_cvtps2uqq512_mask with a zero vector as the second
   operand, an all-ones __mmask8 and _MM_FROUND_CUR_DIRECTION.  */
912 1.1 mrg extern __inline __m512i
913 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
914 1.1 mrg _mm512_cvtps_epu64 (__m256 __A)
915 1.1 mrg {
916 1.1 mrg return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
917 1.1 mrg (__v8di)
918 1.1 mrg _mm512_setzero_si512 (),
919 1.1 mrg (__mmask8) -1,
920 1.1 mrg _MM_FROUND_CUR_DIRECTION);
921 1.1 mrg }
922 1.1 mrg
/* Masked form: calls __builtin_ia32_cvtps2uqq512_mask on __A with __W
   as the second operand, __U as the mask and _MM_FROUND_CUR_DIRECTION.  */
923 1.1 mrg extern __inline __m512i
924 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
925 1.1 mrg _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
926 1.1 mrg {
927 1.1 mrg return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
928 1.1 mrg (__v8di) __W,
929 1.1 mrg (__mmask8) __U,
930 1.1 mrg _MM_FROUND_CUR_DIRECTION);
931 1.1 mrg }
932 1.1 mrg
/* Zero-operand masked form: calls __builtin_ia32_cvtps2uqq512_mask on
   __A with a zero vector as the second operand, __U as the mask and
   _MM_FROUND_CUR_DIRECTION.  */
933 1.1 mrg extern __inline __m512i
934 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
935 1.1 mrg _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A)
936 1.1 mrg {
937 1.1 mrg return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
938 1.1 mrg (__v8di)
939 1.1 mrg _mm512_setzero_si512 (),
940 1.1 mrg (__mmask8) __U,
941 1.1 mrg _MM_FROUND_CUR_DIRECTION);
942 1.1 mrg }
943 1.1 mrg
/* Unmasked form: passes __A (as __v8di) to
   __builtin_ia32_cvtqq2ps512_mask with a zero __v8sf from
   _mm256_setzero_ps () as the second operand, an all-ones __mmask8 and
   _MM_FROUND_CUR_DIRECTION.  The result is returned as __m256.  */
944 1.1 mrg extern __inline __m256
945 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
946 1.1 mrg _mm512_cvtepi64_ps (__m512i __A)
947 1.1 mrg {
948 1.1 mrg return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
949 1.1 mrg (__v8sf)
950 1.1 mrg _mm256_setzero_ps (),
951 1.1 mrg (__mmask8) -1,
952 1.1 mrg _MM_FROUND_CUR_DIRECTION);
953 1.1 mrg }
954 1.1 mrg
/* Masked form: calls __builtin_ia32_cvtqq2ps512_mask on __A with __W as
   the second operand, __U as the mask and _MM_FROUND_CUR_DIRECTION.  */
955 1.1 mrg extern __inline __m256
956 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
957 1.1 mrg _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A)
958 1.1 mrg {
959 1.1 mrg return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
960 1.1 mrg (__v8sf) __W,
961 1.1 mrg (__mmask8) __U,
962 1.1 mrg _MM_FROUND_CUR_DIRECTION);
963 1.1 mrg }
964 1.1 mrg
/* Zero-operand masked form: calls __builtin_ia32_cvtqq2ps512_mask on
   __A with a zero __v8sf as the second operand, __U as the mask and
   _MM_FROUND_CUR_DIRECTION.  */
965 1.1 mrg extern __inline __m256
966 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
967 1.1 mrg _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A)
968 1.1 mrg {
969 1.1 mrg return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
970 1.1 mrg (__v8sf)
971 1.1 mrg _mm256_setzero_ps (),
972 1.1 mrg (__mmask8) __U,
973 1.1 mrg _MM_FROUND_CUR_DIRECTION);
974 1.1 mrg }
975 1.1 mrg
/* Unmasked form: passes __A (as __v8di) to
   __builtin_ia32_cvtuqq2ps512_mask with a zero __v8sf as the second
   operand, an all-ones __mmask8 and _MM_FROUND_CUR_DIRECTION.  */
976 1.1 mrg extern __inline __m256
977 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
978 1.1 mrg _mm512_cvtepu64_ps (__m512i __A)
979 1.1 mrg {
980 1.1 mrg return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
981 1.1 mrg (__v8sf)
982 1.1 mrg _mm256_setzero_ps (),
983 1.1 mrg (__mmask8) -1,
984 1.1 mrg _MM_FROUND_CUR_DIRECTION);
985 1.1 mrg }
986 1.1 mrg
/* Masked form: calls __builtin_ia32_cvtuqq2ps512_mask on __A with __W
   as the second operand, __U as the mask and _MM_FROUND_CUR_DIRECTION.  */
987 1.1 mrg extern __inline __m256
988 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
989 1.1 mrg _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A)
990 1.1 mrg {
991 1.1 mrg return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
992 1.1 mrg (__v8sf) __W,
993 1.1 mrg (__mmask8) __U,
994 1.1 mrg _MM_FROUND_CUR_DIRECTION);
995 1.1 mrg }
996 1.1 mrg
/* Zero-operand masked form: calls __builtin_ia32_cvtuqq2ps512_mask on
   __A with a zero __v8sf as the second operand, __U as the mask and
   _MM_FROUND_CUR_DIRECTION.  */
997 1.1 mrg extern __inline __m256
998 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
999 1.1 mrg _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A)
1000 1.1 mrg {
1001 1.1 mrg return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1002 1.1 mrg (__v8sf)
1003 1.1 mrg _mm256_setzero_ps (),
1004 1.1 mrg (__mmask8) __U,
1005 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1006 1.1 mrg }
1007 1.1 mrg
/* Unmasked form: passes __A (as __v8di) to
   __builtin_ia32_cvtqq2pd512_mask with a zero __v8df as the second
   operand, an all-ones __mmask8 and _MM_FROUND_CUR_DIRECTION.  */
1008 1.1 mrg extern __inline __m512d
1009 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1010 1.1 mrg _mm512_cvtepi64_pd (__m512i __A)
1011 1.1 mrg {
1012 1.1 mrg return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1013 1.1 mrg (__v8df)
1014 1.1 mrg _mm512_setzero_pd (),
1015 1.1 mrg (__mmask8) -1,
1016 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1017 1.1 mrg }
1018 1.1 mrg
/* Masked form: calls __builtin_ia32_cvtqq2pd512_mask on __A with __W as
   the second operand, __U as the mask and _MM_FROUND_CUR_DIRECTION.  */
1019 1.1 mrg extern __inline __m512d
1020 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1021 1.1 mrg _mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A)
1022 1.1 mrg {
1023 1.1 mrg return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1024 1.1 mrg (__v8df) __W,
1025 1.1 mrg (__mmask8) __U,
1026 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1027 1.1 mrg }
1028 1.1 mrg
/* Zero-operand masked form: calls __builtin_ia32_cvtqq2pd512_mask on
   __A with a zero __v8df as the second operand, __U as the mask and
   _MM_FROUND_CUR_DIRECTION.  */
1029 1.1 mrg extern __inline __m512d
1030 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1031 1.1 mrg _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A)
1032 1.1 mrg {
1033 1.1 mrg return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1034 1.1 mrg (__v8df)
1035 1.1 mrg _mm512_setzero_pd (),
1036 1.1 mrg (__mmask8) __U,
1037 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1038 1.1 mrg }
1039 1.1 mrg
/* Unmasked form: passes __A (as __v8di) to
   __builtin_ia32_cvtuqq2pd512_mask with a zero __v8df as the second
   operand, an all-ones __mmask8 and _MM_FROUND_CUR_DIRECTION.  */
1040 1.1 mrg extern __inline __m512d
1041 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1042 1.1 mrg _mm512_cvtepu64_pd (__m512i __A)
1043 1.1 mrg {
1044 1.1 mrg return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1045 1.1 mrg (__v8df)
1046 1.1 mrg _mm512_setzero_pd (),
1047 1.1 mrg (__mmask8) -1,
1048 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1049 1.1 mrg }
1050 1.1 mrg
/* Masked form: calls __builtin_ia32_cvtuqq2pd512_mask on __A with __W
   as the second operand, __U as the mask and _MM_FROUND_CUR_DIRECTION.  */
1051 1.1 mrg extern __inline __m512d
1052 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1053 1.1 mrg _mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A)
1054 1.1 mrg {
1055 1.1 mrg return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1056 1.1 mrg (__v8df) __W,
1057 1.1 mrg (__mmask8) __U,
1058 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1059 1.1 mrg }
1060 1.1 mrg
/* Zero-operand masked form: calls __builtin_ia32_cvtuqq2pd512_mask on
   __A with a zero __v8df as the second operand, __U as the mask and
   _MM_FROUND_CUR_DIRECTION.  */
1061 1.1 mrg extern __inline __m512d
1062 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1063 1.1 mrg _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A)
1064 1.1 mrg {
1065 1.1 mrg return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1066 1.1 mrg (__v8df)
1067 1.1 mrg _mm512_setzero_pd (),
1068 1.1 mrg (__mmask8) __U,
1069 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1070 1.1 mrg }
1071 1.1 mrg
1072 1.1 mrg #ifdef __OPTIMIZE__
/* Calls __builtin_ia32_kshiftliqi on mask __A with count __B.  __B is
   declared unsigned int but is cast to __mmask8 before the call, so
   only its low bits reach the builtin.  Follows #ifdef __OPTIMIZE__.  */
1073 1.3 mrg extern __inline __mmask8
1074 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1075 1.3 mrg _kshiftli_mask8 (__mmask8 __A, unsigned int __B)
1076 1.3 mrg {
1077 1.3 mrg return (__mmask8) __builtin_ia32_kshiftliqi ((__mmask8) __A, (__mmask8) __B);
1078 1.3 mrg }
1079 1.3 mrg
/* Calls __builtin_ia32_kshiftriqi on mask __A with count __B.  __B is
   declared unsigned int but is cast to __mmask8 before the call, so
   only its low bits reach the builtin.  */
1080 1.3 mrg extern __inline __mmask8
1081 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1082 1.3 mrg _kshiftri_mask8 (__mmask8 __A, unsigned int __B)
1083 1.3 mrg {
1084 1.3 mrg return (__mmask8) __builtin_ia32_kshiftriqi ((__mmask8) __A, (__mmask8) __B);
1085 1.3 mrg }
1086 1.3 mrg
/* Unmasked form: calls __builtin_ia32_rangepd512_mask on __A and __B
   with selector __C, a zero __v8df as the fourth operand, an all-ones
   __mmask8 and _MM_FROUND_CUR_DIRECTION.  __C is forwarded as-is.  */
1087 1.1 mrg extern __inline __m512d
1088 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1089 1.1 mrg _mm512_range_pd (__m512d __A, __m512d __B, int __C)
1090 1.1 mrg {
1091 1.1 mrg return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
1092 1.1 mrg (__v8df) __B, __C,
1093 1.1 mrg (__v8df)
1094 1.1 mrg _mm512_setzero_pd (),
1095 1.1 mrg (__mmask8) -1,
1096 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1097 1.1 mrg }
1098 1.1 mrg
/* Masked form: calls __builtin_ia32_rangepd512_mask on __A and __B with
   selector __C, __W as the fourth operand, __U as the mask and
   _MM_FROUND_CUR_DIRECTION.  */
1099 1.1 mrg extern __inline __m512d
1100 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1101 1.1 mrg _mm512_mask_range_pd (__m512d __W, __mmask8 __U,
1102 1.1 mrg __m512d __A, __m512d __B, int __C)
1103 1.1 mrg {
1104 1.1 mrg return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
1105 1.1 mrg (__v8df) __B, __C,
1106 1.1 mrg (__v8df) __W,
1107 1.1 mrg (__mmask8) __U,
1108 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1109 1.1 mrg }
1110 1.1 mrg
/* Zero-operand masked form: calls __builtin_ia32_rangepd512_mask on __A
   and __B with selector __C, a zero __v8df as the fourth operand, __U
   as the mask and _MM_FROUND_CUR_DIRECTION.  */
1111 1.1 mrg extern __inline __m512d
1112 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1113 1.1 mrg _mm512_maskz_range_pd (__mmask8 __U, __m512d __A, __m512d __B, int __C)
1114 1.1 mrg {
1115 1.1 mrg return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
1116 1.1 mrg (__v8df) __B, __C,
1117 1.1 mrg (__v8df)
1118 1.1 mrg _mm512_setzero_pd (),
1119 1.1 mrg (__mmask8) __U,
1120 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1121 1.1 mrg }
1122 1.1 mrg
/* Unmasked form: calls __builtin_ia32_rangeps512_mask on __A and __B
   with selector __C, a zero __v16sf as the fourth operand, an all-ones
   __mmask16 and _MM_FROUND_CUR_DIRECTION.  */
1123 1.1 mrg extern __inline __m512
1124 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1125 1.1 mrg _mm512_range_ps (__m512 __A, __m512 __B, int __C)
1126 1.1 mrg {
1127 1.1 mrg return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
1128 1.1 mrg (__v16sf) __B, __C,
1129 1.1 mrg (__v16sf)
1130 1.1 mrg _mm512_setzero_ps (),
1131 1.1 mrg (__mmask16) -1,
1132 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1133 1.1 mrg }
1134 1.1 mrg
/* Masked form: calls __builtin_ia32_rangeps512_mask on __A and __B with
   selector __C, __W as the fourth operand, the 16-bit mask __U and
   _MM_FROUND_CUR_DIRECTION.  */
1135 1.1 mrg extern __inline __m512
1136 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1137 1.1 mrg _mm512_mask_range_ps (__m512 __W, __mmask16 __U,
1138 1.1 mrg __m512 __A, __m512 __B, int __C)
1139 1.1 mrg {
1140 1.1 mrg return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
1141 1.1 mrg (__v16sf) __B, __C,
1142 1.1 mrg (__v16sf) __W,
1143 1.1 mrg (__mmask16) __U,
1144 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1145 1.1 mrg }
1146 1.1 mrg
/* Zero-operand masked form: calls __builtin_ia32_rangeps512_mask on __A
   and __B with selector __C, a zero __v16sf as the fourth operand, the
   16-bit mask __U and _MM_FROUND_CUR_DIRECTION.  */
1147 1.1 mrg extern __inline __m512
1148 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1149 1.1 mrg _mm512_maskz_range_ps (__mmask16 __U, __m512 __A, __m512 __B, int __C)
1150 1.1 mrg {
1151 1.1 mrg return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
1152 1.1 mrg (__v16sf) __B, __C,
1153 1.1 mrg (__v16sf)
1154 1.1 mrg _mm512_setzero_ps (),
1155 1.1 mrg (__mmask16) __U,
1156 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1157 1.1 mrg }
1158 1.1 mrg
/* Unmasked form: calls __builtin_ia32_reducesd_mask on __A and __B with
   control value __C, a zero __v2df as the fourth operand and an
   all-ones __mmask8.  No rounding argument is passed.  */
1159 1.1 mrg extern __inline __m128d
1160 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1161 1.1 mrg _mm_reduce_sd (__m128d __A, __m128d __B, int __C)
1162 1.1 mrg {
1163 1.4 mrg return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A,
1164 1.4 mrg (__v2df) __B, __C,
1165 1.4 mrg (__v2df) _mm_setzero_pd (),
1166 1.4 mrg (__mmask8) -1);
1167 1.4 mrg }
1168 1.4 mrg
/* Unmasked form with explicit rounding argument: calls
   __builtin_ia32_reducesd_mask_round on __A and __B with control value
   __C, a zero __v2df, an all-ones __mmask8 and __R as the last
   argument.  */
1169 1.4 mrg extern __inline __m128d
1170 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1171 1.7 mrg _mm_reduce_round_sd (__m128d __A, __m128d __B, int __C, const int __R)
1172 1.7 mrg {
1173 1.7 mrg return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A,
1174 1.7 mrg (__v2df) __B, __C,
1175 1.7 mrg (__v2df)
1176 1.7 mrg _mm_setzero_pd (),
1177 1.7 mrg (__mmask8) -1, __R);
1178 1.7 mrg }
1179 1.7 mrg
/* Masked form: calls __builtin_ia32_reducesd_mask on __A and __B with
   control value __C, __W as the fourth operand and __U as the mask.  */
1180 1.7 mrg extern __inline __m128d
1181 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1182 1.4 mrg _mm_mask_reduce_sd (__m128d __W, __mmask8 __U, __m128d __A,
1183 1.4 mrg __m128d __B, int __C)
1184 1.4 mrg {
1185 1.4 mrg return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A,
1186 1.4 mrg (__v2df) __B, __C,
1187 1.4 mrg (__v2df) __W,
1188 1.4 mrg (__mmask8) __U);
1189 1.4 mrg }
1190 1.4 mrg
/* Masked form with explicit rounding argument: calls
   __builtin_ia32_reducesd_mask_round on __A and __B with control value
   __C, __W as the fourth operand, __U as the mask (passed without a
   cast; it is already __mmask8) and __R last.  */
1191 1.4 mrg extern __inline __m128d
1192 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1193 1.7 mrg _mm_mask_reduce_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1194 1.7 mrg __m128d __B, int __C, const int __R)
1195 1.7 mrg {
1196 1.7 mrg return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A,
1197 1.7 mrg (__v2df) __B, __C,
1198 1.7 mrg (__v2df) __W,
1199 1.7 mrg __U, __R);
1200 1.7 mrg }
1201 1.7 mrg
/* Zero-operand masked form: calls __builtin_ia32_reducesd_mask on __A
   and __B with control value __C, a zero __v2df as the fourth operand
   and __U as the mask.  */
1202 1.7 mrg extern __inline __m128d
1203 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1204 1.4 mrg _mm_maskz_reduce_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
1205 1.4 mrg {
1206 1.4 mrg return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A,
1207 1.4 mrg (__v2df) __B, __C,
1208 1.4 mrg (__v2df) _mm_setzero_pd (),
1209 1.4 mrg (__mmask8) __U);
1210 1.1 mrg }
1211 1.1 mrg
/* Zero-operand masked form with explicit rounding argument: calls
   __builtin_ia32_reducesd_mask_round on __A and __B with control value
   __C, a zero __v2df, __U as the mask and __R last.  */
1212 1.7 mrg extern __inline __m128d
1213 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1214 1.7 mrg _mm_maskz_reduce_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1215 1.7 mrg int __C, const int __R)
1216 1.7 mrg {
1217 1.7 mrg return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A,
1218 1.7 mrg (__v2df) __B, __C,
1219 1.7 mrg (__v2df)
1220 1.7 mrg _mm_setzero_pd (),
1221 1.7 mrg __U, __R);
1222 1.7 mrg }
1223 1.7 mrg
/* Unmasked form: calls __builtin_ia32_reducess_mask on __A and __B with
   control value __C, a zero __v4sf as the fourth operand and an
   all-ones __mmask8.  No rounding argument is passed.  */
1224 1.1 mrg extern __inline __m128
1225 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1226 1.1 mrg _mm_reduce_ss (__m128 __A, __m128 __B, int __C)
1227 1.1 mrg {
1228 1.4 mrg return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A,
1229 1.4 mrg (__v4sf) __B, __C,
1230 1.4 mrg (__v4sf) _mm_setzero_ps (),
1231 1.4 mrg (__mmask8) -1);
1232 1.4 mrg }
1233 1.4 mrg
/* Unmasked form with explicit rounding argument: calls
   __builtin_ia32_reducess_mask_round on __A and __B with control value
   __C, a zero __v4sf, an all-ones __mmask8 and __R last.  */
1234 1.7 mrg extern __inline __m128
1235 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1236 1.7 mrg _mm_reduce_round_ss (__m128 __A, __m128 __B, int __C, const int __R)
1237 1.7 mrg {
1238 1.7 mrg return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A,
1239 1.7 mrg (__v4sf) __B, __C,
1240 1.7 mrg (__v4sf)
1241 1.7 mrg _mm_setzero_ps (),
1242 1.7 mrg (__mmask8) -1, __R);
1243 1.7 mrg }
1244 1.4 mrg
/* Masked form: calls __builtin_ia32_reducess_mask on __A and __B with
   control value __C, __W as the fourth operand and __U as the mask.  */
1245 1.4 mrg extern __inline __m128
1246 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1247 1.4 mrg _mm_mask_reduce_ss (__m128 __W, __mmask8 __U, __m128 __A,
1248 1.4 mrg __m128 __B, int __C)
1249 1.4 mrg {
1250 1.4 mrg return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A,
1251 1.4 mrg (__v4sf) __B, __C,
1252 1.4 mrg (__v4sf) __W,
1253 1.4 mrg (__mmask8) __U);
1254 1.4 mrg }
1255 1.4 mrg
/* Masked form with explicit rounding argument: calls
   __builtin_ia32_reducess_mask_round on __A and __B with control value
   __C, __W as the fourth operand, __U as the mask (passed without a
   cast; it is already __mmask8) and __R last.  */
1256 1.4 mrg extern __inline __m128
1257 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1258 1.7 mrg _mm_mask_reduce_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1259 1.7 mrg __m128 __B, int __C, const int __R)
1260 1.7 mrg {
1261 1.7 mrg return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A,
1262 1.7 mrg (__v4sf) __B, __C,
1263 1.7 mrg (__v4sf) __W,
1264 1.7 mrg __U, __R);
1265 1.7 mrg }
1266 1.7 mrg
/* Zero-operand masked form: calls __builtin_ia32_reducess_mask on __A
   and __B with control value __C, a zero __v4sf as the fourth operand
   and __U as the mask.  */
1267 1.7 mrg extern __inline __m128
1268 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1269 1.4 mrg _mm_maskz_reduce_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C)
1270 1.4 mrg {
1271 1.4 mrg return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A,
1272 1.4 mrg (__v4sf) __B, __C,
1273 1.4 mrg (__v4sf) _mm_setzero_ps (),
1274 1.4 mrg (__mmask8) __U);
1275 1.1 mrg }
1276 1.1 mrg
/* Zero-operand masked form with explicit rounding argument: calls
   __builtin_ia32_reducess_mask_round on __A and __B with control value
   __C, a zero __v4sf, __U as the mask and __R last.  */
1277 1.7 mrg extern __inline __m128
1278 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1279 1.7 mrg _mm_maskz_reduce_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1280 1.7 mrg int __C, const int __R)
1281 1.7 mrg {
1282 1.7 mrg return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A,
1283 1.7 mrg (__v4sf) __B, __C,
1284 1.7 mrg (__v4sf)
1285 1.7 mrg _mm_setzero_ps (),
1286 1.7 mrg __U, __R);
1287 1.7 mrg }
1288 1.7 mrg
/* Unmasked form: calls __builtin_ia32_rangesd128_mask_round on __A and
   __B with selector __C, a zero __v2df as the fourth operand, an
   all-ones __mmask8 and _MM_FROUND_CUR_DIRECTION.  */
1289 1.1 mrg extern __inline __m128d
1290 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1291 1.1 mrg _mm_range_sd (__m128d __A, __m128d __B, int __C)
1292 1.1 mrg {
1293 1.4 mrg return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
1294 1.4 mrg (__v2df) __B, __C,
1295 1.4 mrg (__v2df)
1296 1.4 mrg _mm_setzero_pd (),
1297 1.4 mrg (__mmask8) -1,
1298 1.4 mrg _MM_FROUND_CUR_DIRECTION);
1299 1.4 mrg }
1300 1.4 mrg
/* Masked form: calls __builtin_ia32_rangesd128_mask_round on __A and
   __B with selector __C, __W as the fourth operand, __U as the mask and
   _MM_FROUND_CUR_DIRECTION.  */
1301 1.4 mrg extern __inline __m128d
1302 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1303 1.4 mrg _mm_mask_range_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, int __C)
1304 1.4 mrg {
1305 1.4 mrg return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
1306 1.1 mrg (__v2df) __B, __C,
1307 1.4 mrg (__v2df) __W,
1308 1.4 mrg (__mmask8) __U,
1309 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1310 1.1 mrg }
1311 1.1 mrg
/* Zero-operand masked form: calls __builtin_ia32_rangesd128_mask_round
   on __A and __B with selector __C, a zero __v2df as the fourth
   operand, __U as the mask and _MM_FROUND_CUR_DIRECTION.  */
1312 1.4 mrg extern __inline __m128d
1313 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1314 1.4 mrg _mm_maskz_range_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
1315 1.4 mrg {
1316 1.4 mrg return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
1317 1.4 mrg (__v2df) __B, __C,
1318 1.4 mrg (__v2df)
1319 1.4 mrg _mm_setzero_pd (),
1320 1.4 mrg (__mmask8) __U,
1321 1.4 mrg _MM_FROUND_CUR_DIRECTION);
1322 1.4 mrg }
1323 1.1 mrg
/* Unmasked form: calls __builtin_ia32_rangess128_mask_round on __A and
   __B with selector __C, a zero __v4sf as the fourth operand, an
   all-ones __mmask8 and _MM_FROUND_CUR_DIRECTION.  */
1324 1.1 mrg extern __inline __m128
1325 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1326 1.1 mrg _mm_range_ss (__m128 __A, __m128 __B, int __C)
1327 1.1 mrg {
1328 1.4 mrg return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
1329 1.4 mrg (__v4sf) __B, __C,
1330 1.4 mrg (__v4sf)
1331 1.4 mrg _mm_setzero_ps (),
1332 1.4 mrg (__mmask8) -1,
1333 1.4 mrg _MM_FROUND_CUR_DIRECTION);
1334 1.4 mrg }
1335 1.4 mrg
/* Masked form: calls __builtin_ia32_rangess128_mask_round on __A and
   __B with selector __C, __W as the fourth operand, __U as the mask and
   _MM_FROUND_CUR_DIRECTION.  */
1336 1.4 mrg extern __inline __m128
1337 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1338 1.4 mrg _mm_mask_range_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, int __C)
1339 1.4 mrg {
1340 1.4 mrg return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
1341 1.1 mrg (__v4sf) __B, __C,
1342 1.4 mrg (__v4sf) __W,
1343 1.4 mrg (__mmask8) __U,
1344 1.4 mrg _MM_FROUND_CUR_DIRECTION);
1345 1.4 mrg }
1346 1.4 mrg
1347 1.4 mrg
/* Zero-operand masked form: calls __builtin_ia32_rangess128_mask_round
   on __A and __B with selector __C, a zero __v4sf as the fourth
   operand, __U as the mask and _MM_FROUND_CUR_DIRECTION.  */
1348 1.4 mrg extern __inline __m128
1349 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1350 1.4 mrg _mm_maskz_range_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C)
1351 1.4 mrg {
1352 1.4 mrg return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
1353 1.4 mrg (__v4sf) __B, __C,
1354 1.4 mrg (__v4sf)
1355 1.4 mrg _mm_setzero_ps (),
1356 1.4 mrg (__mmask8) __U,
1357 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1358 1.1 mrg }
1359 1.1 mrg
/* Unmasked form with explicit rounding argument: calls
   __builtin_ia32_rangesd128_mask_round on __A and __B with selector
   __C, a zero __v2df, an all-ones __mmask8 and __R last.  */
1360 1.1 mrg extern __inline __m128d
1361 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1362 1.1 mrg _mm_range_round_sd (__m128d __A, __m128d __B, int __C, const int __R)
1363 1.1 mrg {
1364 1.4 mrg return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
1365 1.1 mrg (__v2df) __B, __C,
1366 1.4 mrg (__v2df)
1367 1.4 mrg _mm_setzero_pd (),
1368 1.4 mrg (__mmask8) -1, __R);
1369 1.4 mrg }
1370 1.4 mrg
/* Masked form with explicit rounding argument: calls
   __builtin_ia32_rangesd128_mask_round on __A and __B with selector
   __C, __W as the fourth operand, __U as the mask and __R last.  */
1371 1.4 mrg extern __inline __m128d
1372 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1373 1.4 mrg _mm_mask_range_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
1374 1.4 mrg int __C, const int __R)
1375 1.4 mrg {
1376 1.4 mrg return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
1377 1.4 mrg (__v2df) __B, __C,
1378 1.4 mrg (__v2df) __W,
1379 1.4 mrg (__mmask8) __U, __R);
1380 1.4 mrg }
1381 1.4 mrg
/* Zero-operand masked form with explicit rounding argument: calls
   __builtin_ia32_rangesd128_mask_round on __A and __B with selector
   __C, a zero __v2df, __U as the mask and __R last.  */
1382 1.4 mrg extern __inline __m128d
1383 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1384 1.4 mrg _mm_maskz_range_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C,
1385 1.4 mrg const int __R)
1386 1.4 mrg {
1387 1.4 mrg return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
1388 1.4 mrg (__v2df) __B, __C,
1389 1.4 mrg (__v2df)
1390 1.4 mrg _mm_setzero_pd (),
1391 1.4 mrg (__mmask8) __U, __R);
1392 1.1 mrg }
1393 1.1 mrg
/* Unmasked form with explicit rounding argument: calls
   __builtin_ia32_rangess128_mask_round on __A and __B with selector
   __C, a zero __v4sf, an all-ones __mmask8 and __R last.  */
1394 1.1 mrg extern __inline __m128
1395 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1396 1.1 mrg _mm_range_round_ss (__m128 __A, __m128 __B, int __C, const int __R)
1397 1.1 mrg {
1398 1.4 mrg return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
1399 1.4 mrg (__v4sf) __B, __C,
1400 1.4 mrg (__v4sf)
1401 1.4 mrg _mm_setzero_ps (),
1402 1.4 mrg (__mmask8) -1, __R);
1403 1.4 mrg }
1404 1.4 mrg
/* Masked form with explicit rounding argument: calls
   __builtin_ia32_rangess128_mask_round on __A and __B with selector
   __C, __W as the fourth operand, __U as the mask and __R last.  */
1405 1.4 mrg extern __inline __m128
1406 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1407 1.4 mrg _mm_mask_range_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
1408 1.4 mrg int __C, const int __R)
1409 1.4 mrg {
1410 1.4 mrg return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
1411 1.4 mrg (__v4sf) __B, __C,
1412 1.4 mrg (__v4sf) __W,
1413 1.4 mrg (__mmask8) __U, __R);
1414 1.4 mrg }
1415 1.4 mrg
/* Zero-operand masked form with explicit rounding argument: calls
   __builtin_ia32_rangess128_mask_round on __A and __B with selector
   __C, a zero __v4sf, __U as the mask and __R last.  */
1416 1.4 mrg extern __inline __m128
1417 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1418 1.4 mrg _mm_maskz_range_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C,
1419 1.4 mrg const int __R)
1420 1.4 mrg {
1421 1.4 mrg return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
1422 1.1 mrg (__v4sf) __B, __C,
1423 1.4 mrg (__v4sf)
1424 1.4 mrg _mm_setzero_ps (),
1425 1.4 mrg (__mmask8) __U, __R);
1426 1.1 mrg }
1427 1.1 mrg
/* Calls __builtin_ia32_fpclassss_mask on __A with category selector
   __imm and an all-ones __mmask8; the result is returned as
   __mmask8.  */
1428 1.1 mrg extern __inline __mmask8
1429 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1430 1.1 mrg _mm_fpclass_ss_mask (__m128 __A, const int __imm)
1431 1.1 mrg {
1432 1.6 mrg return (__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) __A, __imm,
1433 1.6 mrg (__mmask8) -1);
1434 1.1 mrg }
1435 1.1 mrg
/* Calls __builtin_ia32_fpclasssd_mask on __A with category selector
   __imm and an all-ones __mmask8; the result is returned as
   __mmask8.  */
1436 1.1 mrg extern __inline __mmask8
1437 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1438 1.1 mrg _mm_fpclass_sd_mask (__m128d __A, const int __imm)
1439 1.1 mrg {
1440 1.6 mrg return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm,
1441 1.6 mrg (__mmask8) -1);
1442 1.6 mrg }
1443 1.6 mrg
/* Masked form: calls __builtin_ia32_fpclassss_mask on __A with category
   selector __imm and the caller's mask __U as the third argument.  */
1444 1.6 mrg extern __inline __mmask8
1445 1.6 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1446 1.6 mrg _mm_mask_fpclass_ss_mask (__mmask8 __U, __m128 __A, const int __imm)
1447 1.6 mrg {
1448 1.6 mrg return (__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) __A, __imm, __U);
1449 1.6 mrg }
1450 1.6 mrg
/* Masked form: calls __builtin_ia32_fpclasssd_mask on __A with category
   selector __imm and the caller's mask __U as the third argument.  */
1451 1.6 mrg extern __inline __mmask8
1452 1.6 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1453 1.6 mrg _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
1454 1.6 mrg {
1455 1.6 mrg return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm, __U);
1456 1.1 mrg }
1457 1.1 mrg
/* Unmasked form with explicit last argument: calls
   __builtin_ia32_cvttpd2qq512_mask on __A with a zero __v8di, an
   all-ones __mmask8 and __R in place of _MM_FROUND_CUR_DIRECTION.  */
1458 1.1 mrg extern __inline __m512i
1459 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1460 1.1 mrg _mm512_cvtt_roundpd_epi64 (__m512d __A, const int __R)
1461 1.1 mrg {
1462 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
1463 1.1 mrg (__v8di)
1464 1.1 mrg _mm512_setzero_si512 (),
1465 1.1 mrg (__mmask8) -1,
1466 1.1 mrg __R);
1467 1.1 mrg }
1468 1.1 mrg
/* Masked form: calls __builtin_ia32_cvttpd2qq512_mask on __A with __W
   as the second operand, __U as the mask and __R as the last
   argument.  */
1469 1.1 mrg extern __inline __m512i
1470 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1471 1.1 mrg _mm512_mask_cvtt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
1472 1.1 mrg const int __R)
1473 1.1 mrg {
1474 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
1475 1.1 mrg (__v8di) __W,
1476 1.1 mrg (__mmask8) __U,
1477 1.1 mrg __R);
1478 1.1 mrg }
1479 1.1 mrg
/* Zero-operand masked form: calls __builtin_ia32_cvttpd2qq512_mask on
   __A with a zero __v8di, __U as the mask and __R as the last
   argument.  */
1480 1.1 mrg extern __inline __m512i
1481 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1482 1.1 mrg _mm512_maskz_cvtt_roundpd_epi64 (__mmask8 __U, __m512d __A,
1483 1.1 mrg const int __R)
1484 1.1 mrg {
1485 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
1486 1.1 mrg (__v8di)
1487 1.1 mrg _mm512_setzero_si512 (),
1488 1.1 mrg (__mmask8) __U,
1489 1.1 mrg __R);
1490 1.1 mrg }
1491 1.1 mrg
/* Unmasked form with explicit last argument: calls
   __builtin_ia32_cvttpd2uqq512_mask on __A with a zero __v8di, an
   all-ones __mmask8 and __R.  */
1492 1.1 mrg extern __inline __m512i
1493 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1494 1.1 mrg _mm512_cvtt_roundpd_epu64 (__m512d __A, const int __R)
1495 1.1 mrg {
1496 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
1497 1.1 mrg (__v8di)
1498 1.1 mrg _mm512_setzero_si512 (),
1499 1.1 mrg (__mmask8) -1,
1500 1.1 mrg __R);
1501 1.1 mrg }
1502 1.1 mrg
/* Masked form: calls __builtin_ia32_cvttpd2uqq512_mask on __A with __W
   as the second operand, __U as the mask and __R as the last
   argument.  */
1503 1.1 mrg extern __inline __m512i
1504 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1505 1.1 mrg _mm512_mask_cvtt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
1506 1.1 mrg const int __R)
1507 1.1 mrg {
1508 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
1509 1.1 mrg (__v8di) __W,
1510 1.1 mrg (__mmask8) __U,
1511 1.1 mrg __R);
1512 1.1 mrg }
1513 1.1 mrg
/* Zero-operand masked form: calls __builtin_ia32_cvttpd2uqq512_mask on
   __A with a zero __v8di, __U as the mask and __R as the last
   argument.  */
1514 1.1 mrg extern __inline __m512i
1515 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1516 1.1 mrg _mm512_maskz_cvtt_roundpd_epu64 (__mmask8 __U, __m512d __A,
1517 1.1 mrg const int __R)
1518 1.1 mrg {
1519 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
1520 1.1 mrg (__v8di)
1521 1.1 mrg _mm512_setzero_si512 (),
1522 1.1 mrg (__mmask8) __U,
1523 1.1 mrg __R);
1524 1.1 mrg }
1525 1.1 mrg
/* Unmasked form with explicit last argument: calls
   __builtin_ia32_cvttps2qq512_mask on __A (as __v8sf) with a zero
   __v8di, an all-ones __mmask8 and __R.  */
1526 1.1 mrg extern __inline __m512i
1527 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1528 1.1 mrg _mm512_cvtt_roundps_epi64 (__m256 __A, const int __R)
1529 1.1 mrg {
1530 1.1 mrg return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
1531 1.1 mrg (__v8di)
1532 1.1 mrg _mm512_setzero_si512 (),
1533 1.1 mrg (__mmask8) -1,
1534 1.1 mrg __R);
1535 1.1 mrg }
1536 1.1 mrg
/* Masked form: calls __builtin_ia32_cvttps2qq512_mask on __A with __W
   as the second operand, __U as the mask and __R as the last
   argument.  */
1537 1.1 mrg extern __inline __m512i
1538 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1539 1.1 mrg _mm512_mask_cvtt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
1540 1.1 mrg const int __R)
1541 1.1 mrg {
1542 1.1 mrg return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
1543 1.1 mrg (__v8di) __W,
1544 1.1 mrg (__mmask8) __U,
1545 1.1 mrg __R);
1546 1.1 mrg }
1547 1.1 mrg
/* Zero-operand masked form: calls __builtin_ia32_cvttps2qq512_mask on
   __A with a zero __v8di, __U as the mask and __R as the last
   argument.  */
1548 1.1 mrg extern __inline __m512i
1549 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1550 1.1 mrg _mm512_maskz_cvtt_roundps_epi64 (__mmask8 __U, __m256 __A,
1551 1.1 mrg const int __R)
1552 1.1 mrg {
1553 1.1 mrg return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
1554 1.1 mrg (__v8di)
1555 1.1 mrg _mm512_setzero_si512 (),
1556 1.1 mrg (__mmask8) __U,
1557 1.1 mrg __R);
1558 1.1 mrg }
1559 1.1 mrg
/* Unmasked form with explicit last argument: calls
   __builtin_ia32_cvttps2uqq512_mask on __A (as __v8sf) with a zero
   __v8di, an all-ones __mmask8 and __R.  */
1560 1.1 mrg extern __inline __m512i
1561 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1562 1.1 mrg _mm512_cvtt_roundps_epu64 (__m256 __A, const int __R)
1563 1.1 mrg {
1564 1.1 mrg return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
1565 1.1 mrg (__v8di)
1566 1.1 mrg _mm512_setzero_si512 (),
1567 1.1 mrg (__mmask8) -1,
1568 1.1 mrg __R);
1569 1.1 mrg }
1570 1.1 mrg
/* Masked form: calls __builtin_ia32_cvttps2uqq512_mask on __A with __W
   as the second operand, __U as the mask and __R as the last
   argument.  */
1571 1.1 mrg extern __inline __m512i
1572 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1573 1.1 mrg _mm512_mask_cvtt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
1574 1.1 mrg const int __R)
1575 1.1 mrg {
1576 1.1 mrg return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
1577 1.1 mrg (__v8di) __W,
1578 1.1 mrg (__mmask8) __U,
1579 1.1 mrg __R);
1580 1.1 mrg }
1581 1.1 mrg
/* Zero-operand masked form: calls __builtin_ia32_cvttps2uqq512_mask on
   __A with a zero __v8di, __U as the mask and __R as the last
   argument.  */
1582 1.1 mrg extern __inline __m512i
1583 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1584 1.1 mrg _mm512_maskz_cvtt_roundps_epu64 (__mmask8 __U, __m256 __A,
1585 1.1 mrg const int __R)
1586 1.1 mrg {
1587 1.1 mrg return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
1588 1.1 mrg (__v8di)
1589 1.1 mrg _mm512_setzero_si512 (),
1590 1.1 mrg (__mmask8) __U,
1591 1.1 mrg __R);
1592 1.1 mrg }
1593 1.1 mrg
/* Unmasked form with explicit last argument: calls
   __builtin_ia32_cvtpd2qq512_mask on __A with a zero __v8di, an
   all-ones __mmask8 and __R.  */
1594 1.1 mrg extern __inline __m512i
1595 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1596 1.1 mrg _mm512_cvt_roundpd_epi64 (__m512d __A, const int __R)
1597 1.1 mrg {
1598 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
1599 1.1 mrg (__v8di)
1600 1.1 mrg _mm512_setzero_si512 (),
1601 1.1 mrg (__mmask8) -1,
1602 1.1 mrg __R);
1603 1.1 mrg }
1604 1.1 mrg
/* Masked form: calls __builtin_ia32_cvtpd2qq512_mask on __A with __W as
   the second operand, __U as the mask and __R as the last argument.  */
1605 1.1 mrg extern __inline __m512i
1606 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1607 1.1 mrg _mm512_mask_cvt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
1608 1.1 mrg const int __R)
1609 1.1 mrg {
1610 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
1611 1.1 mrg (__v8di) __W,
1612 1.1 mrg (__mmask8) __U,
1613 1.1 mrg __R);
1614 1.1 mrg }
1615 1.1 mrg
/* Zero-operand masked form: calls __builtin_ia32_cvtpd2qq512_mask on
   __A with a zero __v8di, __U as the mask and __R as the last
   argument.  */
1616 1.1 mrg extern __inline __m512i
1617 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1618 1.1 mrg _mm512_maskz_cvt_roundpd_epi64 (__mmask8 __U, __m512d __A,
1619 1.1 mrg const int __R)
1620 1.1 mrg {
1621 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
1622 1.1 mrg (__v8di)
1623 1.1 mrg _mm512_setzero_si512 (),
1624 1.1 mrg (__mmask8) __U,
1625 1.1 mrg __R);
1626 1.1 mrg }
1627 1.1 mrg
/* Unmasked form with explicit last argument: calls
   __builtin_ia32_cvtpd2uqq512_mask on __A with a zero __v8di, an
   all-ones __mmask8 and __R.  */
1628 1.1 mrg extern __inline __m512i
1629 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1630 1.1 mrg _mm512_cvt_roundpd_epu64 (__m512d __A, const int __R)
1631 1.1 mrg {
1632 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
1633 1.1 mrg (__v8di)
1634 1.1 mrg _mm512_setzero_si512 (),
1635 1.1 mrg (__mmask8) -1,
1636 1.1 mrg __R);
1637 1.1 mrg }
1638 1.1 mrg
/* Masked form: calls __builtin_ia32_cvtpd2uqq512_mask on __A with __W
   as the second operand, __U as the mask and __R as the last
   argument.  */
1639 1.1 mrg extern __inline __m512i
1640 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1641 1.1 mrg _mm512_mask_cvt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
1642 1.1 mrg const int __R)
1643 1.1 mrg {
1644 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
1645 1.1 mrg (__v8di) __W,
1646 1.1 mrg (__mmask8) __U,
1647 1.1 mrg __R);
1648 1.1 mrg }
1649 1.1 mrg
/* Zero-masked form: calls __builtin_ia32_cvtpd2uqq512_mask on __A (as __v8df)
   with a zeroed __v8di second argument and __U as the __mmask8; __R is
   forwarded as the last argument.  */
1650 1.1 mrg extern __inline __m512i
1651 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1652 1.1 mrg _mm512_maskz_cvt_roundpd_epu64 (__mmask8 __U, __m512d __A,
1653 1.1 mrg const int __R)
1654 1.1 mrg {
1655 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
1656 1.1 mrg (__v8di)
1657 1.1 mrg _mm512_setzero_si512 (),
1658 1.1 mrg (__mmask8) __U,
1659 1.1 mrg __R);
1660 1.1 mrg }
1661 1.1 mrg
/* Unmasked form: hands the 256-bit __A (as __v8sf) to
   __builtin_ia32_cvtps2qq512_mask with a zeroed __v8di second argument and an
   all-ones __mmask8; __R is forwarded as the last argument.  Returns the
   result as __m512i.  */
1662 1.1 mrg extern __inline __m512i
1663 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1664 1.1 mrg _mm512_cvt_roundps_epi64 (__m256 __A, const int __R)
1665 1.1 mrg {
1666 1.1 mrg return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
1667 1.1 mrg (__v8di)
1668 1.1 mrg _mm512_setzero_si512 (),
1669 1.1 mrg (__mmask8) -1,
1670 1.1 mrg __R);
1671 1.1 mrg }
1672 1.1 mrg
/* Masked form: calls __builtin_ia32_cvtps2qq512_mask on __A (as __v8sf) with
   __W (as __v8di) as the second argument and __U as the __mmask8; __R is
   forwarded as the last argument.  */
1673 1.1 mrg extern __inline __m512i
1674 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1675 1.1 mrg _mm512_mask_cvt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
1676 1.1 mrg const int __R)
1677 1.1 mrg {
1678 1.1 mrg return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
1679 1.1 mrg (__v8di) __W,
1680 1.1 mrg (__mmask8) __U,
1681 1.1 mrg __R);
1682 1.1 mrg }
1683 1.1 mrg
/* Zero-masked form: calls __builtin_ia32_cvtps2qq512_mask on __A (as __v8sf)
   with a zeroed __v8di second argument and __U as the __mmask8; __R is
   forwarded as the last argument.  */
1684 1.1 mrg extern __inline __m512i
1685 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1686 1.1 mrg _mm512_maskz_cvt_roundps_epi64 (__mmask8 __U, __m256 __A,
1687 1.1 mrg const int __R)
1688 1.1 mrg {
1689 1.1 mrg return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
1690 1.1 mrg (__v8di)
1691 1.1 mrg _mm512_setzero_si512 (),
1692 1.1 mrg (__mmask8) __U,
1693 1.1 mrg __R);
1694 1.1 mrg }
1695 1.1 mrg
/* Unmasked form: hands the 256-bit __A (as __v8sf) to
   __builtin_ia32_cvtps2uqq512_mask with a zeroed __v8di second argument and an
   all-ones __mmask8; __R is forwarded as the last argument.  Returns the
   result as __m512i.  */
1696 1.1 mrg extern __inline __m512i
1697 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1698 1.1 mrg _mm512_cvt_roundps_epu64 (__m256 __A, const int __R)
1699 1.1 mrg {
1700 1.1 mrg return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
1701 1.1 mrg (__v8di)
1702 1.1 mrg _mm512_setzero_si512 (),
1703 1.1 mrg (__mmask8) -1,
1704 1.1 mrg __R);
1705 1.1 mrg }
1706 1.1 mrg
/* Masked form: calls __builtin_ia32_cvtps2uqq512_mask on __A (as __v8sf) with
   __W (as __v8di) as the second argument and __U as the __mmask8; __R is
   forwarded as the last argument.  */
1707 1.1 mrg extern __inline __m512i
1708 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1709 1.1 mrg _mm512_mask_cvt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
1710 1.1 mrg const int __R)
1711 1.1 mrg {
1712 1.1 mrg return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
1713 1.1 mrg (__v8di) __W,
1714 1.1 mrg (__mmask8) __U,
1715 1.1 mrg __R);
1716 1.1 mrg }
1717 1.1 mrg
/* Zero-masked form: calls __builtin_ia32_cvtps2uqq512_mask on __A (as __v8sf)
   with a zeroed __v8di second argument and __U as the __mmask8; __R is
   forwarded as the last argument.  */
1718 1.1 mrg extern __inline __m512i
1719 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1720 1.1 mrg _mm512_maskz_cvt_roundps_epu64 (__mmask8 __U, __m256 __A,
1721 1.1 mrg const int __R)
1722 1.1 mrg {
1723 1.1 mrg return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
1724 1.1 mrg (__v8di)
1725 1.1 mrg _mm512_setzero_si512 (),
1726 1.1 mrg (__mmask8) __U,
1727 1.1 mrg __R);
1728 1.1 mrg }
1729 1.1 mrg
/* Unmasked form: hands __A (as __v8di) to __builtin_ia32_cvtqq2ps512_mask with
   a zeroed __v8sf second argument and an all-ones __mmask8; __R is forwarded
   as the last argument.  The result is returned as a 256-bit __m256.  */
1730 1.1 mrg extern __inline __m256
1731 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1732 1.1 mrg _mm512_cvt_roundepi64_ps (__m512i __A, const int __R)
1733 1.1 mrg {
1734 1.1 mrg return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
1735 1.1 mrg (__v8sf)
1736 1.1 mrg _mm256_setzero_ps (),
1737 1.1 mrg (__mmask8) -1,
1738 1.1 mrg __R);
1739 1.1 mrg }
1740 1.1 mrg
/* Masked form: calls __builtin_ia32_cvtqq2ps512_mask on __A (as __v8di) with
   __W (as __v8sf) as the second argument and __U as the __mmask8; __R is
   forwarded as the last argument.  */
1741 1.1 mrg extern __inline __m256
1742 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1743 1.1 mrg _mm512_mask_cvt_roundepi64_ps (__m256 __W, __mmask8 __U, __m512i __A,
1744 1.1 mrg const int __R)
1745 1.1 mrg {
1746 1.1 mrg return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
1747 1.1 mrg (__v8sf) __W,
1748 1.1 mrg (__mmask8) __U,
1749 1.1 mrg __R);
1750 1.1 mrg }
1751 1.1 mrg
/* Zero-masked form: calls __builtin_ia32_cvtqq2ps512_mask on __A (as __v8di)
   with a zeroed __v8sf second argument and __U as the __mmask8; __R is
   forwarded as the last argument.  */
1752 1.1 mrg extern __inline __m256
1753 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1754 1.1 mrg _mm512_maskz_cvt_roundepi64_ps (__mmask8 __U, __m512i __A,
1755 1.1 mrg const int __R)
1756 1.1 mrg {
1757 1.1 mrg return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
1758 1.1 mrg (__v8sf)
1759 1.1 mrg _mm256_setzero_ps (),
1760 1.1 mrg (__mmask8) __U,
1761 1.1 mrg __R);
1762 1.1 mrg }
1763 1.1 mrg
/* Unmasked form: hands __A (as __v8di) to __builtin_ia32_cvtuqq2ps512_mask
   with a zeroed __v8sf second argument and an all-ones __mmask8; __R is
   forwarded as the last argument.  The result is returned as __m256.  */
1764 1.1 mrg extern __inline __m256
1765 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1766 1.1 mrg _mm512_cvt_roundepu64_ps (__m512i __A, const int __R)
1767 1.1 mrg {
1768 1.1 mrg return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1769 1.1 mrg (__v8sf)
1770 1.1 mrg _mm256_setzero_ps (),
1771 1.1 mrg (__mmask8) -1,
1772 1.1 mrg __R);
1773 1.1 mrg }
1774 1.1 mrg
/* Masked form: calls __builtin_ia32_cvtuqq2ps512_mask on __A (as __v8di) with
   __W (as __v8sf) as the second argument and __U as the __mmask8; __R is
   forwarded as the last argument.  */
1775 1.1 mrg extern __inline __m256
1776 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1777 1.1 mrg _mm512_mask_cvt_roundepu64_ps (__m256 __W, __mmask8 __U, __m512i __A,
1778 1.1 mrg const int __R)
1779 1.1 mrg {
1780 1.1 mrg return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1781 1.1 mrg (__v8sf) __W,
1782 1.1 mrg (__mmask8) __U,
1783 1.1 mrg __R);
1784 1.1 mrg }
1785 1.1 mrg
/* Zero-masked form: calls __builtin_ia32_cvtuqq2ps512_mask on __A (as __v8di)
   with a zeroed __v8sf second argument and __U as the __mmask8; __R is
   forwarded as the last argument.  */
1786 1.1 mrg extern __inline __m256
1787 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1788 1.1 mrg _mm512_maskz_cvt_roundepu64_ps (__mmask8 __U, __m512i __A,
1789 1.1 mrg const int __R)
1790 1.1 mrg {
1791 1.1 mrg return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1792 1.1 mrg (__v8sf)
1793 1.1 mrg _mm256_setzero_ps (),
1794 1.1 mrg (__mmask8) __U,
1795 1.1 mrg __R);
1796 1.1 mrg }
1797 1.1 mrg
/* Unmasked form: hands __A (as __v8di) to __builtin_ia32_cvtqq2pd512_mask with
   a zeroed __v8df second argument and an all-ones __mmask8; __R is forwarded
   as the last argument.  The result is returned as __m512d.  */
1798 1.1 mrg extern __inline __m512d
1799 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1800 1.1 mrg _mm512_cvt_roundepi64_pd (__m512i __A, const int __R)
1801 1.1 mrg {
1802 1.1 mrg return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1803 1.1 mrg (__v8df)
1804 1.1 mrg _mm512_setzero_pd (),
1805 1.1 mrg (__mmask8) -1,
1806 1.1 mrg __R);
1807 1.1 mrg }
1808 1.1 mrg
/* Masked form: calls __builtin_ia32_cvtqq2pd512_mask on __A (as __v8di) with
   __W (as __v8df) as the second argument and __U as the __mmask8; __R is
   forwarded as the last argument.  */
1809 1.1 mrg extern __inline __m512d
1810 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1811 1.1 mrg _mm512_mask_cvt_roundepi64_pd (__m512d __W, __mmask8 __U, __m512i __A,
1812 1.1 mrg const int __R)
1813 1.1 mrg {
1814 1.1 mrg return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1815 1.1 mrg (__v8df) __W,
1816 1.1 mrg (__mmask8) __U,
1817 1.1 mrg __R);
1818 1.1 mrg }
1819 1.1 mrg
/* Zero-masked form: calls __builtin_ia32_cvtqq2pd512_mask on __A (as __v8di)
   with a zeroed __v8df second argument and __U as the __mmask8; __R is
   forwarded as the last argument.  */
1820 1.1 mrg extern __inline __m512d
1821 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1822 1.1 mrg _mm512_maskz_cvt_roundepi64_pd (__mmask8 __U, __m512i __A,
1823 1.1 mrg const int __R)
1824 1.1 mrg {
1825 1.1 mrg return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1826 1.1 mrg (__v8df)
1827 1.1 mrg _mm512_setzero_pd (),
1828 1.1 mrg (__mmask8) __U,
1829 1.1 mrg __R);
1830 1.1 mrg }
1831 1.1 mrg
/* Unmasked form: hands __A (as __v8di) to __builtin_ia32_cvtuqq2pd512_mask
   with a zeroed __v8df second argument and an all-ones __mmask8; __R is
   forwarded as the last argument.  The result is returned as __m512d.  */
1832 1.1 mrg extern __inline __m512d
1833 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1834 1.1 mrg _mm512_cvt_roundepu64_pd (__m512i __A, const int __R)
1835 1.1 mrg {
1836 1.1 mrg return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1837 1.1 mrg (__v8df)
1838 1.1 mrg _mm512_setzero_pd (),
1839 1.1 mrg (__mmask8) -1,
1840 1.1 mrg __R);
1841 1.1 mrg }
1842 1.1 mrg
/* Masked form: calls __builtin_ia32_cvtuqq2pd512_mask on __A (as __v8di) with
   __W (as __v8df) as the second argument and __U as the __mmask8; __R is
   forwarded as the last argument.  */
1843 1.1 mrg extern __inline __m512d
1844 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1845 1.1 mrg _mm512_mask_cvt_roundepu64_pd (__m512d __W, __mmask8 __U, __m512i __A,
1846 1.1 mrg const int __R)
1847 1.1 mrg {
1848 1.1 mrg return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1849 1.1 mrg (__v8df) __W,
1850 1.1 mrg (__mmask8) __U,
1851 1.1 mrg __R);
1852 1.1 mrg }
1853 1.1 mrg
/* Zero-masked form: calls __builtin_ia32_cvtuqq2pd512_mask on __A (as __v8di)
   with a zeroed __v8df second argument and __U as the __mmask8; __R is
   forwarded as the last argument.  */
1854 1.1 mrg extern __inline __m512d
1855 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1856 1.1 mrg _mm512_maskz_cvt_roundepu64_pd (__mmask8 __U, __m512i __A,
1857 1.1 mrg const int __R)
1858 1.1 mrg {
1859 1.1 mrg return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1860 1.1 mrg (__v8df)
1861 1.1 mrg _mm512_setzero_pd (),
1862 1.1 mrg (__mmask8) __U,
1863 1.1 mrg __R);
1864 1.1 mrg }
1865 1.1 mrg
/* Unmasked form: calls __builtin_ia32_reducepd512_mask with __A (as __v8df),
   the int __B as the second argument, a zeroed __v8df third argument and an
   all-ones __mmask8.  No rounding argument is passed by this variant.  */
1866 1.1 mrg extern __inline __m512d
1867 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1868 1.1 mrg _mm512_reduce_pd (__m512d __A, int __B)
1869 1.1 mrg {
1870 1.1 mrg return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
1871 1.1 mrg (__v8df)
1872 1.1 mrg _mm512_setzero_pd (),
1873 1.1 mrg (__mmask8) -1);
1874 1.1 mrg }
1875 1.1 mrg
/* Unmasked form with explicit __R: calls
   __builtin_ia32_reducepd512_mask_round with __A (as __v8df), __B, a zeroed
   __v8df third argument, an all-ones __mmask8 and __R as the last argument.  */
1876 1.1 mrg extern __inline __m512d
1877 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1878 1.7 mrg _mm512_reduce_round_pd (__m512d __A, int __B, const int __R)
1879 1.7 mrg {
1880 1.7 mrg return (__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df) __A,
1881 1.7 mrg __B,
1882 1.7 mrg (__v8df)
1883 1.7 mrg _mm512_setzero_pd (),
1884 1.7 mrg (__mmask8) -1, __R);
1885 1.7 mrg }
1886 1.7 mrg
/* Masked form: calls __builtin_ia32_reducepd512_mask with __A (as __v8df),
   __B, __W (as __v8df) as the third argument and __U as the __mmask8.  */
1887 1.7 mrg extern __inline __m512d
1888 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1889 1.1 mrg _mm512_mask_reduce_pd (__m512d __W, __mmask8 __U, __m512d __A, int __B)
1890 1.1 mrg {
1891 1.1 mrg return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
1892 1.1 mrg (__v8df) __W,
1893 1.1 mrg (__mmask8) __U);
1894 1.1 mrg }
1895 1.1 mrg
/* Masked form with explicit __R: calls __builtin_ia32_reducepd512_mask_round
   with __A (as __v8df), __B, __W (as __v8df), __U and __R.  Note that __U is
   passed without an explicit (__mmask8) cast here; it already has that
   parameter type.  */
1896 1.1 mrg extern __inline __m512d
1897 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1898 1.7 mrg _mm512_mask_reduce_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1899 1.7 mrg int __B, const int __R)
1900 1.7 mrg {
1901 1.7 mrg return (__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df) __A,
1902 1.7 mrg __B,
1903 1.7 mrg (__v8df) __W,
1904 1.7 mrg __U, __R);
1905 1.7 mrg }
1906 1.7 mrg
/* Zero-masked form: calls __builtin_ia32_reducepd512_mask with __A (as
   __v8df), __B, a zeroed __v8df third argument and __U as the __mmask8.  */
1907 1.7 mrg extern __inline __m512d
1908 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1909 1.1 mrg _mm512_maskz_reduce_pd (__mmask8 __U, __m512d __A, int __B)
1910 1.1 mrg {
1911 1.1 mrg return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
1912 1.1 mrg (__v8df)
1913 1.1 mrg _mm512_setzero_pd (),
1914 1.1 mrg (__mmask8) __U);
1915 1.1 mrg }
1916 1.1 mrg
/* Zero-masked form with explicit __R: calls
   __builtin_ia32_reducepd512_mask_round with __A (as __v8df), __B, a zeroed
   __v8df third argument, __U (uncast) and __R as the last argument.  */
1917 1.7 mrg extern __inline __m512d
1918 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1919 1.7 mrg _mm512_maskz_reduce_round_pd (__mmask8 __U, __m512d __A, int __B,
1920 1.7 mrg const int __R)
1921 1.7 mrg {
1922 1.7 mrg return (__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df) __A,
1923 1.7 mrg __B,
1924 1.7 mrg (__v8df)
1925 1.7 mrg _mm512_setzero_pd (),
1926 1.7 mrg __U, __R);
1927 1.7 mrg }
1928 1.7 mrg
/* Unmasked form: calls __builtin_ia32_reduceps512_mask with __A (as __v16sf),
   the int __B as the second argument, a zeroed __v16sf third argument and an
   all-ones __mmask16.  No rounding argument is passed by this variant.  */
1929 1.1 mrg extern __inline __m512
1930 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1931 1.1 mrg _mm512_reduce_ps (__m512 __A, int __B)
1932 1.1 mrg {
1933 1.1 mrg return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
1934 1.1 mrg (__v16sf)
1935 1.1 mrg _mm512_setzero_ps (),
1936 1.1 mrg (__mmask16) -1);
1937 1.1 mrg }
1938 1.1 mrg
/* Unmasked form with explicit __R: calls
   __builtin_ia32_reduceps512_mask_round with __A (as __v16sf), __B, a zeroed
   __v16sf third argument, an all-ones __mmask16 and __R as the last
   argument.  */
1939 1.1 mrg extern __inline __m512
1940 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1941 1.7 mrg _mm512_reduce_round_ps (__m512 __A, int __B, const int __R)
1942 1.7 mrg {
1943 1.7 mrg return (__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf) __A,
1944 1.7 mrg __B,
1945 1.7 mrg (__v16sf)
1946 1.7 mrg _mm512_setzero_ps (),
1947 1.7 mrg (__mmask16) -1, __R);
1948 1.7 mrg }
1949 1.7 mrg
/* Masked form: calls __builtin_ia32_reduceps512_mask with __A (as __v16sf),
   __B, __W (as __v16sf) as the third argument and __U as the __mmask16.  */
1950 1.7 mrg extern __inline __m512
1951 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1952 1.1 mrg _mm512_mask_reduce_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B)
1953 1.1 mrg {
1954 1.1 mrg return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
1955 1.1 mrg (__v16sf) __W,
1956 1.1 mrg (__mmask16) __U);
1957 1.1 mrg }
1958 1.1 mrg
/* Masked form with explicit __R: calls __builtin_ia32_reduceps512_mask_round
   with __A (as __v16sf), __B, __W (as __v16sf), __U (uncast; already
   __mmask16) and __R as the last argument.  */
1959 1.1 mrg extern __inline __m512
1960 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1961 1.7 mrg _mm512_mask_reduce_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B,
1962 1.7 mrg const int __R)
1963 1.7 mrg {
1964 1.7 mrg return (__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf) __A,
1965 1.7 mrg __B,
1966 1.7 mrg (__v16sf) __W,
1967 1.7 mrg __U, __R);
1968 1.7 mrg }
1969 1.7 mrg
/* Zero-masked form: calls __builtin_ia32_reduceps512_mask with __A (as
   __v16sf), __B, a zeroed __v16sf third argument and __U as the __mmask16.  */
1970 1.7 mrg extern __inline __m512
1971 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1972 1.1 mrg _mm512_maskz_reduce_ps (__mmask16 __U, __m512 __A, int __B)
1973 1.1 mrg {
1974 1.1 mrg return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
1975 1.1 mrg (__v16sf)
1976 1.1 mrg _mm512_setzero_ps (),
1977 1.1 mrg (__mmask16) __U);
1978 1.1 mrg }
1979 1.1 mrg
/* Zero-masked form with explicit __R: calls
   __builtin_ia32_reduceps512_mask_round with __A (as __v16sf), __B, a zeroed
   __v16sf third argument, __U (uncast) and __R as the last argument.  */
1980 1.7 mrg extern __inline __m512
1981 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1982 1.7 mrg _mm512_maskz_reduce_round_ps (__mmask16 __U, __m512 __A, int __B,
1983 1.7 mrg const int __R)
1984 1.7 mrg {
1985 1.7 mrg return (__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf) __A,
1986 1.7 mrg __B,
1987 1.7 mrg (__v16sf)
1988 1.7 mrg _mm512_setzero_ps (),
1989 1.7 mrg __U, __R);
1990 1.7 mrg }
1991 1.7 mrg
/* Unmasked form: calls __builtin_ia32_extractf32x8_mask with __A (as
   __v16sf), __imm as the second argument, a zeroed __v8sf third argument and
   an all-ones __mmask8.  The result is returned as a 256-bit __m256;
   presumably __imm selects which half of __A is returned -- confirm.  */
1992 1.1 mrg extern __inline __m256
1993 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1994 1.1 mrg _mm512_extractf32x8_ps (__m512 __A, const int __imm)
1995 1.1 mrg {
1996 1.1 mrg return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
1997 1.1 mrg __imm,
1998 1.1 mrg (__v8sf)
1999 1.1 mrg _mm256_setzero_ps (),
2000 1.1 mrg (__mmask8) -1);
2001 1.1 mrg }
2002 1.1 mrg
/* Masked form: calls __builtin_ia32_extractf32x8_mask with __A (as __v16sf),
   __imm, __W (as __v8sf) as the third argument and __U as the __mmask8.  */
2003 1.1 mrg extern __inline __m256
2004 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2005 1.1 mrg _mm512_mask_extractf32x8_ps (__m256 __W, __mmask8 __U, __m512 __A,
2006 1.1 mrg const int __imm)
2007 1.1 mrg {
2008 1.1 mrg return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
2009 1.1 mrg __imm,
2010 1.1 mrg (__v8sf) __W,
2011 1.1 mrg (__mmask8) __U);
2012 1.1 mrg }
2013 1.1 mrg
/* Zero-masked form: calls __builtin_ia32_extractf32x8_mask with __A (as
   __v16sf), __imm, a zeroed __v8sf third argument and __U as the __mmask8.  */
2014 1.1 mrg extern __inline __m256
2015 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2016 1.1 mrg _mm512_maskz_extractf32x8_ps (__mmask8 __U, __m512 __A,
2017 1.1 mrg const int __imm)
2018 1.1 mrg {
2019 1.1 mrg return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
2020 1.1 mrg __imm,
2021 1.1 mrg (__v8sf)
2022 1.1 mrg _mm256_setzero_ps (),
2023 1.1 mrg (__mmask8) __U);
2024 1.1 mrg }
2025 1.1 mrg
/* Unmasked form: calls __builtin_ia32_extractf64x2_512_mask with __A (as
   __v8df), __imm as the second argument, a zeroed __v2df third argument and
   an all-ones __mmask8.  The result is returned as a 128-bit __m128d.  */
2026 1.1 mrg extern __inline __m128d
2027 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028 1.1 mrg _mm512_extractf64x2_pd (__m512d __A, const int __imm)
2029 1.1 mrg {
2030 1.1 mrg return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
2031 1.1 mrg __imm,
2032 1.1 mrg (__v2df)
2033 1.1 mrg _mm_setzero_pd (),
2034 1.3 mrg (__mmask8) -1);
2035 1.1 mrg }
2036 1.1 mrg
/* Masked form: calls __builtin_ia32_extractf64x2_512_mask with __A (as
   __v8df), __imm, __W (as __v2df) as the third argument and __U as the
   __mmask8.  */
2037 1.1 mrg extern __inline __m128d
2038 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2039 1.1 mrg _mm512_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m512d __A,
2040 1.1 mrg const int __imm)
2041 1.1 mrg {
2042 1.1 mrg return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
2043 1.1 mrg __imm,
2044 1.1 mrg (__v2df) __W,
2045 1.1 mrg (__mmask8)
2046 1.1 mrg __U);
2047 1.1 mrg }
2048 1.1 mrg
/* Zero-masked form: calls __builtin_ia32_extractf64x2_512_mask with __A (as
   __v8df), __imm, a zeroed __v2df third argument and __U as the __mmask8.  */
2049 1.1 mrg extern __inline __m128d
2050 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2051 1.1 mrg _mm512_maskz_extractf64x2_pd (__mmask8 __U, __m512d __A,
2052 1.1 mrg const int __imm)
2053 1.1 mrg {
2054 1.1 mrg return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
2055 1.1 mrg __imm,
2056 1.1 mrg (__v2df)
2057 1.1 mrg _mm_setzero_pd (),
2058 1.1 mrg (__mmask8)
2059 1.1 mrg __U);
2060 1.1 mrg }
2061 1.1 mrg
/* Unmasked form: calls __builtin_ia32_extracti32x8_mask with __A (as
   __v16si), __imm as the second argument, a zeroed __v8si third argument and
   an all-ones __mmask8.  The result is returned as a 256-bit __m256i.  */
2062 1.1 mrg extern __inline __m256i
2063 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2064 1.1 mrg _mm512_extracti32x8_epi32 (__m512i __A, const int __imm)
2065 1.1 mrg {
2066 1.1 mrg return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
2067 1.1 mrg __imm,
2068 1.1 mrg (__v8si)
2069 1.1 mrg _mm256_setzero_si256 (),
2070 1.1 mrg (__mmask8) -1);
2071 1.1 mrg }
2072 1.1 mrg
/* Masked form: calls __builtin_ia32_extracti32x8_mask with __A (as __v16si),
   __imm, __W (as __v8si) as the third argument and __U as the __mmask8.  */
2073 1.1 mrg extern __inline __m256i
2074 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2075 1.1 mrg _mm512_mask_extracti32x8_epi32 (__m256i __W, __mmask8 __U, __m512i __A,
2076 1.1 mrg const int __imm)
2077 1.1 mrg {
2078 1.1 mrg return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
2079 1.1 mrg __imm,
2080 1.1 mrg (__v8si) __W,
2081 1.1 mrg (__mmask8) __U);
2082 1.1 mrg }
2083 1.1 mrg
/* Zero-masked form: calls __builtin_ia32_extracti32x8_mask with __A (as
   __v16si), __imm, a zeroed __v8si third argument and __U as the __mmask8.  */
2084 1.1 mrg extern __inline __m256i
2085 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2086 1.1 mrg _mm512_maskz_extracti32x8_epi32 (__mmask8 __U, __m512i __A,
2087 1.1 mrg const int __imm)
2088 1.1 mrg {
2089 1.1 mrg return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
2090 1.1 mrg __imm,
2091 1.1 mrg (__v8si)
2092 1.1 mrg _mm256_setzero_si256 (),
2093 1.1 mrg (__mmask8) __U);
2094 1.1 mrg }
2095 1.1 mrg
/* Unmasked form: calls __builtin_ia32_extracti64x2_512_mask with __A (as
   __v8di), __imm as the second argument, a zeroed __v2di third argument and
   an all-ones __mmask8.  The result is returned as a 128-bit __m128i.  */
2096 1.1 mrg extern __inline __m128i
2097 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2098 1.1 mrg _mm512_extracti64x2_epi64 (__m512i __A, const int __imm)
2099 1.1 mrg {
2100 1.1 mrg return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
2101 1.1 mrg __imm,
2102 1.1 mrg (__v2di)
2103 1.3 mrg _mm_setzero_si128 (),
2104 1.3 mrg (__mmask8) -1);
2105 1.1 mrg }
2106 1.1 mrg
/* Masked form: calls __builtin_ia32_extracti64x2_512_mask with __A (as
   __v8di), __imm, __W (as __v2di) as the third argument and __U as the
   __mmask8.  */
2107 1.1 mrg extern __inline __m128i
2108 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2109 1.1 mrg _mm512_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m512i __A,
2110 1.1 mrg const int __imm)
2111 1.1 mrg {
2112 1.1 mrg return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
2113 1.1 mrg __imm,
2114 1.1 mrg (__v2di) __W,
2115 1.1 mrg (__mmask8)
2116 1.1 mrg __U);
2117 1.1 mrg }
2118 1.1 mrg
/* Zero-masked form: calls __builtin_ia32_extracti64x2_512_mask with __A (as
   __v8di), __imm, a zeroed __v2di third argument and __U as the __mmask8.  */
2119 1.1 mrg extern __inline __m128i
2120 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2121 1.1 mrg _mm512_maskz_extracti64x2_epi64 (__mmask8 __U, __m512i __A,
2122 1.1 mrg const int __imm)
2123 1.1 mrg {
2124 1.1 mrg return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
2125 1.1 mrg __imm,
2126 1.1 mrg (__v2di)
2127 1.3 mrg _mm_setzero_si128 (),
2128 1.1 mrg (__mmask8)
2129 1.1 mrg __U);
2130 1.1 mrg }
2131 1.1 mrg
/* Unmasked form: calls __builtin_ia32_rangepd512_mask with __A and __B (both
   as __v8df), the int __C as the third argument, a zeroed __v8df fourth
   argument, an all-ones __mmask8 and __R as the last argument.  */
2132 1.1 mrg extern __inline __m512d
2133 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2134 1.1 mrg _mm512_range_round_pd (__m512d __A, __m512d __B, int __C,
2135 1.1 mrg const int __R)
2136 1.1 mrg {
2137 1.1 mrg return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
2138 1.1 mrg (__v8df) __B, __C,
2139 1.1 mrg (__v8df)
2140 1.1 mrg _mm512_setzero_pd (),
2141 1.1 mrg (__mmask8) -1,
2142 1.1 mrg __R);
2143 1.1 mrg }
2144 1.1 mrg
/* Masked form: calls __builtin_ia32_rangepd512_mask with __A and __B (both as
   __v8df), __C, __W (as __v8df) as the fourth argument, __U as the __mmask8
   and __R as the last argument.  */
2145 1.1 mrg extern __inline __m512d
2146 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2147 1.1 mrg _mm512_mask_range_round_pd (__m512d __W, __mmask8 __U,
2148 1.1 mrg __m512d __A, __m512d __B, int __C,
2149 1.1 mrg const int __R)
2150 1.1 mrg {
2151 1.1 mrg return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
2152 1.1 mrg (__v8df) __B, __C,
2153 1.1 mrg (__v8df) __W,
2154 1.1 mrg (__mmask8) __U,
2155 1.1 mrg __R);
2156 1.1 mrg }
2157 1.1 mrg
/* Zero-masked form: calls __builtin_ia32_rangepd512_mask with __A and __B
   (both as __v8df), __C, a zeroed __v8df fourth argument, __U as the __mmask8
   and __R as the last argument.  */
2158 1.1 mrg extern __inline __m512d
2159 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2160 1.1 mrg _mm512_maskz_range_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2161 1.1 mrg int __C, const int __R)
2162 1.1 mrg {
2163 1.1 mrg return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
2164 1.1 mrg (__v8df) __B, __C,
2165 1.1 mrg (__v8df)
2166 1.1 mrg _mm512_setzero_pd (),
2167 1.1 mrg (__mmask8) __U,
2168 1.1 mrg __R);
2169 1.1 mrg }
2170 1.1 mrg
/* Unmasked form: calls __builtin_ia32_rangeps512_mask with __A and __B (both
   as __v16sf), the int __C as the third argument, a zeroed __v16sf fourth
   argument, an all-ones __mmask16 and __R as the last argument.  */
2171 1.1 mrg extern __inline __m512
2172 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2173 1.1 mrg _mm512_range_round_ps (__m512 __A, __m512 __B, int __C, const int __R)
2174 1.1 mrg {
2175 1.1 mrg return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
2176 1.1 mrg (__v16sf) __B, __C,
2177 1.1 mrg (__v16sf)
2178 1.1 mrg _mm512_setzero_ps (),
2179 1.1 mrg (__mmask16) -1,
2180 1.1 mrg __R);
2181 1.1 mrg }
2182 1.1 mrg
/* Masked form: calls __builtin_ia32_rangeps512_mask with __A and __B (both as
   __v16sf), __C, __W (as __v16sf) as the fourth argument, __U as the
   __mmask16 and __R as the last argument.  */
2183 1.1 mrg extern __inline __m512
2184 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2185 1.1 mrg _mm512_mask_range_round_ps (__m512 __W, __mmask16 __U,
2186 1.1 mrg __m512 __A, __m512 __B, int __C,
2187 1.1 mrg const int __R)
2188 1.1 mrg {
2189 1.1 mrg return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
2190 1.1 mrg (__v16sf) __B, __C,
2191 1.1 mrg (__v16sf) __W,
2192 1.1 mrg (__mmask16) __U,
2193 1.1 mrg __R);
2194 1.1 mrg }
2195 1.1 mrg
/* Zero-masked form: calls __builtin_ia32_rangeps512_mask with __A and __B
   (both as __v16sf), __C, a zeroed __v16sf fourth argument, __U as the
   __mmask16 and __R as the last argument.  */
2196 1.1 mrg extern __inline __m512
2197 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2198 1.1 mrg _mm512_maskz_range_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2199 1.1 mrg int __C, const int __R)
2200 1.1 mrg {
2201 1.1 mrg return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
2202 1.1 mrg (__v16sf) __B, __C,
2203 1.1 mrg (__v16sf)
2204 1.1 mrg _mm512_setzero_ps (),
2205 1.1 mrg (__mmask16) __U,
2206 1.1 mrg __R);
2207 1.1 mrg }
2208 1.1 mrg
/* Unmasked form: calls __builtin_ia32_inserti32x8_mask with __A (as
   __v16si), the 256-bit __B (as __v8si), __imm as the third argument, a
   zeroed __v16si fourth argument and an all-ones __mmask16.  Presumably __imm
   selects which half of __A is replaced by __B -- confirm.  */
2209 1.1 mrg extern __inline __m512i
2210 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2211 1.1 mrg _mm512_inserti32x8 (__m512i __A, __m256i __B, const int __imm)
2212 1.1 mrg {
2213 1.1 mrg return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
2214 1.1 mrg (__v8si) __B,
2215 1.1 mrg __imm,
2216 1.1 mrg (__v16si)
2217 1.1 mrg _mm512_setzero_si512 (),
2218 1.1 mrg (__mmask16) -1);
2219 1.1 mrg }
2220 1.1 mrg
/* Masked form: calls __builtin_ia32_inserti32x8_mask with __A (as __v16si),
   __B (as __v8si), __imm, __W (as __v16si) as the fourth argument and __U as
   the __mmask16.  */
2221 1.1 mrg extern __inline __m512i
2222 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2223 1.1 mrg _mm512_mask_inserti32x8 (__m512i __W, __mmask16 __U, __m512i __A,
2224 1.1 mrg __m256i __B, const int __imm)
2225 1.1 mrg {
2226 1.1 mrg return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
2227 1.1 mrg (__v8si) __B,
2228 1.1 mrg __imm,
2229 1.1 mrg (__v16si) __W,
2230 1.1 mrg (__mmask16) __U);
2231 1.1 mrg }
2232 1.1 mrg
/* Zero-masked form: calls __builtin_ia32_inserti32x8_mask with __A (as
   __v16si), __B (as __v8si), __imm, a zeroed __v16si fourth argument and __U
   as the __mmask16.  */
2233 1.1 mrg extern __inline __m512i
2234 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2235 1.1 mrg _mm512_maskz_inserti32x8 (__mmask16 __U, __m512i __A, __m256i __B,
2236 1.1 mrg const int __imm)
2237 1.1 mrg {
2238 1.1 mrg return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
2239 1.1 mrg (__v8si) __B,
2240 1.1 mrg __imm,
2241 1.1 mrg (__v16si)
2242 1.1 mrg _mm512_setzero_si512 (),
2243 1.1 mrg (__mmask16) __U);
2244 1.1 mrg }
2245 1.1 mrg
/* Unmasked form: calls __builtin_ia32_insertf32x8_mask with __A (as
   __v16sf), the 256-bit __B (as __v8sf), __imm as the third argument, a
   zeroed __v16sf fourth argument and an all-ones __mmask16.  */
2246 1.1 mrg extern __inline __m512
2247 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2248 1.1 mrg _mm512_insertf32x8 (__m512 __A, __m256 __B, const int __imm)
2249 1.1 mrg {
2250 1.1 mrg return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
2251 1.1 mrg (__v8sf) __B,
2252 1.1 mrg __imm,
2253 1.1 mrg (__v16sf)
2254 1.1 mrg _mm512_setzero_ps (),
2255 1.1 mrg (__mmask16) -1);
2256 1.1 mrg }
2257 1.1 mrg
/* Masked form: calls __builtin_ia32_insertf32x8_mask with __A (as __v16sf),
   __B (as __v8sf), __imm, __W (as __v16sf) as the fourth argument and __U as
   the __mmask16.  */
2258 1.1 mrg extern __inline __m512
2259 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2260 1.1 mrg _mm512_mask_insertf32x8 (__m512 __W, __mmask16 __U, __m512 __A,
2261 1.1 mrg __m256 __B, const int __imm)
2262 1.1 mrg {
2263 1.1 mrg return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
2264 1.1 mrg (__v8sf) __B,
2265 1.1 mrg __imm,
2266 1.1 mrg (__v16sf) __W,
2267 1.1 mrg (__mmask16) __U);
2268 1.1 mrg }
2269 1.1 mrg
/* Zero-masked form: calls __builtin_ia32_insertf32x8_mask with __A (as
   __v16sf), __B (as __v8sf), __imm, a zeroed __v16sf fourth argument and __U
   as the __mmask16.  */
2270 1.1 mrg extern __inline __m512
2271 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2272 1.1 mrg _mm512_maskz_insertf32x8 (__mmask16 __U, __m512 __A, __m256 __B,
2273 1.1 mrg const int __imm)
2274 1.1 mrg {
2275 1.1 mrg return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
2276 1.1 mrg (__v8sf) __B,
2277 1.1 mrg __imm,
2278 1.1 mrg (__v16sf)
2279 1.1 mrg _mm512_setzero_ps (),
2280 1.1 mrg (__mmask16) __U);
2281 1.1 mrg }
2282 1.1 mrg
/* Unmasked form: calls __builtin_ia32_inserti64x2_512_mask with __A (as
   __v8di), the 128-bit __B (as __v2di), __imm as the third argument, a zeroed
   __v8di fourth argument and an all-ones __mmask8.  */
2283 1.1 mrg extern __inline __m512i
2284 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2285 1.1 mrg _mm512_inserti64x2 (__m512i __A, __m128i __B, const int __imm)
2286 1.1 mrg {
2287 1.1 mrg return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
2288 1.1 mrg (__v2di) __B,
2289 1.1 mrg __imm,
2290 1.1 mrg (__v8di)
2291 1.1 mrg _mm512_setzero_si512 (),
2292 1.3 mrg (__mmask8) -1);
2293 1.1 mrg }
2294 1.1 mrg
/* Masked form: calls __builtin_ia32_inserti64x2_512_mask with __A (as
   __v8di), __B (as __v2di), __imm, __W (as __v8di) as the fourth argument and
   __U as the __mmask8.  */
2295 1.1 mrg extern __inline __m512i
2296 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2297 1.1 mrg _mm512_mask_inserti64x2 (__m512i __W, __mmask8 __U, __m512i __A,
2298 1.1 mrg __m128i __B, const int __imm)
2299 1.1 mrg {
2300 1.1 mrg return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
2301 1.1 mrg (__v2di) __B,
2302 1.1 mrg __imm,
2303 1.1 mrg (__v8di) __W,
2304 1.1 mrg (__mmask8)
2305 1.1 mrg __U);
2306 1.1 mrg }
2307 1.1 mrg
/* Zero-masked form: calls __builtin_ia32_inserti64x2_512_mask with __A (as
   __v8di), __B (as __v2di), __imm, a zeroed __v8di fourth argument and __U as
   the __mmask8.  */
2308 1.1 mrg extern __inline __m512i
2309 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2310 1.1 mrg _mm512_maskz_inserti64x2 (__mmask8 __U, __m512i __A, __m128i __B,
2311 1.1 mrg const int __imm)
2312 1.1 mrg {
2313 1.1 mrg return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
2314 1.1 mrg (__v2di) __B,
2315 1.1 mrg __imm,
2316 1.1 mrg (__v8di)
2317 1.1 mrg _mm512_setzero_si512 (),
2318 1.1 mrg (__mmask8)
2319 1.1 mrg __U);
2320 1.1 mrg }
2321 1.1 mrg
/* Unmasked form: calls __builtin_ia32_insertf64x2_512_mask with __A (as
   __v8df), the 128-bit __B (as __v2df), __imm as the third argument, a zeroed
   __v8df fourth argument and an all-ones __mmask8.  */
2322 1.1 mrg extern __inline __m512d
2323 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2324 1.1 mrg _mm512_insertf64x2 (__m512d __A, __m128d __B, const int __imm)
2325 1.1 mrg {
2326 1.1 mrg return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
2327 1.1 mrg (__v2df) __B,
2328 1.1 mrg __imm,
2329 1.1 mrg (__v8df)
2330 1.1 mrg _mm512_setzero_pd (),
2331 1.3 mrg (__mmask8) -1);
2332 1.1 mrg }
2333 1.1 mrg
/* Masked form: calls __builtin_ia32_insertf64x2_512_mask with __A (as
   __v8df), __B (as __v2df), __imm, __W (as __v8df) as the fourth argument and
   __U as the __mmask8.  */
2334 1.1 mrg extern __inline __m512d
2335 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2336 1.1 mrg _mm512_mask_insertf64x2 (__m512d __W, __mmask8 __U, __m512d __A,
2337 1.1 mrg __m128d __B, const int __imm)
2338 1.1 mrg {
2339 1.1 mrg return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
2340 1.1 mrg (__v2df) __B,
2341 1.1 mrg __imm,
2342 1.1 mrg (__v8df) __W,
2343 1.1 mrg (__mmask8)
2344 1.1 mrg __U);
2345 1.1 mrg }
2346 1.1 mrg
/* Zero-masked form: calls __builtin_ia32_insertf64x2_512_mask with __A (as
   __v8df), __B (as __v2df), __imm, a zeroed __v8df fourth argument and __U as
   the __mmask8.  */
2347 1.1 mrg extern __inline __m512d
2348 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2349 1.1 mrg _mm512_maskz_insertf64x2 (__mmask8 __U, __m512d __A, __m128d __B,
2350 1.1 mrg const int __imm)
2351 1.1 mrg {
2352 1.1 mrg return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
2353 1.1 mrg (__v2df) __B,
2354 1.1 mrg __imm,
2355 1.1 mrg (__v8df)
2356 1.1 mrg _mm512_setzero_pd (),
2357 1.1 mrg (__mmask8)
2358 1.1 mrg __U);
2359 1.1 mrg }
2360 1.1 mrg
/* Masked form: calls __builtin_ia32_fpclasspd512_mask with __A (as __v8df),
   __imm and the caller's mask __U, and returns the builtin's result as an
   __mmask8.  Presumably __imm selects the floating-point categories tested
   -- confirm against the intrinsic reference.  */
2361 1.1 mrg extern __inline __mmask8
2362 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2363 1.1 mrg _mm512_mask_fpclass_pd_mask (__mmask8 __U, __m512d __A,
2364 1.1 mrg const int __imm)
2365 1.1 mrg {
2366 1.1 mrg return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
2367 1.1 mrg __imm, __U);
2368 1.1 mrg }
2369 1.1 mrg
/* Unmasked form: calls __builtin_ia32_fpclasspd512_mask with __A (as
   __v8df), __imm and an all-ones __mmask8, and returns the builtin's result
   as an __mmask8.  */
2370 1.1 mrg extern __inline __mmask8
2371 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2372 1.1 mrg _mm512_fpclass_pd_mask (__m512d __A, const int __imm)
2373 1.1 mrg {
2374 1.1 mrg return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
2375 1.1 mrg __imm,
2376 1.1 mrg (__mmask8) -1);
2377 1.1 mrg }
2378 1.1 mrg
/* Masked form: calls __builtin_ia32_fpclassps512_mask with __A (as
   __v16sf), __imm and the caller's mask __U, and returns the builtin's result
   as an __mmask16.  */
2379 1.1 mrg extern __inline __mmask16
2380 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2381 1.1 mrg _mm512_mask_fpclass_ps_mask (__mmask16 __U, __m512 __A,
2382 1.1 mrg const int __imm)
2383 1.1 mrg {
2384 1.1 mrg return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
2385 1.1 mrg __imm, __U);
2386 1.1 mrg }
2387 1.1 mrg
/* Unmasked form: calls __builtin_ia32_fpclassps512_mask with __A (as
   __v16sf), __imm and an all-ones __mmask16, and returns the builtin's result
   as an __mmask16.  */
2388 1.1 mrg extern __inline __mmask16
2389 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2390 1.1 mrg _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
2391 1.1 mrg {
2392 1.1 mrg return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
2393 1.1 mrg __imm,
2394 1.3 mrg (__mmask16) -1);
2395 1.1 mrg }
2396 1.1 mrg
2397 1.1 mrg #else
/* Macro form: expands to __builtin_ia32_kshiftliqi with both X and Y cast to
   __mmask8; the result is cast to __mmask8.  */
2398 1.3 mrg #define _kshiftli_mask8(X, Y) \
2399 1.3 mrg ((__mmask8) __builtin_ia32_kshiftliqi ((__mmask8)(X), (__mmask8)(Y)))
2400 1.3 mrg
/* Macro form: expands to __builtin_ia32_kshiftriqi with both X and Y cast to
   __mmask8; the result is cast to __mmask8.  */
2401 1.3 mrg #define _kshiftri_mask8(X, Y) \
2402 1.3 mrg ((__mmask8) __builtin_ia32_kshiftriqi ((__mmask8)(X), (__mmask8)(Y)))
2403 1.3 mrg
/* Macro form, unmasked: expands to __builtin_ia32_rangesd128_mask_round on A
   and B (as __v2df) with C cast to int, a zeroed __v2df operand, an all-ones
   __mmask8 and _MM_FROUND_CUR_DIRECTION as the last argument.  */
2404 1.4 mrg #define _mm_range_sd(A, B, C) \
2405 1.4 mrg ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
2406 1.4 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \
2407 1.4 mrg (__mmask8) -1, _MM_FROUND_CUR_DIRECTION))
2408 1.4 mrg
/* Macro form, masked: expands to __builtin_ia32_rangesd128_mask_round on A
   and B (as __v2df) with C cast to int, W (as __v2df), U cast to __mmask8 and
   _MM_FROUND_CUR_DIRECTION as the last argument.  */
2409 1.4 mrg #define _mm_mask_range_sd(W, U, A, B, C) \
2410 1.4 mrg ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
2411 1.4 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), \
2412 1.4 mrg (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
2413 1.4 mrg
/* Macro form, zero-masked: expands to __builtin_ia32_rangesd128_mask_round on
   A and B (as __v2df) with C cast to int, a zeroed __v2df operand, U cast to
   __mmask8 and _MM_FROUND_CUR_DIRECTION as the last argument.  */
2414 1.4 mrg #define _mm_maskz_range_sd(U, A, B, C) \
2415 1.4 mrg ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
2416 1.4 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \
2417 1.4 mrg (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
2418 1.1 mrg
/* Macro form, unmasked: expands to __builtin_ia32_rangess128_mask_round on A
   and B (as __v4sf) with C cast to int, a zeroed __v4sf operand, an all-ones
   __mmask8 and _MM_FROUND_CUR_DIRECTION as the last argument.  */
2419 1.1 mrg #define _mm_range_ss(A, B, C) \
2420 1.4 mrg ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \
2421 1.4 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
2422 1.4 mrg (__mmask8) -1, _MM_FROUND_CUR_DIRECTION))
2423 1.4 mrg
/* Macro form, masked: expands to __builtin_ia32_rangess128_mask_round on A
   and B (as __v4sf) with C cast to int, W (as __v4sf), U cast to __mmask8 and
   _MM_FROUND_CUR_DIRECTION as the last argument.  */
2424 1.4 mrg #define _mm_mask_range_ss(W, U, A, B, C) \
2425 1.4 mrg ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \
2426 1.4 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \
2427 1.4 mrg (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
2428 1.4 mrg
/* Macro form, zero-masked: expands to __builtin_ia32_rangess128_mask_round on
   A and B (as __v4sf) with C cast to int, a zeroed __v4sf operand, U cast to
   __mmask8 and _MM_FROUND_CUR_DIRECTION as the last argument.  */
2429 1.4 mrg #define _mm_maskz_range_ss(U, A, B, C) \
2430 1.4 mrg ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \
2431 1.4 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
2432 1.4 mrg (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
2433 1.4 mrg
/* Macro form, unmasked, caller-supplied R: expands to
   __builtin_ia32_rangesd128_mask_round on A and B (as __v2df) with C cast to
   int, a zeroed __v2df operand, an all-ones __mmask8 and R as the last
   argument.  */
2434 1.4 mrg #define _mm_range_round_sd(A, B, C, R) \
2435 1.4 mrg ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
2436 1.4 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \
2437 1.4 mrg (__mmask8) -1, (R)))
2438 1.4 mrg
/* Macro form, masked, caller-supplied R: expands to
   __builtin_ia32_rangesd128_mask_round on A and B (as __v2df) with C cast to
   int, W (as __v2df), U cast to __mmask8 and R as the last argument.  */
2439 1.4 mrg #define _mm_mask_range_round_sd(W, U, A, B, C, R) \
2440 1.4 mrg ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
2441 1.4 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), \
2442 1.4 mrg (__mmask8)(U), (R)))
2443 1.4 mrg
/* Macro form, zero-masked, caller-supplied R: expands to
   __builtin_ia32_rangesd128_mask_round on A and B (as __v2df) with C cast to
   int, a zeroed __v2df operand, U cast to __mmask8 and R as the last
   argument.  */
2444 1.4 mrg #define _mm_maskz_range_round_sd(U, A, B, C, R) \
2445 1.4 mrg ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
2446 1.4 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \
2447 1.4 mrg (__mmask8)(U), (R)))
2448 1.1 mrg
/* Macro form, unmasked, caller-supplied R: expands to
   __builtin_ia32_rangess128_mask_round on A and B (as __v4sf) with C cast to
   int, a zeroed __v4sf operand, an all-ones __mmask8 and R as the last
   argument.  */
2449 1.1 mrg #define _mm_range_round_ss(A, B, C, R) \
2450 1.4 mrg ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \
2451 1.4 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
2452 1.4 mrg (__mmask8) -1, (R)))
2453 1.4 mrg
/* Macro form, masked, caller-supplied R: expands to
   __builtin_ia32_rangess128_mask_round on A and B (as __v4sf) with C cast to
   int, W (as __v4sf), U cast to __mmask8 and R as the last argument.  */
2454 1.4 mrg #define _mm_mask_range_round_ss(W, U, A, B, C, R) \
2455 1.4 mrg ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \
2456 1.4 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \
2457 1.4 mrg (__mmask8)(U), (R)))
2458 1.4 mrg
/* Macro form, zero-masked, caller-supplied R: expands to
   __builtin_ia32_rangess128_mask_round on A and B (as __v4sf) with C cast to
   int, a zeroed __v4sf operand, U cast to __mmask8 and R as the last
   argument.  */
2459 1.4 mrg #define _mm_maskz_range_round_ss(U, A, B, C, R) \
2460 1.4 mrg ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \
2461 1.4 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
2462 1.4 mrg (__mmask8)(U), (R)))
2463 1.1 mrg
/* cvtt_roundpd_epi64 macros: each wraps __builtin_ia32_cvttpd2qq512_mask and
   casts the result to __m512i.  A is passed to the builtin without a cast
   and B is the last argument.  Unmasked form: zero vector as the second
   operand, mask -1.  */
2464 1.1 mrg #define _mm512_cvtt_roundpd_epi64(A, B) \
2465 1.3 mrg ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di) \
2466 1.3 mrg _mm512_setzero_si512 (), \
2467 1.3 mrg -1, (B)))
2468 1.1 mrg
/* mask form: W (cast to __v8di) is the second operand, U the mask.  */
2469 1.1 mrg #define _mm512_mask_cvtt_roundpd_epi64(W, U, A, B) \
2470 1.3 mrg ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di)(W), (U), (B)))
2471 1.1 mrg
/* maskz form: zero vector as the second operand, U the mask.  */
2472 1.1 mrg #define _mm512_maskz_cvtt_roundpd_epi64(U, A, B) \
2473 1.3 mrg ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
2474 1.1 mrg
/* cvtt_roundpd_epu64 macros: each wraps __builtin_ia32_cvttpd2uqq512_mask
   and casts the result to __m512i.  A is passed uncast; B is the last
   argument.  Unmasked form: zero vector as the second operand, mask -1.  */
2475 1.1 mrg #define _mm512_cvtt_roundpd_epu64(A, B) \
2476 1.3 mrg ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
2477 1.1 mrg
/* mask form: W (cast to __v8di) is the second operand, U the mask.  */
2478 1.1 mrg #define _mm512_mask_cvtt_roundpd_epu64(W, U, A, B) \
2479 1.3 mrg ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)(W), (U), (B)))
2480 1.1 mrg
/* maskz form: zero vector as the second operand, U the mask.  */
2481 1.1 mrg #define _mm512_maskz_cvtt_roundpd_epu64(U, A, B) \
2482 1.3 mrg ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
2483 1.1 mrg
/* cvtt_roundps_epi64 macros: each wraps __builtin_ia32_cvttps2qq512_mask and
   casts the result to __m512i.  A is passed uncast; B is the last argument.
   Unmasked form: zero vector as the second operand, mask -1.  */
2484 1.1 mrg #define _mm512_cvtt_roundps_epi64(A, B) \
2485 1.3 mrg ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
2486 1.1 mrg
/* mask form: W (cast to __v8di) is the second operand, U the mask.  */
2487 1.1 mrg #define _mm512_mask_cvtt_roundps_epi64(W, U, A, B) \
2488 1.3 mrg ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)(W), (U), (B)))
2489 1.1 mrg
/* maskz form: zero vector as the second operand, U the mask.  */
2490 1.1 mrg #define _mm512_maskz_cvtt_roundps_epi64(U, A, B) \
2491 1.3 mrg ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
2492 1.1 mrg
/* cvtt_roundps_epu64 macros: each wraps __builtin_ia32_cvttps2uqq512_mask
   and casts the result to __m512i.  A is passed uncast; B is the last
   argument.  Unmasked form: zero vector as the second operand, mask -1.  */
2493 1.1 mrg #define _mm512_cvtt_roundps_epu64(A, B) \
2494 1.3 mrg ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
2495 1.1 mrg
/* mask form: W (cast to __v8di) is the second operand, U the mask.  */
2496 1.1 mrg #define _mm512_mask_cvtt_roundps_epu64(W, U, A, B) \
2497 1.3 mrg ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)(W), (U), (B)))
2498 1.1 mrg
/* maskz form: zero vector as the second operand, U the mask.  */
2499 1.1 mrg #define _mm512_maskz_cvtt_roundps_epu64(U, A, B) \
2500 1.3 mrg ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
2501 1.1 mrg
/* cvt_roundpd_epi64 macros: each wraps __builtin_ia32_cvtpd2qq512_mask and
   casts the result to __m512i.  A is passed uncast; B is the last argument.
   Unmasked form: zero vector as the second operand, mask -1.  */
2502 1.1 mrg #define _mm512_cvt_roundpd_epi64(A, B) \
2503 1.3 mrg ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
2504 1.1 mrg
/* mask form: W (cast to __v8di) is the second operand, U the mask.  */
2505 1.1 mrg #define _mm512_mask_cvt_roundpd_epi64(W, U, A, B) \
2506 1.3 mrg ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)(W), (U), (B)))
2507 1.1 mrg
/* maskz form: zero vector as the second operand, U the mask.  */
2508 1.1 mrg #define _mm512_maskz_cvt_roundpd_epi64(U, A, B) \
2509 1.3 mrg ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
2510 1.1 mrg
/* cvt_roundpd_epu64 macros: each wraps __builtin_ia32_cvtpd2uqq512_mask and
   casts the result to __m512i.  A is passed uncast; B is the last argument.
   Unmasked form: zero vector as the second operand, mask -1.  */
2511 1.1 mrg #define _mm512_cvt_roundpd_epu64(A, B) \
2512 1.3 mrg ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
2513 1.1 mrg
/* mask form: W (cast to __v8di) is the second operand, U the mask.  */
2514 1.1 mrg #define _mm512_mask_cvt_roundpd_epu64(W, U, A, B) \
2515 1.3 mrg ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)(W), (U), (B)))
2516 1.1 mrg
/* maskz form: zero vector as the second operand, U the mask.  */
2517 1.1 mrg #define _mm512_maskz_cvt_roundpd_epu64(U, A, B) \
2518 1.3 mrg ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
2519 1.1 mrg
/* cvt_roundps_epi64 macros: each wraps __builtin_ia32_cvtps2qq512_mask and
   casts the result to __m512i.  A is passed uncast; B is the last argument.
   Unmasked form: zero vector as the second operand, mask -1.  */
2520 1.1 mrg #define _mm512_cvt_roundps_epi64(A, B) \
2521 1.3 mrg ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
2522 1.1 mrg
/* mask form: W (cast to __v8di) is the second operand, U the mask.  */
2523 1.1 mrg #define _mm512_mask_cvt_roundps_epi64(W, U, A, B) \
2524 1.3 mrg ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)(W), (U), (B)))
2525 1.1 mrg
/* maskz form: zero vector as the second operand, U the mask.  */
2526 1.1 mrg #define _mm512_maskz_cvt_roundps_epi64(U, A, B) \
2527 1.3 mrg ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
2528 1.1 mrg
/* cvt_roundps_epu64 macros: each wraps __builtin_ia32_cvtps2uqq512_mask and
   casts the result to __m512i.  A is passed uncast; B is the last argument.
   Unmasked form: zero vector as the second operand, mask -1.  */
2529 1.1 mrg #define _mm512_cvt_roundps_epu64(A, B) \
2530 1.3 mrg ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
2531 1.1 mrg
/* mask form: W (cast to __v8di) is the second operand, U the mask.  */
2532 1.1 mrg #define _mm512_mask_cvt_roundps_epu64(W, U, A, B) \
2533 1.3 mrg ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)(W), (U), (B)))
2534 1.1 mrg
/* maskz form: zero vector as the second operand, U the mask.  */
2535 1.1 mrg #define _mm512_maskz_cvt_roundps_epu64(U, A, B) \
2536 1.3 mrg ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
2537 1.1 mrg
/* cvt_roundepi64_ps macros: each wraps __builtin_ia32_cvtqq2ps512_mask on A
   (cast to __v8di) and casts the result to __m256; B is the last argument.
   Unmasked form: 256-bit zero vector as the second operand, mask -1.  */
2538 1.1 mrg #define _mm512_cvt_roundepi64_ps(A, B) \
2539 1.3 mrg ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), -1, (B)))
2540 1.1 mrg
/* mask form: W is handed to the builtin as the second operand without a
   cast; U is the mask.  */
2541 1.1 mrg #define _mm512_mask_cvt_roundepi64_ps(W, U, A, B) \
2542 1.3 mrg ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (W), (U), (B)))
2543 1.1 mrg
/* maskz form: zero vector as the second operand, U the mask.  */
2544 1.1 mrg #define _mm512_maskz_cvt_roundepi64_ps(U, A, B) \
2545 1.3 mrg ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), (U), (B)))
2546 1.1 mrg
/* cvt_roundepu64_ps macros: each wraps __builtin_ia32_cvtuqq2ps512_mask on A
   (cast to __v8di) and casts the result to __m256; B is the last argument.
   Unmasked form: 256-bit zero vector as the second operand, mask -1.  */
2547 1.1 mrg #define _mm512_cvt_roundepu64_ps(A, B) \
2548 1.3 mrg ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), -1, (B)))
2549 1.1 mrg
/* mask form: W is handed to the builtin as the second operand without a
   cast; U is the mask.  */
2550 1.1 mrg #define _mm512_mask_cvt_roundepu64_ps(W, U, A, B) \
2551 1.3 mrg ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (W), (U), (B)))
2552 1.1 mrg
/* maskz form: zero vector as the second operand, U the mask.  */
2553 1.1 mrg #define _mm512_maskz_cvt_roundepu64_ps(U, A, B) \
2554 1.3 mrg ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), (U), (B)))
2555 1.1 mrg
/* cvt_roundepi64_pd macros: each wraps __builtin_ia32_cvtqq2pd512_mask on A
   (cast to __v8di) and casts the result to __m512d; B is the last argument.
   Unmasked form: zero vector as the second operand, mask -1.  */
2556 1.1 mrg #define _mm512_cvt_roundepi64_pd(A, B) \
2557 1.3 mrg ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), -1, (B)))
2558 1.1 mrg
/* mask form: W is handed to the builtin as the second operand without a
   cast; U is the mask.  */
2559 1.1 mrg #define _mm512_mask_cvt_roundepi64_pd(W, U, A, B) \
2560 1.3 mrg ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (W), (U), (B)))
2561 1.1 mrg
/* maskz form: zero vector as the second operand, U the mask.  */
2562 1.1 mrg #define _mm512_maskz_cvt_roundepi64_pd(U, A, B) \
2563 1.3 mrg ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), (U), (B)))
2564 1.1 mrg
/* cvt_roundepu64_pd macros: each wraps __builtin_ia32_cvtuqq2pd512_mask on A
   (cast to __v8di) and casts the result to __m512d; B is the last argument.
   Unmasked form: zero vector as the second operand, mask -1.  */
2565 1.1 mrg #define _mm512_cvt_roundepu64_pd(A, B) \
2566 1.3 mrg ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), -1, (B)))
2567 1.1 mrg
/* mask form: W is handed to the builtin as the second operand without a
   cast; U is the mask.  */
2568 1.1 mrg #define _mm512_mask_cvt_roundepu64_pd(W, U, A, B) \
2569 1.3 mrg ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (W), (U), (B)))
2570 1.1 mrg
/* maskz form: zero vector as the second operand, U the mask.  */
2571 1.1 mrg #define _mm512_maskz_cvt_roundepu64_pd(U, A, B) \
2572 1.3 mrg ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), (U), (B)))
2573 1.1 mrg
/* 512-bit double-precision reduce, unmasked: forwards A and the immediate B
   to __builtin_ia32_reducepd512_mask with a zero vector as the third operand
   and an all-ones __mmask8.  */
2574 1.1 mrg #define _mm512_reduce_pd(A, B) \
2575 1.1 mrg ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
2576 1.3 mrg (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)-1))
2577 1.1 mrg
/* Same operands as _mm512_reduce_pd, but through
   __builtin_ia32_reducepd512_mask_round with R appended as the last
   argument.  */
2578 1.7 mrg #define _mm512_reduce_round_pd(A, B, R) \
2579 1.7 mrg ((__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df)(__m512d)(A),\
2580 1.7 mrg (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)-1, (R)))
2581 1.7 mrg
/* mask form: W is the third operand and U (cast to __mmask8) the mask.  */
2582 1.1 mrg #define _mm512_mask_reduce_pd(W, U, A, B) \
2583 1.1 mrg ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
2584 1.1 mrg (int)(B), (__v8df)(__m512d)(W), (__mmask8)(U)))
2585 1.1 mrg
/* mask form of the 512-bit double-precision reduce with an explicit last
   argument R: forwards A, the immediate B, W as the third operand and U as
   the mask to __builtin_ia32_reducepd512_mask_round.  U is cast to __mmask8
   so the mask is handed over with the same type as in
   _mm512_mask_reduce_pd.  */
2586 1.7 mrg #define _mm512_mask_reduce_round_pd(W, U, A, B, R) \
2587 1.7 mrg ((__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df)(__m512d)(A),\
2588 1.7 mrg (int)(B), (__v8df)(__m512d)(W), (__mmask8)(U), (R)))
2589 1.7 mrg
/* maskz form of the 512-bit double-precision reduce: forwards A and the
   immediate B to __builtin_ia32_reducepd512_mask with a zero vector as the
   third operand and U (cast to __mmask8) as the mask.  */
2590 1.1 mrg #define _mm512_maskz_reduce_pd(U, A, B) \
2591 1.1 mrg ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
2592 1.3 mrg (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)(U)))
2593 1.1 mrg
/* maskz form of the 512-bit double-precision reduce with an explicit last
   argument R: forwards A, the immediate B, a zero vector as the third
   operand and U as the mask to __builtin_ia32_reducepd512_mask_round.  U is
   cast to __mmask8 so the mask is handed over with the same type as in
   _mm512_maskz_reduce_pd.  */
2594 1.7 mrg #define _mm512_maskz_reduce_round_pd(U, A, B, R) \
2595 1.7 mrg ((__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df)(__m512d)(A),\
2596 1.7 mrg (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)(U), (R)))
2597 1.7 mrg
/* 512-bit single-precision reduce, unmasked: forwards A and the immediate B
   to __builtin_ia32_reduceps512_mask with a zero vector as the third operand
   and an all-ones __mmask16.  */
2598 1.1 mrg #define _mm512_reduce_ps(A, B) \
2599 1.1 mrg ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
2600 1.3 mrg (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)-1))
2601 1.1 mrg
/* Same operands as _mm512_reduce_ps, but through
   __builtin_ia32_reduceps512_mask_round with R appended as the last
   argument.  */
2602 1.7 mrg #define _mm512_reduce_round_ps(A, B, R) \
2603 1.7 mrg ((__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf)(__m512)(A),\
2604 1.7 mrg (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)-1, (R)))
2605 1.7 mrg
/* mask form: W is the third operand and U (cast to __mmask16) the mask.  */
2606 1.1 mrg #define _mm512_mask_reduce_ps(W, U, A, B) \
2607 1.1 mrg ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
2608 1.1 mrg (int)(B), (__v16sf)(__m512)(W), (__mmask16)(U)))
2609 1.1 mrg
/* mask form of the 512-bit single-precision reduce with an explicit last
   argument R: forwards A, the immediate B, W as the third operand and U as
   the mask to __builtin_ia32_reduceps512_mask_round.  U is cast to
   __mmask16 so the mask is handed over with the same type as in
   _mm512_mask_reduce_ps and _mm512_maskz_reduce_round_ps.  */
2610 1.7 mrg #define _mm512_mask_reduce_round_ps(W, U, A, B, R) \
2611 1.7 mrg ((__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf)(__m512)(A),\
2612 1.7 mrg (int)(B), (__v16sf)(__m512)(W), (__mmask16)(U), (R)))
2613 1.7 mrg
/* maskz form of the 512-bit single-precision reduce: forwards A and the
   immediate B to __builtin_ia32_reduceps512_mask with a zero vector as the
   third operand and U (cast to __mmask16) as the mask.  */
2614 1.1 mrg #define _mm512_maskz_reduce_ps(U, A, B) \
2615 1.1 mrg ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
2616 1.3 mrg (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)(U)))
2617 1.1 mrg
/* Same operands as _mm512_maskz_reduce_ps, but through
   __builtin_ia32_reduceps512_mask_round with R appended as the last
   argument.  */
2618 1.7 mrg #define _mm512_maskz_reduce_round_ps(U, A, B, R) \
2619 1.7 mrg ((__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf)(__m512)(A),\
2620 1.7 mrg (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), (R)))
2621 1.7 mrg
/* extractf32x8 macros: each forwards the 512-bit float vector X and the
   immediate C to __builtin_ia32_extractf32x8_mask and casts the result to
   __m256.  Unmasked form: 256-bit zero vector as the third operand,
   all-ones __mmask8.  */
2622 1.1 mrg #define _mm512_extractf32x8_ps(X, C) \
2623 1.1 mrg ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \
2624 1.3 mrg (int) (C), (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8)-1))
2625 1.1 mrg
/* mask form: W is the third operand, U the mask.  */
2626 1.1 mrg #define _mm512_mask_extractf32x8_ps(W, U, X, C) \
2627 1.1 mrg ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \
2628 1.1 mrg (int) (C), (__v8sf)(__m256) (W), (__mmask8) (U)))
2629 1.1 mrg
/* maskz form: zero vector as the third operand, U the mask.  */
2630 1.1 mrg #define _mm512_maskz_extractf32x8_ps(U, X, C) \
2631 1.1 mrg ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \
2632 1.3 mrg (int) (C), (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8) (U)))
2633 1.1 mrg
/* extractf64x2 macros: each forwards the 512-bit double vector X and the
   immediate C to __builtin_ia32_extractf64x2_512_mask and casts the result
   to __m128d.  Unmasked form: 128-bit zero vector as the third operand,
   all-ones __mmask8.  */
2634 1.1 mrg #define _mm512_extractf64x2_pd(X, C) \
2635 1.1 mrg ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
2636 1.3 mrg (int) (C), (__v2df)(__m128d) _mm_setzero_pd (), (__mmask8)-1))
2637 1.1 mrg
/* mask form: W is the third operand, U the mask.  */
2638 1.1 mrg #define _mm512_mask_extractf64x2_pd(W, U, X, C) \
2639 1.1 mrg ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
2640 1.1 mrg (int) (C), (__v2df)(__m128d) (W), (__mmask8) (U)))
2641 1.1 mrg
/* maskz form: zero vector as the third operand, U the mask.  */
2642 1.1 mrg #define _mm512_maskz_extractf64x2_pd(U, X, C) \
2643 1.1 mrg ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
2644 1.3 mrg (int) (C), (__v2df)(__m128d) _mm_setzero_pd (), (__mmask8) (U)))
2645 1.1 mrg
/* extracti32x8 macros: each forwards the 512-bit integer vector X (as
   __v16si) and the immediate C to __builtin_ia32_extracti32x8_mask and casts
   the result to __m256i.  Unmasked form: 256-bit zero vector as the third
   operand, all-ones __mmask8.  */
2646 1.1 mrg #define _mm512_extracti32x8_epi32(X, C) \
2647 1.1 mrg ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \
2648 1.3 mrg (int) (C), (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8)-1))
2649 1.1 mrg
/* mask form: W is the third operand, U the mask.  */
2650 1.1 mrg #define _mm512_mask_extracti32x8_epi32(W, U, X, C) \
2651 1.1 mrg ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \
2652 1.1 mrg (int) (C), (__v8si)(__m256i) (W), (__mmask8) (U)))
2653 1.1 mrg
/* maskz form: zero vector as the third operand, U the mask.  */
2654 1.1 mrg #define _mm512_maskz_extracti32x8_epi32(U, X, C) \
2655 1.1 mrg ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \
2656 1.3 mrg (int) (C), (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8) (U)))
2657 1.1 mrg
/* extracti64x2 macros: each forwards the 512-bit integer vector X (as
   __v8di) and the immediate C to __builtin_ia32_extracti64x2_512_mask and
   casts the result to __m128i.  Unmasked form: 128-bit zero vector as the
   third operand, all-ones __mmask8.  */
2658 1.1 mrg #define _mm512_extracti64x2_epi64(X, C) \
2659 1.1 mrg ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
2660 1.3 mrg (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8)-1))
2661 1.1 mrg
/* mask form: W is the third operand, U the mask.  */
2662 1.1 mrg #define _mm512_mask_extracti64x2_epi64(W, U, X, C) \
2663 1.1 mrg ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
2664 1.1 mrg (int) (C), (__v2di)(__m128i) (W), (__mmask8) (U)))
2665 1.1 mrg
/* maskz form: zero vector as the third operand, U the mask.  */
2666 1.1 mrg #define _mm512_maskz_extracti64x2_epi64(U, X, C) \
2667 1.1 mrg ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
2668 1.3 mrg (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
2669 1.1 mrg
/* 512-bit double-precision range macros: each forwards A, B and the
   immediate C to __builtin_ia32_rangepd512_mask with
   _MM_FROUND_CUR_DIRECTION as the last argument.  Unmasked form: zero vector
   as the fourth operand, all-ones __mmask8.  */
2670 1.1 mrg #define _mm512_range_pd(A, B, C) \
2671 1.1 mrg ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
2672 1.1 mrg (__v8df)(__m512d)(B), (int)(C), \
2673 1.3 mrg (__v8df)_mm512_setzero_pd (), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
2674 1.1 mrg
/* mask form: W is the fourth operand, U the mask.  */
2675 1.1 mrg #define _mm512_mask_range_pd(W, U, A, B, C) \
2676 1.1 mrg ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
2677 1.1 mrg (__v8df)(__m512d)(B), (int)(C), \
2678 1.1 mrg (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
2679 1.1 mrg
/* maskz form: zero vector as the fourth operand, U the mask.  */
2680 1.1 mrg #define _mm512_maskz_range_pd(U, A, B, C) \
2681 1.1 mrg ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
2682 1.1 mrg (__v8df)(__m512d)(B), (int)(C), \
2683 1.3 mrg (__v8df)_mm512_setzero_pd (), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
2684 1.1 mrg
/* 512-bit single-precision range macros: each forwards A, B and the
   immediate C to __builtin_ia32_rangeps512_mask with
   _MM_FROUND_CUR_DIRECTION as the last argument.  Unmasked form: zero vector
   as the fourth operand, all-ones __mmask16.  */
2685 1.1 mrg #define _mm512_range_ps(A, B, C) \
2686 1.1 mrg ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
2687 1.1 mrg (__v16sf)(__m512)(B), (int)(C), \
2688 1.3 mrg (__v16sf)_mm512_setzero_ps (), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
2689 1.1 mrg
/* mask form: W is the fourth operand, U the mask.  */
2690 1.1 mrg #define _mm512_mask_range_ps(W, U, A, B, C) \
2691 1.1 mrg ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
2692 1.1 mrg (__v16sf)(__m512)(B), (int)(C), \
2693 1.1 mrg (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
2694 1.1 mrg
/* maskz form: zero vector as the fourth operand, U the mask.  */
2695 1.1 mrg #define _mm512_maskz_range_ps(U, A, B, C) \
2696 1.1 mrg ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
2697 1.1 mrg (__v16sf)(__m512)(B), (int)(C), \
2698 1.3 mrg (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
2699 1.1 mrg
/* 512-bit double-precision range macros with a caller-supplied last
   argument R in place of _MM_FROUND_CUR_DIRECTION; each forwards to
   __builtin_ia32_rangepd512_mask.  Unmasked form: zero vector as the fourth
   operand, all-ones __mmask8.  */
2700 1.1 mrg #define _mm512_range_round_pd(A, B, C, R) \
2701 1.1 mrg ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
2702 1.1 mrg (__v8df)(__m512d)(B), (int)(C), \
2703 1.3 mrg (__v8df)_mm512_setzero_pd (), (__mmask8)-1, (R)))
2704 1.1 mrg
/* mask form: W is the fourth operand, U the mask.  */
2705 1.1 mrg #define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
2706 1.1 mrg ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
2707 1.1 mrg (__v8df)(__m512d)(B), (int)(C), \
2708 1.1 mrg (__v8df)(__m512d)(W), (__mmask8)(U), (R)))
2709 1.1 mrg
/* maskz form: zero vector as the fourth operand, U the mask.  */
2710 1.1 mrg #define _mm512_maskz_range_round_pd(U, A, B, C, R) \
2711 1.1 mrg ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
2712 1.1 mrg (__v8df)(__m512d)(B), (int)(C), \
2713 1.3 mrg (__v8df)_mm512_setzero_pd (), (__mmask8)(U), (R)))
2714 1.1 mrg
/* 512-bit single-precision range macros with a caller-supplied last
   argument R in place of _MM_FROUND_CUR_DIRECTION; each forwards to
   __builtin_ia32_rangeps512_mask.  Unmasked form: zero vector as the fourth
   operand, all-ones __mmask16.  */
2715 1.1 mrg #define _mm512_range_round_ps(A, B, C, R) \
2716 1.1 mrg ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
2717 1.1 mrg (__v16sf)(__m512)(B), (int)(C), \
2718 1.3 mrg (__v16sf)_mm512_setzero_ps (), (__mmask16)-1, (R)))
2719 1.1 mrg
/* mask form: W is the fourth operand, U the mask.  */
2720 1.1 mrg #define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
2721 1.1 mrg ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
2722 1.1 mrg (__v16sf)(__m512)(B), (int)(C), \
2723 1.1 mrg (__v16sf)(__m512)(W), (__mmask16)(U), (R)))
2724 1.1 mrg
/* maskz form: zero vector as the fourth operand, U the mask.  */
2725 1.1 mrg #define _mm512_maskz_range_round_ps(U, A, B, C, R) \
2726 1.1 mrg ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
2727 1.1 mrg (__v16sf)(__m512)(B), (int)(C), \
2728 1.3 mrg (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), (R)))
2729 1.1 mrg
/* Unmasked insertf64x2: forwards the 512-bit vector X, the 128-bit vector Y
   and the immediate C to __builtin_ia32_insertf64x2_512_mask with an
   all-ones __mmask8.  The fourth operand is a zero vector, as in
   _mm512_insertf32x8, rather than a second copy of X, so that X is expanded
   (and therefore evaluated) exactly once.  With every mask bit set the
   fourth operand is not expected to contribute to the result.  */
2730 1.1 mrg #define _mm512_insertf64x2(X, Y, C) \
2731 1.1 mrg ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
2732 1.1 mrg (__v2df)(__m128d) (Y), (int) (C), (__v8df)(__m512d) _mm512_setzero_pd (), \
2733 1.1 mrg (__mmask8)-1))
2734 1.1 mrg
/* mask form of insertf64x2: forwards X, Y and the immediate C to
   __builtin_ia32_insertf64x2_512_mask with W as the fourth operand and U as
   the mask.  */
2735 1.1 mrg #define _mm512_mask_insertf64x2(W, U, X, Y, C) \
2736 1.1 mrg ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
2737 1.1 mrg (__v2df)(__m128d) (Y), (int) (C), (__v8df)(__m512d) (W), \
2738 1.1 mrg (__mmask8) (U)))
2739 1.1 mrg
/* maskz form: zero vector as the fourth operand, U the mask.  */
2740 1.1 mrg #define _mm512_maskz_insertf64x2(U, X, Y, C) \
2741 1.1 mrg ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
2742 1.1 mrg (__v2df)(__m128d) (Y), (int) (C), \
2743 1.3 mrg (__v8df)(__m512d) _mm512_setzero_pd (), (__mmask8) (U)))
2744 1.1 mrg
/* Unmasked inserti64x2: forwards the 512-bit vector X, the 128-bit vector Y
   and the immediate C to __builtin_ia32_inserti64x2_512_mask with an
   all-ones __mmask8.  The fourth operand is a zero vector, as in
   _mm512_inserti32x8, rather than a second copy of X, so that X is expanded
   (and therefore evaluated) exactly once.  With every mask bit set the
   fourth operand is not expected to contribute to the result.  */
2745 1.1 mrg #define _mm512_inserti64x2(X, Y, C) \
2746 1.1 mrg ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\
2747 1.1 mrg (__v2di)(__m128i) (Y), (int) (C), (__v8di)(__m512i) _mm512_setzero_si512 (), (__mmask8)-1))
2748 1.1 mrg
/* mask form of inserti64x2: forwards X, Y and the immediate C to
   __builtin_ia32_inserti64x2_512_mask with W as the fourth operand and U as
   the mask.  */
2749 1.1 mrg #define _mm512_mask_inserti64x2(W, U, X, Y, C) \
2750 1.1 mrg ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\
2751 1.1 mrg (__v2di)(__m128i) (Y), (int) (C), (__v8di)(__m512i) (W), \
2752 1.1 mrg (__mmask8) (U)))
2753 1.1 mrg
/* maskz form: zero vector as the fourth operand, U the mask.  */
2754 1.1 mrg #define _mm512_maskz_inserti64x2(U, X, Y, C) \
2755 1.1 mrg ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\
2756 1.1 mrg (__v2di)(__m128i) (Y), (int) (C), \
2757 1.1 mrg (__v8di)(__m512i) _mm512_setzero_si512 (), (__mmask8) (U)))
2758 1.1 mrg
/* insertf32x8 macros: each forwards the 512-bit float vector X, the 256-bit
   float vector Y and the immediate C to __builtin_ia32_insertf32x8_mask.
   Unmasked form: zero vector as the fourth operand, all-ones __mmask16.  */
2759 1.1 mrg #define _mm512_insertf32x8(X, Y, C) \
2760 1.1 mrg ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \
2761 1.1 mrg (__v8sf)(__m256) (Y), (int) (C),\
2762 1.3 mrg (__v16sf)(__m512)_mm512_setzero_ps (),\
2763 1.1 mrg (__mmask16)-1))
2764 1.1 mrg
/* mask form: W is the fourth operand, U the mask.  */
2765 1.1 mrg #define _mm512_mask_insertf32x8(W, U, X, Y, C) \
2766 1.1 mrg ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \
2767 1.1 mrg (__v8sf)(__m256) (Y), (int) (C),\
2768 1.1 mrg (__v16sf)(__m512)(W),\
2769 1.1 mrg (__mmask16)(U)))
2770 1.1 mrg
/* maskz form: zero vector as the fourth operand, U the mask.  */
2771 1.1 mrg #define _mm512_maskz_insertf32x8(U, X, Y, C) \
2772 1.1 mrg ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \
2773 1.1 mrg (__v8sf)(__m256) (Y), (int) (C),\
2774 1.3 mrg (__v16sf)(__m512)_mm512_setzero_ps (),\
2775 1.1 mrg (__mmask16)(U)))
2776 1.1 mrg
/* inserti32x8 macros: each forwards the 512-bit integer vector X (as
   __v16si), the 256-bit integer vector Y (as __v8si) and the immediate C to
   __builtin_ia32_inserti32x8_mask.  Unmasked form: zero vector as the fourth
   operand, all-ones __mmask16.  */
2777 1.1 mrg #define _mm512_inserti32x8(X, Y, C) \
2778 1.1 mrg ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \
2779 1.1 mrg (__v8si)(__m256i) (Y), (int) (C),\
2780 1.1 mrg (__v16si)(__m512i)_mm512_setzero_si512 (),\
2781 1.1 mrg (__mmask16)-1))
2782 1.1 mrg
/* mask form: W is the fourth operand, U the mask.  */
2783 1.1 mrg #define _mm512_mask_inserti32x8(W, U, X, Y, C) \
2784 1.1 mrg ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \
2785 1.1 mrg (__v8si)(__m256i) (Y), (int) (C),\
2786 1.1 mrg (__v16si)(__m512i)(W),\
2787 1.1 mrg (__mmask16)(U)))
2788 1.1 mrg
/* maskz form: zero vector as the fourth operand, U the mask.  */
2789 1.1 mrg #define _mm512_maskz_inserti32x8(U, X, Y, C) \
2790 1.1 mrg ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \
2791 1.1 mrg (__v8si)(__m256i) (Y), (int) (C),\
2792 1.1 mrg (__v16si)(__m512i)_mm512_setzero_si512 (),\
2793 1.1 mrg (__mmask16)(U)))
2794 1.1 mrg
/* Unmasked scalar single-precision fpclass: forwards X and the immediate C
   to __builtin_ia32_fpclassss_mask with an all-ones __mmask8 and yields the
   builtin's result as an __mmask8.  The definition ends at the closing
   parenthesis; there is no trailing line continuation, so whatever follows
   on the next line can never be spliced into the macro body.  */
2795 1.6 mrg #define _mm_fpclass_ss_mask(X, C) \
2796 1.6 mrg ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X), \
2797 1.6 mrg (int) (C), (__mmask8) (-1)))
2798 1.6 mrg
/* Unmasked scalar double-precision fpclass: forwards X and the immediate C
   to __builtin_ia32_fpclasssd_mask with an all-ones __mmask8 and yields the
   builtin's result as an __mmask8.  The definition ends at the closing
   parenthesis; there is no trailing line continuation, so whatever follows
   on the next line can never be spliced into the macro body.  */
2799 1.6 mrg #define _mm_fpclass_sd_mask(X, C) \
2800 1.6 mrg ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), \
2801 1.6 mrg (int) (C), (__mmask8) (-1)))
2802 1.6 mrg
/* mask form of the scalar single-precision fpclass: forwards X, the
   immediate C and the mask U to __builtin_ia32_fpclassss_mask.  */
2803 1.7 mrg #define _mm_mask_fpclass_ss_mask(U, X, C) \
2804 1.6 mrg ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X), \
2805 1.6 mrg (int) (C), (__mmask8) (U)))
2806 1.6 mrg
/* mask form of the scalar double-precision fpclass: forwards X, the
   immediate C and the mask U to __builtin_ia32_fpclasssd_mask.  */
2807 1.7 mrg #define _mm_mask_fpclass_sd_mask(U, X, C) \
2808 1.6 mrg ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), \
2809 1.6 mrg (int) (C), (__mmask8) (U)))
2810 1.1 mrg
/* mask form of the 512-bit double-precision fpclass: forwards X, the
   immediate C and the mask u to __builtin_ia32_fpclasspd512_mask; the result
   is an __mmask8.  */
2811 1.1 mrg #define _mm512_mask_fpclass_pd_mask(u, X, C) \
2812 1.1 mrg ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \
2813 1.1 mrg (int) (C), (__mmask8)(u)))
2814 1.1 mrg
/* mask form of the 512-bit single-precision fpclass: forwards x, the
   immediate c and the mask u to __builtin_ia32_fpclassps512_mask; the result
   is an __mmask16.  */
2815 1.1 mrg #define _mm512_mask_fpclass_ps_mask(u, x, c) \
2816 1.1 mrg ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x),\
2817 1.6 mrg (int) (c),(__mmask16)(u)))
2818 1.1 mrg
/* Unmasked 512-bit double-precision fpclass: same builtin with an all-ones
   __mmask8.  */
2819 1.1 mrg #define _mm512_fpclass_pd_mask(X, C) \
2820 1.1 mrg ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \
2821 1.1 mrg (int) (C), (__mmask8)-1))
2822 1.1 mrg
/* Unmasked 512-bit single-precision fpclass: same builtin with an all-ones
   __mmask16.  */
2823 1.1 mrg #define _mm512_fpclass_ps_mask(x, c) \
2824 1.1 mrg ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x),\
2825 1.6 mrg (int) (c),(__mmask16)-1))
2826 1.1 mrg
/* Scalar double-precision reduce macros: each forwards A, B and the
   immediate C to __builtin_ia32_reducesd_mask and casts the result to
   __m128d.  Unmasked form: zero vector as the fourth operand, all-ones
   __mmask8.  */
2827 1.1 mrg #define _mm_reduce_sd(A, B, C) \
2828 1.4 mrg ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \
2829 1.4 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \
2830 1.4 mrg (__mmask8)-1))
2831 1.4 mrg
/* mask form: W is the fourth operand, U the mask.  */
2832 1.4 mrg #define _mm_mask_reduce_sd(W, U, A, B, C) \
2833 1.4 mrg ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \
2834 1.4 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), (__mmask8)(U)))
2835 1.4 mrg
/* maskz form: zero vector as the fourth operand, U the mask.  */
2836 1.4 mrg #define _mm_maskz_reduce_sd(U, A, B, C) \
2837 1.4 mrg ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \
2838 1.4 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \
2839 1.4 mrg (__mmask8)(U)))
2840 1.1 mrg
/* Scalar double-precision reduce macros with an explicit last argument R
   (cast to int): each forwards to __builtin_ia32_reducesd_mask_round and
   casts the result to __m128d.  Unmasked form: zero vector as the fourth
   operand, all-ones __mmask8.  */
2841 1.7 mrg #define _mm_reduce_round_sd(A, B, C, R) \
2842 1.7 mrg ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \
2843 1.7 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \
2844 1.7 mrg (__mmask8)(-1), (int)(R)))
2845 1.7 mrg
/* mask form: W is the fourth operand, U the mask.  */
2846 1.7 mrg #define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \
2847 1.7 mrg ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \
2848 1.7 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), \
2849 1.7 mrg (__mmask8)(U), (int)(R)))
2850 1.7 mrg
/* maskz form: zero vector as the fourth operand, U the mask.  */
2851 1.7 mrg #define _mm_maskz_reduce_round_sd(U, A, B, C, R) \
2852 1.7 mrg ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \
2853 1.7 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \
2854 1.7 mrg (__mmask8)(U), (int)(R)))
2855 1.7 mrg
/* Scalar single-precision reduce macros: each forwards A, B and the
   immediate C to __builtin_ia32_reducess_mask and casts the result to
   __m128.  Unmasked form: zero vector as the fourth operand, all-ones
   __mmask8.  */
2856 1.1 mrg #define _mm_reduce_ss(A, B, C) \
2857 1.4 mrg ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \
2858 1.4 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
2859 1.4 mrg (__mmask8)-1))
2860 1.4 mrg
/* mask form: W is the fourth operand, U the mask.  */
2861 1.4 mrg #define _mm_mask_reduce_ss(W, U, A, B, C) \
2862 1.4 mrg ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \
2863 1.4 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), (__mmask8)(U)))
2864 1.4 mrg
/* maskz form: zero vector as the fourth operand, U the mask.  */
2865 1.4 mrg #define _mm_maskz_reduce_ss(U, A, B, C) \
2866 1.4 mrg ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \
2867 1.4 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
2868 1.4 mrg (__mmask8)(U)))
2869 1.4 mrg
/* Scalar single-precision reduce macros with an explicit last argument R
   (cast to int): each forwards to __builtin_ia32_reducess_mask_round and
   casts the result to __m128.  Unmasked form: zero vector as the fourth
   operand, all-ones __mmask8.  */
2870 1.7 mrg #define _mm_reduce_round_ss(A, B, C, R) \
2871 1.7 mrg ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \
2872 1.7 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
2873 1.7 mrg (__mmask8)(-1), (int)(R)))
2874 1.7 mrg
/* mask form: W is the fourth operand, U the mask.  */
2875 1.7 mrg #define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \
2876 1.7 mrg ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \
2877 1.7 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \
2878 1.7 mrg (__mmask8)(U), (int)(R)))
2879 1.7 mrg
/* maskz form: zero vector as the fourth operand, U the mask.  */
2880 1.7 mrg #define _mm_maskz_reduce_round_ss(U, A, B, C, R) \
2881 1.7 mrg ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \
2882 1.7 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
2883 1.7 mrg (__mmask8)(U), (int)(R)))
2884 1.4 mrg
2885 1.1 mrg
2886 1.1 mrg #endif
2887 1.1 mrg
/* If the top of this header had to switch on the avx512dq target itself
   (it defines __DISABLE_AVX512DQ__ after its push_options when __AVX512DQ__
   was not already defined), drop that marker and pop the pushed options.  */
2888 1.1 mrg #ifdef __DISABLE_AVX512DQ__
2889 1.1 mrg #undef __DISABLE_AVX512DQ__
2890 1.1 mrg #pragma GCC pop_options
2891 1.1 mrg #endif /* __DISABLE_AVX512DQ__ */
2892 1.1 mrg
2893 1.1 mrg #endif /* _AVX512DQINTRIN_H_INCLUDED */
2894