/* Copyright (C) 2014-2022 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512DQINTRIN_H_INCLUDED
29 #define _AVX512DQINTRIN_H_INCLUDED
30
31 #ifndef __AVX512DQ__
32 #pragma GCC push_options
33 #pragma GCC target("avx512dq")
34 #define __DISABLE_AVX512DQ__
35 #endif /* __AVX512DQ__ */
36
37 extern __inline unsigned char
38 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
39 _ktest_mask8_u8 (__mmask8 __A, __mmask8 __B, unsigned char *__CF)
40 {
41 *__CF = (unsigned char) __builtin_ia32_ktestcqi (__A, __B);
42 return (unsigned char) __builtin_ia32_ktestzqi (__A, __B);
43 }
44
45 extern __inline unsigned char
46 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
47 _ktestz_mask8_u8 (__mmask8 __A, __mmask8 __B)
48 {
49 return (unsigned char) __builtin_ia32_ktestzqi (__A, __B);
50 }
51
52 extern __inline unsigned char
53 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
54 _ktestc_mask8_u8 (__mmask8 __A, __mmask8 __B)
55 {
56 return (unsigned char) __builtin_ia32_ktestcqi (__A, __B);
57 }
58
59 extern __inline unsigned char
60 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
61 _ktest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
62 {
63 *__CF = (unsigned char) __builtin_ia32_ktestchi (__A, __B);
64 return (unsigned char) __builtin_ia32_ktestzhi (__A, __B);
65 }
66
67 extern __inline unsigned char
68 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
69 _ktestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
70 {
71 return (unsigned char) __builtin_ia32_ktestzhi (__A, __B);
72 }
73
74 extern __inline unsigned char
75 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
76 _ktestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
77 {
78 return (unsigned char) __builtin_ia32_ktestchi (__A, __B);
79 }
80
81 extern __inline unsigned char
82 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
83 _kortest_mask8_u8 (__mmask8 __A, __mmask8 __B, unsigned char *__CF)
84 {
85 *__CF = (unsigned char) __builtin_ia32_kortestcqi (__A, __B);
86 return (unsigned char) __builtin_ia32_kortestzqi (__A, __B);
87 }
88
89 extern __inline unsigned char
90 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
91 _kortestz_mask8_u8 (__mmask8 __A, __mmask8 __B)
92 {
93 return (unsigned char) __builtin_ia32_kortestzqi (__A, __B);
94 }
95
96 extern __inline unsigned char
97 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
98 _kortestc_mask8_u8 (__mmask8 __A, __mmask8 __B)
99 {
100 return (unsigned char) __builtin_ia32_kortestcqi (__A, __B);
101 }
102
103 extern __inline __mmask8
104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
105 _kadd_mask8 (__mmask8 __A, __mmask8 __B)
106 {
107 return (__mmask8) __builtin_ia32_kaddqi ((__mmask8) __A, (__mmask8) __B);
108 }
109
110 extern __inline __mmask16
111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
112 _kadd_mask16 (__mmask16 __A, __mmask16 __B)
113 {
114 return (__mmask16) __builtin_ia32_kaddhi ((__mmask16) __A, (__mmask16) __B);
115 }
116
117 extern __inline unsigned int
118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
119 _cvtmask8_u32 (__mmask8 __A)
120 {
121 return (unsigned int) __builtin_ia32_kmovb ((__mmask8 ) __A);
122 }
123
124 extern __inline __mmask8
125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126 _cvtu32_mask8 (unsigned int __A)
127 {
128 return (__mmask8) __builtin_ia32_kmovb ((__mmask8) __A);
129 }
130
131 extern __inline __mmask8
132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
133 _load_mask8 (__mmask8 *__A)
134 {
135 return (__mmask8) __builtin_ia32_kmovb (*(__mmask8 *) __A);
136 }
137
138 extern __inline void
139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
140 _store_mask8 (__mmask8 *__A, __mmask8 __B)
141 {
142 *(__mmask8 *) __A = __builtin_ia32_kmovb (__B);
143 }
144
145 extern __inline __mmask8
146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
147 _knot_mask8 (__mmask8 __A)
148 {
149 return (__mmask8) __builtin_ia32_knotqi ((__mmask8) __A);
150 }
151
152 extern __inline __mmask8
153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
154 _kor_mask8 (__mmask8 __A, __mmask8 __B)
155 {
156 return (__mmask8) __builtin_ia32_korqi ((__mmask8) __A, (__mmask8) __B);
157 }
158
159 extern __inline __mmask8
160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
161 _kxnor_mask8 (__mmask8 __A, __mmask8 __B)
162 {
163 return (__mmask8) __builtin_ia32_kxnorqi ((__mmask8) __A, (__mmask8) __B);
164 }
165
166 extern __inline __mmask8
167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
168 _kxor_mask8 (__mmask8 __A, __mmask8 __B)
169 {
170 return (__mmask8) __builtin_ia32_kxorqi ((__mmask8) __A, (__mmask8) __B);
171 }
172
173 extern __inline __mmask8
174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
175 _kand_mask8 (__mmask8 __A, __mmask8 __B)
176 {
177 return (__mmask8) __builtin_ia32_kandqi ((__mmask8) __A, (__mmask8) __B);
178 }
179
180 extern __inline __mmask8
181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
182 _kandn_mask8 (__mmask8 __A, __mmask8 __B)
183 {
184 return (__mmask8) __builtin_ia32_kandnqi ((__mmask8) __A, (__mmask8) __B);
185 }
186
187 extern __inline __m512d
188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
189 _mm512_broadcast_f64x2 (__m128d __A)
190 {
191 return (__m512d)
192 __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
193 _mm512_undefined_pd (),
194 (__mmask8) -1);
195 }
196
197 extern __inline __m512d
198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
199 _mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A)
200 {
201 return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
202 __A,
203 (__v8df)
204 __O, __M);
205 }
206
207 extern __inline __m512d
208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
209 _mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
210 {
211 return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
212 __A,
213 (__v8df)
214 _mm512_setzero_ps (),
215 __M);
216 }
217
218 extern __inline __m512i
219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
220 _mm512_broadcast_i64x2 (__m128i __A)
221 {
222 return (__m512i)
223 __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
224 _mm512_undefined_epi32 (),
225 (__mmask8) -1);
226 }
227
228 extern __inline __m512i
229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
230 _mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A)
231 {
232 return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
233 __A,
234 (__v8di)
235 __O, __M);
236 }
237
238 extern __inline __m512i
239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
240 _mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
241 {
242 return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
243 __A,
244 (__v8di)
245 _mm512_setzero_si512 (),
246 __M);
247 }
248
249 extern __inline __m512
250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
251 _mm512_broadcast_f32x2 (__m128 __A)
252 {
253 return (__m512)
254 __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
255 (__v16sf)_mm512_undefined_ps (),
256 (__mmask16) -1);
257 }
258
259 extern __inline __m512
260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
261 _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
262 {
263 return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
264 (__v16sf)
265 __O, __M);
266 }
267
268 extern __inline __m512
269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
270 _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
271 {
272 return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
273 (__v16sf)
274 _mm512_setzero_ps (),
275 __M);
276 }
277
278 extern __inline __m512i
279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
280 _mm512_broadcast_i32x2 (__m128i __A)
281 {
282 return (__m512i)
283 __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
284 (__v16si)
285 _mm512_undefined_epi32 (),
286 (__mmask16) -1);
287 }
288
289 extern __inline __m512i
290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
291 _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
292 {
293 return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
294 __A,
295 (__v16si)
296 __O, __M);
297 }
298
299 extern __inline __m512i
300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
301 _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
302 {
303 return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
304 __A,
305 (__v16si)
306 _mm512_setzero_si512 (),
307 __M);
308 }
309
310 extern __inline __m512
311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
312 _mm512_broadcast_f32x8 (__m256 __A)
313 {
314 return (__m512)
315 __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
316 _mm512_undefined_ps (),
317 (__mmask16) -1);
318 }
319
320 extern __inline __m512
321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
322 _mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A)
323 {
324 return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
325 (__v16sf)__O,
326 __M);
327 }
328
329 extern __inline __m512
330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
331 _mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A)
332 {
333 return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
334 (__v16sf)
335 _mm512_setzero_ps (),
336 __M);
337 }
338
339 extern __inline __m512i
340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
341 _mm512_broadcast_i32x8 (__m256i __A)
342 {
343 return (__m512i)
344 __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
345 (__v16si)
346 _mm512_undefined_epi32 (),
347 (__mmask16) -1);
348 }
349
350 extern __inline __m512i
351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
352 _mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A)
353 {
354 return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
355 __A,
356 (__v16si)__O,
357 __M);
358 }
359
360 extern __inline __m512i
361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
362 _mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A)
363 {
364 return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
365 __A,
366 (__v16si)
367 _mm512_setzero_si512 (),
368 __M);
369 }
370
371 extern __inline __m512i
372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
373 _mm512_mullo_epi64 (__m512i __A, __m512i __B)
374 {
375 return (__m512i) ((__v8du) __A * (__v8du) __B);
376 }
377
378 extern __inline __m512i
379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
380 _mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
381 __m512i __B)
382 {
383 return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
384 (__v8di) __B,
385 (__v8di) __W,
386 (__mmask8) __U);
387 }
388
389 extern __inline __m512i
390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
391 _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
392 {
393 return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
394 (__v8di) __B,
395 (__v8di)
396 _mm512_setzero_si512 (),
397 (__mmask8) __U);
398 }
399
400 extern __inline __m512d
401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
402 _mm512_xor_pd (__m512d __A, __m512d __B)
403 {
404 return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
405 (__v8df) __B,
406 (__v8df)
407 _mm512_setzero_pd (),
408 (__mmask8) -1);
409 }
410
411 extern __inline __m512d
412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
413 _mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A,
414 __m512d __B)
415 {
416 return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
417 (__v8df) __B,
418 (__v8df) __W,
419 (__mmask8) __U);
420 }
421
422 extern __inline __m512d
423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
424 _mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B)
425 {
426 return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
427 (__v8df) __B,
428 (__v8df)
429 _mm512_setzero_pd (),
430 (__mmask8) __U);
431 }
432
433 extern __inline __m512
434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
435 _mm512_xor_ps (__m512 __A, __m512 __B)
436 {
437 return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
438 (__v16sf) __B,
439 (__v16sf)
440 _mm512_setzero_ps (),
441 (__mmask16) -1);
442 }
443
444 extern __inline __m512
445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
446 _mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
447 {
448 return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
449 (__v16sf) __B,
450 (__v16sf) __W,
451 (__mmask16) __U);
452 }
453
454 extern __inline __m512
455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
456 _mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B)
457 {
458 return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
459 (__v16sf) __B,
460 (__v16sf)
461 _mm512_setzero_ps (),
462 (__mmask16) __U);
463 }
464
465 extern __inline __m512d
466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
467 _mm512_or_pd (__m512d __A, __m512d __B)
468 {
469 return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
470 (__v8df) __B,
471 (__v8df)
472 _mm512_setzero_pd (),
473 (__mmask8) -1);
474 }
475
476 extern __inline __m512d
477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
478 _mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
479 {
480 return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
481 (__v8df) __B,
482 (__v8df) __W,
483 (__mmask8) __U);
484 }
485
486 extern __inline __m512d
487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
488 _mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B)
489 {
490 return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
491 (__v8df) __B,
492 (__v8df)
493 _mm512_setzero_pd (),
494 (__mmask8) __U);
495 }
496
497 extern __inline __m512
498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
499 _mm512_or_ps (__m512 __A, __m512 __B)
500 {
501 return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
502 (__v16sf) __B,
503 (__v16sf)
504 _mm512_setzero_ps (),
505 (__mmask16) -1);
506 }
507
508 extern __inline __m512
509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
510 _mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
511 {
512 return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
513 (__v16sf) __B,
514 (__v16sf) __W,
515 (__mmask16) __U);
516 }
517
518 extern __inline __m512
519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
520 _mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B)
521 {
522 return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
523 (__v16sf) __B,
524 (__v16sf)
525 _mm512_setzero_ps (),
526 (__mmask16) __U);
527 }
528
529 extern __inline __m512d
530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
531 _mm512_and_pd (__m512d __A, __m512d __B)
532 {
533 return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
534 (__v8df) __B,
535 (__v8df)
536 _mm512_setzero_pd (),
537 (__mmask8) -1);
538 }
539
540 extern __inline __m512d
541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
542 _mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A,
543 __m512d __B)
544 {
545 return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
546 (__v8df) __B,
547 (__v8df) __W,
548 (__mmask8) __U);
549 }
550
551 extern __inline __m512d
552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
553 _mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B)
554 {
555 return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
556 (__v8df) __B,
557 (__v8df)
558 _mm512_setzero_pd (),
559 (__mmask8) __U);
560 }
561
562 extern __inline __m512
563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
564 _mm512_and_ps (__m512 __A, __m512 __B)
565 {
566 return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
567 (__v16sf) __B,
568 (__v16sf)
569 _mm512_setzero_ps (),
570 (__mmask16) -1);
571 }
572
573 extern __inline __m512
574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
575 _mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
576 {
577 return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
578 (__v16sf) __B,
579 (__v16sf) __W,
580 (__mmask16) __U);
581 }
582
583 extern __inline __m512
584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
585 _mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B)
586 {
587 return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
588 (__v16sf) __B,
589 (__v16sf)
590 _mm512_setzero_ps (),
591 (__mmask16) __U);
592 }
593
594 extern __inline __m512d
595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
596 _mm512_andnot_pd (__m512d __A, __m512d __B)
597 {
598 return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
599 (__v8df) __B,
600 (__v8df)
601 _mm512_setzero_pd (),
602 (__mmask8) -1);
603 }
604
605 extern __inline __m512d
606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
607 _mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A,
608 __m512d __B)
609 {
610 return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
611 (__v8df) __B,
612 (__v8df) __W,
613 (__mmask8) __U);
614 }
615
616 extern __inline __m512d
617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
618 _mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B)
619 {
620 return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
621 (__v8df) __B,
622 (__v8df)
623 _mm512_setzero_pd (),
624 (__mmask8) __U);
625 }
626
627 extern __inline __m512
628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
629 _mm512_andnot_ps (__m512 __A, __m512 __B)
630 {
631 return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
632 (__v16sf) __B,
633 (__v16sf)
634 _mm512_setzero_ps (),
635 (__mmask16) -1);
636 }
637
638 extern __inline __m512
639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
640 _mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A,
641 __m512 __B)
642 {
643 return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
644 (__v16sf) __B,
645 (__v16sf) __W,
646 (__mmask16) __U);
647 }
648
649 extern __inline __m512
650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
651 _mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B)
652 {
653 return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
654 (__v16sf) __B,
655 (__v16sf)
656 _mm512_setzero_ps (),
657 (__mmask16) __U);
658 }
659
660 extern __inline __mmask16
661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
662 _mm512_movepi32_mask (__m512i __A)
663 {
664 return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
665 }
666
667 extern __inline __mmask8
668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
669 _mm512_movepi64_mask (__m512i __A)
670 {
671 return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
672 }
673
674 extern __inline __m512i
675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
676 _mm512_movm_epi32 (__mmask16 __A)
677 {
678 return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
679 }
680
681 extern __inline __m512i
682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
683 _mm512_movm_epi64 (__mmask8 __A)
684 {
685 return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
686 }
687
688 extern __inline __m512i
689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
690 _mm512_cvttpd_epi64 (__m512d __A)
691 {
692 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
693 (__v8di)
694 _mm512_setzero_si512 (),
695 (__mmask8) -1,
696 _MM_FROUND_CUR_DIRECTION);
697 }
698
699 extern __inline __m512i
700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
701 _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
702 {
703 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
704 (__v8di) __W,
705 (__mmask8) __U,
706 _MM_FROUND_CUR_DIRECTION);
707 }
708
709 extern __inline __m512i
710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711 _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A)
712 {
713 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
714 (__v8di)
715 _mm512_setzero_si512 (),
716 (__mmask8) __U,
717 _MM_FROUND_CUR_DIRECTION);
718 }
719
720 extern __inline __m512i
721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722 _mm512_cvttpd_epu64 (__m512d __A)
723 {
724 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
725 (__v8di)
726 _mm512_setzero_si512 (),
727 (__mmask8) -1,
728 _MM_FROUND_CUR_DIRECTION);
729 }
730
731 extern __inline __m512i
732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
733 _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
734 {
735 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
736 (__v8di) __W,
737 (__mmask8) __U,
738 _MM_FROUND_CUR_DIRECTION);
739 }
740
741 extern __inline __m512i
742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
743 _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A)
744 {
745 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
746 (__v8di)
747 _mm512_setzero_si512 (),
748 (__mmask8) __U,
749 _MM_FROUND_CUR_DIRECTION);
750 }
751
752 extern __inline __m512i
753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
754 _mm512_cvttps_epi64 (__m256 __A)
755 {
756 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
757 (__v8di)
758 _mm512_setzero_si512 (),
759 (__mmask8) -1,
760 _MM_FROUND_CUR_DIRECTION);
761 }
762
763 extern __inline __m512i
764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
765 _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
766 {
767 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
768 (__v8di) __W,
769 (__mmask8) __U,
770 _MM_FROUND_CUR_DIRECTION);
771 }
772
773 extern __inline __m512i
774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
775 _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A)
776 {
777 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
778 (__v8di)
779 _mm512_setzero_si512 (),
780 (__mmask8) __U,
781 _MM_FROUND_CUR_DIRECTION);
782 }
783
784 extern __inline __m512i
785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786 _mm512_cvttps_epu64 (__m256 __A)
787 {
788 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
789 (__v8di)
790 _mm512_setzero_si512 (),
791 (__mmask8) -1,
792 _MM_FROUND_CUR_DIRECTION);
793 }
794
795 extern __inline __m512i
796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797 _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
798 {
799 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
800 (__v8di) __W,
801 (__mmask8) __U,
802 _MM_FROUND_CUR_DIRECTION);
803 }
804
805 extern __inline __m512i
806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
807 _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A)
808 {
809 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
810 (__v8di)
811 _mm512_setzero_si512 (),
812 (__mmask8) __U,
813 _MM_FROUND_CUR_DIRECTION);
814 }
815
816 extern __inline __m512i
817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
818 _mm512_cvtpd_epi64 (__m512d __A)
819 {
820 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
821 (__v8di)
822 _mm512_setzero_si512 (),
823 (__mmask8) -1,
824 _MM_FROUND_CUR_DIRECTION);
825 }
826
827 extern __inline __m512i
828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
829 _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
830 {
831 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
832 (__v8di) __W,
833 (__mmask8) __U,
834 _MM_FROUND_CUR_DIRECTION);
835 }
836
837 extern __inline __m512i
838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
839 _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A)
840 {
841 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
842 (__v8di)
843 _mm512_setzero_si512 (),
844 (__mmask8) __U,
845 _MM_FROUND_CUR_DIRECTION);
846 }
847
848 extern __inline __m512i
849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
850 _mm512_cvtpd_epu64 (__m512d __A)
851 {
852 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
853 (__v8di)
854 _mm512_setzero_si512 (),
855 (__mmask8) -1,
856 _MM_FROUND_CUR_DIRECTION);
857 }
858
859 extern __inline __m512i
860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
861 _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
862 {
863 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
864 (__v8di) __W,
865 (__mmask8) __U,
866 _MM_FROUND_CUR_DIRECTION);
867 }
868
869 extern __inline __m512i
870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
871 _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A)
872 {
873 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
874 (__v8di)
875 _mm512_setzero_si512 (),
876 (__mmask8) __U,
877 _MM_FROUND_CUR_DIRECTION);
878 }
879
880 extern __inline __m512i
881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882 _mm512_cvtps_epi64 (__m256 __A)
883 {
884 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
885 (__v8di)
886 _mm512_setzero_si512 (),
887 (__mmask8) -1,
888 _MM_FROUND_CUR_DIRECTION);
889 }
890
891 extern __inline __m512i
892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
893 _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
894 {
895 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
896 (__v8di) __W,
897 (__mmask8) __U,
898 _MM_FROUND_CUR_DIRECTION);
899 }
900
901 extern __inline __m512i
902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
903 _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A)
904 {
905 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
906 (__v8di)
907 _mm512_setzero_si512 (),
908 (__mmask8) __U,
909 _MM_FROUND_CUR_DIRECTION);
910 }
911
912 extern __inline __m512i
913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
914 _mm512_cvtps_epu64 (__m256 __A)
915 {
916 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
917 (__v8di)
918 _mm512_setzero_si512 (),
919 (__mmask8) -1,
920 _MM_FROUND_CUR_DIRECTION);
921 }
922
923 extern __inline __m512i
924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
925 _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
926 {
927 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
928 (__v8di) __W,
929 (__mmask8) __U,
930 _MM_FROUND_CUR_DIRECTION);
931 }
932
933 extern __inline __m512i
934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
935 _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A)
936 {
937 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
938 (__v8di)
939 _mm512_setzero_si512 (),
940 (__mmask8) __U,
941 _MM_FROUND_CUR_DIRECTION);
942 }
943
944 extern __inline __m256
945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
946 _mm512_cvtepi64_ps (__m512i __A)
947 {
948 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
949 (__v8sf)
950 _mm256_setzero_ps (),
951 (__mmask8) -1,
952 _MM_FROUND_CUR_DIRECTION);
953 }
954
955 extern __inline __m256
956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
957 _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A)
958 {
959 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
960 (__v8sf) __W,
961 (__mmask8) __U,
962 _MM_FROUND_CUR_DIRECTION);
963 }
964
965 extern __inline __m256
966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
967 _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A)
968 {
969 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
970 (__v8sf)
971 _mm256_setzero_ps (),
972 (__mmask8) __U,
973 _MM_FROUND_CUR_DIRECTION);
974 }
975
976 extern __inline __m256
977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
978 _mm512_cvtepu64_ps (__m512i __A)
979 {
980 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
981 (__v8sf)
982 _mm256_setzero_ps (),
983 (__mmask8) -1,
984 _MM_FROUND_CUR_DIRECTION);
985 }
986
987 extern __inline __m256
988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
989 _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A)
990 {
991 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
992 (__v8sf) __W,
993 (__mmask8) __U,
994 _MM_FROUND_CUR_DIRECTION);
995 }
996
997 extern __inline __m256
998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
999 _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A)
1000 {
1001 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1002 (__v8sf)
1003 _mm256_setzero_ps (),
1004 (__mmask8) __U,
1005 _MM_FROUND_CUR_DIRECTION);
1006 }
1007
/* VCVT[U]QQ2PD family: convert eight signed (cvtepi64) or unsigned
   (cvtepu64) 64-bit integers in __A to eight doubles, using the current
   rounding direction.  "mask" = merge-masking (result elements whose bit
   in __U is clear are copied from __W); "maskz" = zero-masking.  */

/* Signed source, unmasked.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi64_pd (__m512i __A)
{
  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) -1,
						    _MM_FROUND_CUR_DIRECTION);
}

/* Signed source, merge-masking.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A)
{
  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
						    (__v8df) __W,
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

/* Signed source, zero-masking.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A)
{
  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

/* Unsigned source, unmasked.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu64_pd (__m512i __A)
{
  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
						     (__v8df)
						     _mm512_setzero_pd (),
						     (__mmask8) -1,
						     _MM_FROUND_CUR_DIRECTION);
}

/* Unsigned source, merge-masking.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A)
{
  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
						     (__v8df) __W,
						     (__mmask8) __U,
						     _MM_FROUND_CUR_DIRECTION);
}

/* Unsigned source, zero-masking.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A)
{
  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
						     (__v8df)
						     _mm512_setzero_pd (),
						     (__mmask8) __U,
						     _MM_FROUND_CUR_DIRECTION);
}
1071
1072 #ifdef __OPTIMIZE__
/* Shift the 8-bit mask __A left (_kshiftli_mask8) or right
   (_kshiftri_mask8) by __B bit positions, shifting in zeros.  These live
   under __OPTIMIZE__ because the builtins require __B to fold to a
   compile-time constant.  */

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_kshiftli_mask8 (__mmask8 __A, unsigned int __B)
{
  return (__mmask8) __builtin_ia32_kshiftliqi ((__mmask8) __A, (__mmask8) __B);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_kshiftri_mask8 (__mmask8 __A, unsigned int __B)
{
  return (__mmask8) __builtin_ia32_kshiftriqi ((__mmask8) __A, (__mmask8) __B);
}
1086
/* VRANGEPD/VRANGEPS: per-element range operation (min/max/abs-min/abs-max
   selected by the immediate __C) on packed doubles (512-bit, 8 elements)
   or floats (512-bit, 16 elements) from __A and __B, with the current
   rounding direction.  "mask" = merge-masking from __W on clear bits of
   __U; "maskz" = zero-masking.  */

/* Packed double, unmasked.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_range_pd (__m512d __A, __m512d __B, int __C)
{
  return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
						   (__v8df) __B, __C,
						   (__v8df)
						   _mm512_setzero_pd (),
						   (__mmask8) -1,
						   _MM_FROUND_CUR_DIRECTION);
}

/* Packed double, merge-masking.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_range_pd (__m512d __W, __mmask8 __U,
		      __m512d __A, __m512d __B, int __C)
{
  return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
						   (__v8df) __B, __C,
						   (__v8df) __W,
						   (__mmask8) __U,
						   _MM_FROUND_CUR_DIRECTION);
}

/* Packed double, zero-masking.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_range_pd (__mmask8 __U, __m512d __A, __m512d __B, int __C)
{
  return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
						   (__v8df) __B, __C,
						   (__v8df)
						   _mm512_setzero_pd (),
						   (__mmask8) __U,
						   _MM_FROUND_CUR_DIRECTION);
}

/* Packed float, unmasked.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_range_ps (__m512 __A, __m512 __B, int __C)
{
  return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
						  (__v16sf) __B, __C,
						  (__v16sf)
						  _mm512_setzero_ps (),
						  (__mmask16) -1,
						  _MM_FROUND_CUR_DIRECTION);
}

/* Packed float, merge-masking.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_range_ps (__m512 __W, __mmask16 __U,
		      __m512 __A, __m512 __B, int __C)
{
  return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
						  (__v16sf) __B, __C,
						  (__v16sf) __W,
						  (__mmask16) __U,
						  _MM_FROUND_CUR_DIRECTION);
}

/* Packed float, zero-masking.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_range_ps (__mmask16 __U, __m512 __A, __m512 __B, int __C)
{
  return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
						  (__v16sf) __B, __C,
						  (__v16sf)
						  _mm512_setzero_ps (),
						  (__mmask16) __U,
						  _MM_FROUND_CUR_DIRECTION);
}
1158
/* VREDUCESD/VREDUCESS: scalar reduce of the low element of __B (the
   operation is selected by the immediate __C), with the upper elements
   of the result taken from __A.  "_round" variants take an explicit
   rounding/SAE control __R; the others use the default.  "mask" =
   merge-masking of the low element from __W on a clear bit of __U;
   "maskz" = zero-masking.  */

/* Scalar double, unmasked.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_sd (__m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A,
						 (__v2df) __B, __C,
						 (__v2df) _mm_setzero_pd (),
						 (__mmask8) -1);
}

/* Scalar double, unmasked, explicit rounding __R.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_round_sd (__m128d __A, __m128d __B, int __C, const int __R)
{
  return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A,
						       (__v2df) __B, __C,
						       (__v2df)
						       _mm_setzero_pd (),
						       (__mmask8) -1, __R);
}

/* Scalar double, merge-masking.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_sd (__m128d __W, __mmask8 __U, __m128d __A,
		    __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A,
						 (__v2df) __B, __C,
						 (__v2df) __W,
						 (__mmask8) __U);
}

/* Scalar double, merge-masking, explicit rounding __R.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
			  __m128d __B, int __C, const int __R)
{
  return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A,
						       (__v2df) __B, __C,
						       (__v2df) __W,
						       __U, __R);
}

/* Scalar double, zero-masking.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A,
						 (__v2df) __B, __C,
						 (__v2df) _mm_setzero_pd (),
						 (__mmask8) __U);
}

/* Scalar double, zero-masking, explicit rounding __R.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			   int __C, const int __R)
{
  return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A,
						       (__v2df) __B, __C,
						       (__v2df)
						       _mm_setzero_pd (),
						       __U, __R);
}

/* Scalar float, unmasked.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_ss (__m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A,
						(__v4sf) __B, __C,
						(__v4sf) _mm_setzero_ps (),
						(__mmask8) -1);
}

/* Scalar float, unmasked, explicit rounding __R.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_round_ss (__m128 __A, __m128 __B, int __C, const int __R)
{
  return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A,
						      (__v4sf) __B, __C,
						      (__v4sf)
						      _mm_setzero_ps (),
						      (__mmask8) -1, __R);
}

/* Scalar float, merge-masking.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_ss (__m128 __W, __mmask8 __U, __m128 __A,
		    __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A,
						(__v4sf) __B, __C,
						(__v4sf) __W,
						(__mmask8) __U);
}

/* Scalar float, merge-masking, explicit rounding __R.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
			  __m128 __B, int __C, const int __R)
{
  return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A,
						      (__v4sf) __B, __C,
						      (__v4sf) __W,
						      __U, __R);
}

/* Scalar float, zero-masking.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A,
						(__v4sf) __B, __C,
						(__v4sf) _mm_setzero_ps (),
						(__mmask8) __U);
}

/* Scalar float, zero-masking, explicit rounding __R.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
			   int __C, const int __R)
{
  return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A,
						      (__v4sf) __B, __C,
						      (__v4sf)
						      _mm_setzero_ps (),
						      __U, __R);
}
1288
/* VRANGESD/VRANGESS: scalar range operation (selected by the immediate
   __C) on the low elements of __A and __B, upper elements from __A,
   using the current rounding direction.  "mask" = merge-masking of the
   low element from __W on a clear bit of __U; "maskz" = zero-masking.  */

/* Scalar double, unmasked.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_range_sd (__m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
							 (__v2df) __B, __C,
							 (__v2df)
							 _mm_setzero_pd (),
							 (__mmask8) -1,
							 _MM_FROUND_CUR_DIRECTION);
}

/* Scalar double, merge-masking.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_range_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
							 (__v2df) __B, __C,
							 (__v2df) __W,
							 (__mmask8) __U,
							 _MM_FROUND_CUR_DIRECTION);
}

/* Scalar double, zero-masking.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_range_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
							 (__v2df) __B, __C,
							 (__v2df)
							 _mm_setzero_pd (),
							 (__mmask8) __U,
							 _MM_FROUND_CUR_DIRECTION);
}

/* Scalar float, unmasked.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_range_ss (__m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
							(__v4sf) __B, __C,
							(__v4sf)
							_mm_setzero_ps (),
							(__mmask8) -1,
							_MM_FROUND_CUR_DIRECTION);
}

/* Scalar float, merge-masking.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_range_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
							(__v4sf) __B, __C,
							(__v4sf) __W,
							(__mmask8) __U,
							_MM_FROUND_CUR_DIRECTION);
}

/* Scalar float, zero-masking.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_range_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
							(__v4sf) __B, __C,
							(__v4sf)
							_mm_setzero_ps (),
							(__mmask8) __U,
							_MM_FROUND_CUR_DIRECTION);
}
1359
/* VRANGESD/VRANGESS with explicit rounding/SAE control __R, otherwise
   identical to the _mm_*range_sd/ss variants above: scalar range
   operation (immediate __C) on the low elements of __A and __B, upper
   elements from __A.  "mask" = merge-masking from __W on a clear bit of
   __U; "maskz" = zero-masking.  */

/* Scalar double, unmasked.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_range_round_sd (__m128d __A, __m128d __B, int __C, const int __R)
{
  return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
							 (__v2df) __B, __C,
							 (__v2df)
							 _mm_setzero_pd (),
							 (__mmask8) -1, __R);
}

/* Scalar double, merge-masking.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_range_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
			 int __C, const int __R)
{
  return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
							 (__v2df) __B, __C,
							 (__v2df) __W,
							 (__mmask8) __U, __R);
}

/* Scalar double, zero-masking.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_range_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C,
			  const int __R)
{
  return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
							 (__v2df) __B, __C,
							 (__v2df)
							 _mm_setzero_pd (),
							 (__mmask8) __U, __R);
}

/* Scalar float, unmasked.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_range_round_ss (__m128 __A, __m128 __B, int __C, const int __R)
{
  return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
							(__v4sf) __B, __C,
							(__v4sf)
							_mm_setzero_ps (),
							(__mmask8) -1, __R);
}

/* Scalar float, merge-masking.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_range_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
			 int __C, const int __R)
{
  return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
							(__v4sf) __B, __C,
							(__v4sf) __W,
							(__mmask8) __U, __R);
}

/* Scalar float, zero-masking.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_range_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C,
			  const int __R)
{
  return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
							(__v4sf) __B, __C,
							(__v4sf)
							_mm_setzero_ps (),
							(__mmask8) __U, __R);
}
1427
/* VFPCLASSSS/VFPCLASSSD: test the low float/double element of __A for
   the FP-class categories selected by the immediate __imm, producing a
   1-bit result in a mask register.  The "mask" variants AND the result
   with the incoming mask __U.  */

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fpclass_ss_mask (__m128 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) __A, __imm,
						   (__mmask8) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fpclass_sd_mask (__m128d __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm,
						   (__mmask8) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fpclass_ss_mask (__mmask8 __U, __m128 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) __A, __imm, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm, __U);
}
1457
/* VCVTTPD2QQ/VCVTTPD2UQQ/VCVTTPS2QQ/VCVTTPS2UQQ: convert-with-truncation
   of eight packed doubles (__m512d) or eight packed floats (low half of
   a __m256) to eight signed (epi64) or unsigned (epu64) 64-bit integers,
   with explicit rounding/SAE control __R.  "mask" = merge-masking from
   __W on clear bits of __U; "maskz" = zero-masking.  */

/* double -> signed, unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundpd_epi64 (__m512d __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
						     (__v8di)
						     _mm512_setzero_si512 (),
						     (__mmask8) -1,
						     __R);
}

/* double -> signed, merge-masking.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
				const int __R)
{
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
						     (__v8di) __W,
						     (__mmask8) __U,
						     __R);
}

/* double -> signed, zero-masking.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundpd_epi64 (__mmask8 __U, __m512d __A,
				 const int __R)
{
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
						     (__v8di)
						     _mm512_setzero_si512 (),
						     (__mmask8) __U,
						     __R);
}

/* double -> unsigned, unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundpd_epu64 (__m512d __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
						      (__v8di)
						      _mm512_setzero_si512 (),
						      (__mmask8) -1,
						      __R);
}

/* double -> unsigned, merge-masking.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
				const int __R)
{
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
						      (__v8di) __W,
						      (__mmask8) __U,
						      __R);
}

/* double -> unsigned, zero-masking.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundpd_epu64 (__mmask8 __U, __m512d __A,
				 const int __R)
{
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
						      (__v8di)
						      _mm512_setzero_si512 (),
						      (__mmask8) __U,
						      __R);
}

/* float -> signed, unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundps_epi64 (__m256 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
						     (__v8di)
						     _mm512_setzero_si512 (),
						     (__mmask8) -1,
						     __R);
}

/* float -> signed, merge-masking.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
				const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
						     (__v8di) __W,
						     (__mmask8) __U,
						     __R);
}

/* float -> signed, zero-masking.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundps_epi64 (__mmask8 __U, __m256 __A,
				 const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
						     (__v8di)
						     _mm512_setzero_si512 (),
						     (__mmask8) __U,
						     __R);
}

/* float -> unsigned, unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundps_epu64 (__m256 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
						      (__v8di)
						      _mm512_setzero_si512 (),
						      (__mmask8) -1,
						      __R);
}

/* float -> unsigned, merge-masking.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
				const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
						      (__v8di) __W,
						      (__mmask8) __U,
						      __R);
}

/* float -> unsigned, zero-masking.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundps_epu64 (__mmask8 __U, __m256 __A,
				 const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
						      (__v8di)
						      _mm512_setzero_si512 (),
						      (__mmask8) __U,
						      __R);
}
1593
/* VCVTPD2QQ/VCVTPD2UQQ/VCVTPS2QQ/VCVTPS2UQQ: round-to-nearest (per __R)
   conversion of eight packed doubles or eight packed floats to eight
   signed (epi64) or unsigned (epu64) 64-bit integers — the non-truncating
   counterparts of the cvtt_round family above.  "mask" = merge-masking
   from __W on clear bits of __U; "maskz" = zero-masking.  */

/* double -> signed, unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_epi64 (__m512d __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
						    (__v8di)
						    _mm512_setzero_si512 (),
						    (__mmask8) -1,
						    __R);
}

/* double -> signed, merge-masking.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
			       const int __R)
{
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
						    (__v8di) __W,
						    (__mmask8) __U,
						    __R);
}

/* double -> signed, zero-masking.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_epi64 (__mmask8 __U, __m512d __A,
				const int __R)
{
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
						    (__v8di)
						    _mm512_setzero_si512 (),
						    (__mmask8) __U,
						    __R);
}

/* double -> unsigned, unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_epu64 (__m512d __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
						     (__v8di)
						     _mm512_setzero_si512 (),
						     (__mmask8) -1,
						     __R);
}

/* double -> unsigned, merge-masking.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
			       const int __R)
{
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
						     (__v8di) __W,
						     (__mmask8) __U,
						     __R);
}

/* double -> unsigned, zero-masking.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_epu64 (__mmask8 __U, __m512d __A,
				const int __R)
{
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
						     (__v8di)
						     _mm512_setzero_si512 (),
						     (__mmask8) __U,
						     __R);
}

/* float -> signed, unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_epi64 (__m256 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
						    (__v8di)
						    _mm512_setzero_si512 (),
						    (__mmask8) -1,
						    __R);
}

/* float -> signed, merge-masking.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
			       const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
						    (__v8di) __W,
						    (__mmask8) __U,
						    __R);
}

/* float -> signed, zero-masking.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_epi64 (__mmask8 __U, __m256 __A,
				const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
						    (__v8di)
						    _mm512_setzero_si512 (),
						    (__mmask8) __U,
						    __R);
}

/* float -> unsigned, unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_epu64 (__m256 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
						     (__v8di)
						     _mm512_setzero_si512 (),
						     (__mmask8) -1,
						     __R);
}

/* float -> unsigned, merge-masking.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
			       const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
						     (__v8di) __W,
						     (__mmask8) __U,
						     __R);
}

/* float -> unsigned, zero-masking.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_epu64 (__mmask8 __U, __m256 __A,
				const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
						     (__v8di)
						     _mm512_setzero_si512 (),
						     (__mmask8) __U,
						     __R);
}
1729
/* VCVT[U]QQ2PS with explicit rounding control __R: convert eight signed
   (epi64) or unsigned (epu64) 64-bit integers in __A to eight
   single-precision floats.  "mask" = merge-masking from __W on clear
   bits of __U; "maskz" = zero-masking.  */

/* signed -> float, unmasked.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepi64_ps (__m512i __A, const int __R)
{
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
						   (__v8sf)
						   _mm256_setzero_ps (),
						   (__mmask8) -1,
						   __R);
}

/* signed -> float, merge-masking.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepi64_ps (__m256 __W, __mmask8 __U, __m512i __A,
			       const int __R)
{
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
						   (__v8sf) __W,
						   (__mmask8) __U,
						   __R);
}

/* signed -> float, zero-masking.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepi64_ps (__mmask8 __U, __m512i __A,
				const int __R)
{
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
						   (__v8sf)
						   _mm256_setzero_ps (),
						   (__mmask8) __U,
						   __R);
}

/* unsigned -> float, unmasked.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepu64_ps (__m512i __A, const int __R)
{
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
						    (__v8sf)
						    _mm256_setzero_ps (),
						    (__mmask8) -1,
						    __R);
}

/* unsigned -> float, merge-masking.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepu64_ps (__m256 __W, __mmask8 __U, __m512i __A,
			       const int __R)
{
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
						    (__v8sf) __W,
						    (__mmask8) __U,
						    __R);
}

/* unsigned -> float, zero-masking.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepu64_ps (__mmask8 __U, __m512i __A,
				const int __R)
{
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
						    (__v8sf)
						    _mm256_setzero_ps (),
						    (__mmask8) __U,
						    __R);
}
1797
/* VCVT[U]QQ2PD with explicit rounding control __R: convert eight signed
   (epi64) or unsigned (epu64) 64-bit integers in __A to eight doubles.
   "mask" = merge-masking from __W on clear bits of __U; "maskz" =
   zero-masking.  */

/* signed -> double, unmasked.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepi64_pd (__m512i __A, const int __R)
{
  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) -1,
						    __R);
}

/* signed -> double, merge-masking.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepi64_pd (__m512d __W, __mmask8 __U, __m512i __A,
			       const int __R)
{
  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
						    (__v8df) __W,
						    (__mmask8) __U,
						    __R);
}

/* signed -> double, zero-masking.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepi64_pd (__mmask8 __U, __m512i __A,
				const int __R)
{
  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) __U,
						    __R);
}

/* unsigned -> double, unmasked.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepu64_pd (__m512i __A, const int __R)
{
  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
						     (__v8df)
						     _mm512_setzero_pd (),
						     (__mmask8) -1,
						     __R);
}

/* unsigned -> double, merge-masking.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepu64_pd (__m512d __W, __mmask8 __U, __m512i __A,
			       const int __R)
{
  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
						     (__v8df) __W,
						     (__mmask8) __U,
						     __R);
}

/* unsigned -> double, zero-masking.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepu64_pd (__mmask8 __U, __m512i __A,
				const int __R)
{
  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
						     (__v8df)
						     _mm512_setzero_pd (),
						     (__mmask8) __U,
						     __R);
}
1865
/* VREDUCEPD/VREDUCEPS: packed reduce operation (selected by the
   immediate __B) on each double (8 elements) or float (16 elements) of
   __A.  "_round" variants take an explicit rounding/SAE control __R.
   "mask" = merge-masking from __W on clear bits of __U; "maskz" =
   zero-masking.  */

/* Packed double, unmasked.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_pd (__m512d __A, int __B)
{
  return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) -1);
}

/* Packed double, unmasked, explicit rounding __R.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_round_pd (__m512d __A, int __B, const int __R)
{
  return (__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df) __A,
							  __B,
							  (__v8df)
							  _mm512_setzero_pd (),
							  (__mmask8) -1, __R);
}

/* Packed double, merge-masking.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_pd (__m512d __W, __mmask8 __U, __m512d __A, int __B)
{
  return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
						    (__v8df) __W,
						    (__mmask8) __U);
}

/* Packed double, merge-masking, explicit rounding __R.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			     int __B, const int __R)
{
  return (__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df) __A,
							  __B,
							  (__v8df) __W,
							  __U, __R);
}

/* Packed double, zero-masking.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_reduce_pd (__mmask8 __U, __m512d __A, int __B)
{
  return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) __U);
}

/* Packed double, zero-masking, explicit rounding __R.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_reduce_round_pd (__mmask8 __U, __m512d __A, int __B,
			      const int __R)
{
  return (__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df) __A,
							  __B,
							  (__v8df)
							  _mm512_setzero_pd (),
							  __U, __R);
}

/* Packed float, unmasked.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_ps (__m512 __A, int __B)
{
  return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
						   (__v16sf)
						   _mm512_setzero_ps (),
						   (__mmask16) -1);
}

/* Packed float, unmasked, explicit rounding __R.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_round_ps (__m512 __A, int __B, const int __R)
{
  return (__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf) __A,
							 __B,
							 (__v16sf)
							 _mm512_setzero_ps (),
							 (__mmask16) -1, __R);
}

/* Packed float, merge-masking.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B)
{
  return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
						   (__v16sf) __W,
						   (__mmask16) __U);
}

/* Packed float, merge-masking, explicit rounding __R.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B,
			     const int __R)
{
  return (__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf) __A,
							 __B,
							 (__v16sf) __W,
							 __U, __R);
}

/* Packed float, zero-masking.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_reduce_ps (__mmask16 __U, __m512 __A, int __B)
{
  return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
						   (__v16sf)
						   _mm512_setzero_ps (),
						   (__mmask16) __U);
}

/* Packed float, zero-masking, explicit rounding __R.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_reduce_round_ps (__mmask16 __U, __m512 __A, int __B,
			      const int __R)
{
  return (__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf) __A,
							 __B,
							 (__v16sf)
							 _mm512_setzero_ps (),
							 __U, __R);
}
1991
1992 extern __inline __m256
1993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1994 _mm512_extractf32x8_ps (__m512 __A, const int __imm)
1995 {
1996 return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
1997 __imm,
1998 (__v8sf)
1999 _mm256_setzero_ps (),
2000 (__mmask8) -1);
2001 }
2002
2003 extern __inline __m256
2004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2005 _mm512_mask_extractf32x8_ps (__m256 __W, __mmask8 __U, __m512 __A,
2006 const int __imm)
2007 {
2008 return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
2009 __imm,
2010 (__v8sf) __W,
2011 (__mmask8) __U);
2012 }
2013
2014 extern __inline __m256
2015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2016 _mm512_maskz_extractf32x8_ps (__mmask8 __U, __m512 __A,
2017 const int __imm)
2018 {
2019 return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
2020 __imm,
2021 (__v8sf)
2022 _mm256_setzero_ps (),
2023 (__mmask8) __U);
2024 }
2025
2026 extern __inline __m128d
2027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028 _mm512_extractf64x2_pd (__m512d __A, const int __imm)
2029 {
2030 return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
2031 __imm,
2032 (__v2df)
2033 _mm_setzero_pd (),
2034 (__mmask8) -1);
2035 }
2036
2037 extern __inline __m128d
2038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2039 _mm512_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m512d __A,
2040 const int __imm)
2041 {
2042 return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
2043 __imm,
2044 (__v2df) __W,
2045 (__mmask8)
2046 __U);
2047 }
2048
2049 extern __inline __m128d
2050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2051 _mm512_maskz_extractf64x2_pd (__mmask8 __U, __m512d __A,
2052 const int __imm)
2053 {
2054 return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
2055 __imm,
2056 (__v2df)
2057 _mm_setzero_pd (),
2058 (__mmask8)
2059 __U);
2060 }
2061
/* _mm512_extracti32x8_epi32 family: extract the 256-bit group of
   32-bit integers selected by __imm from __A.  Plain form (mask -1).  */
2062 extern __inline __m256i
2063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2064 _mm512_extracti32x8_epi32 (__m512i __A, const int __imm)
2065 {
2066 return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
2067 __imm,
2068 (__v8si)
2069 _mm256_setzero_si256 (),
2070 (__mmask8) -1);
2071 }
2072 
/* Merge-masking variant: unselected lanes come from __W.  */
2073 extern __inline __m256i
2074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2075 _mm512_mask_extracti32x8_epi32 (__m256i __W, __mmask8 __U, __m512i __A,
2076 const int __imm)
2077 {
2078 return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
2079 __imm,
2080 (__v8si) __W,
2081 (__mmask8) __U);
2082 }
2083 
/* Zero-masking variant: unselected lanes are zeroed.  */
2084 extern __inline __m256i
2085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2086 _mm512_maskz_extracti32x8_epi32 (__mmask8 __U, __m512i __A,
2087 const int __imm)
2088 {
2089 return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
2090 __imm,
2091 (__v8si)
2092 _mm256_setzero_si256 (),
2093 (__mmask8) __U);
2094 }
2095
/* _mm512_extracti64x2_epi64 family: extract the 128-bit pair of
   64-bit integers selected by __imm from __A.  Plain form (mask -1).  */
2096 extern __inline __m128i
2097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2098 _mm512_extracti64x2_epi64 (__m512i __A, const int __imm)
2099 {
2100 return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
2101 __imm,
2102 (__v2di)
2103 _mm_setzero_si128 (),
2104 (__mmask8) -1);
2105 }
2106 
/* Merge-masking variant: unselected lanes come from __W.  */
2107 extern __inline __m128i
2108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2109 _mm512_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m512i __A,
2110 const int __imm)
2111 {
2112 return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
2113 __imm,
2114 (__v2di) __W,
2115 (__mmask8)
2116 __U);
2117 }
2118 
/* Zero-masking variant: unselected lanes are zeroed.  */
2119 extern __inline __m128i
2120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2121 _mm512_maskz_extracti64x2_epi64 (__mmask8 __U, __m512i __A,
2122 const int __imm)
2123 {
2124 return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
2125 __imm,
2126 (__v2di)
2127 _mm_setzero_si128 (),
2128 (__mmask8)
2129 __U);
2130 }
2131
/* _mm512_range_round_pd family: VRANGEPD on __A/__B with operation
   selector __C and explicit rounding/SAE control __R, forwarded
   verbatim to the builtin.  Plain form (mask -1).  */
2132 extern __inline __m512d
2133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2134 _mm512_range_round_pd (__m512d __A, __m512d __B, int __C,
2135 const int __R)
2136 {
2137 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
2138 (__v8df) __B, __C,
2139 (__v8df)
2140 _mm512_setzero_pd (),
2141 (__mmask8) -1,
2142 __R);
2143 }
2144 
/* Merge-masking variant: unselected lanes come from __W.  */
2145 extern __inline __m512d
2146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2147 _mm512_mask_range_round_pd (__m512d __W, __mmask8 __U,
2148 __m512d __A, __m512d __B, int __C,
2149 const int __R)
2150 {
2151 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
2152 (__v8df) __B, __C,
2153 (__v8df) __W,
2154 (__mmask8) __U,
2155 __R);
2156 }
2157 
/* Zero-masking variant: unselected lanes are zeroed.  */
2158 extern __inline __m512d
2159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2160 _mm512_maskz_range_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2161 int __C, const int __R)
2162 {
2163 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
2164 (__v8df) __B, __C,
2165 (__v8df)
2166 _mm512_setzero_pd (),
2167 (__mmask8) __U,
2168 __R);
2169 }
2170
/* _mm512_range_round_ps family: single-precision counterpart of the
   range_round_pd functions above (16 lanes, __mmask16).  Plain form.  */
2171 extern __inline __m512
2172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2173 _mm512_range_round_ps (__m512 __A, __m512 __B, int __C, const int __R)
2174 {
2175 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
2176 (__v16sf) __B, __C,
2177 (__v16sf)
2178 _mm512_setzero_ps (),
2179 (__mmask16) -1,
2180 __R);
2181 }
2182 
/* Merge-masking variant: unselected lanes come from __W.  */
2183 extern __inline __m512
2184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2185 _mm512_mask_range_round_ps (__m512 __W, __mmask16 __U,
2186 __m512 __A, __m512 __B, int __C,
2187 const int __R)
2188 {
2189 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
2190 (__v16sf) __B, __C,
2191 (__v16sf) __W,
2192 (__mmask16) __U,
2193 __R);
2194 }
2195 
/* Zero-masking variant: unselected lanes are zeroed.  */
2196 extern __inline __m512
2197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2198 _mm512_maskz_range_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2199 int __C, const int __R)
2200 {
2201 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
2202 (__v16sf) __B, __C,
2203 (__v16sf)
2204 _mm512_setzero_ps (),
2205 (__mmask16) __U,
2206 __R);
2207 }
2208
/* _mm512_inserti32x8 family: insert the 256-bit integer vector __B
   into __A at the position selected by __imm.  Plain form (mask -1).  */
2209 extern __inline __m512i
2210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2211 _mm512_inserti32x8 (__m512i __A, __m256i __B, const int __imm)
2212 {
2213 return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
2214 (__v8si) __B,
2215 __imm,
2216 (__v16si)
2217 _mm512_setzero_si512 (),
2218 (__mmask16) -1);
2219 }
2220 
/* Merge-masking variant: unselected lanes come from __W.  */
2221 extern __inline __m512i
2222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2223 _mm512_mask_inserti32x8 (__m512i __W, __mmask16 __U, __m512i __A,
2224 __m256i __B, const int __imm)
2225 {
2226 return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
2227 (__v8si) __B,
2228 __imm,
2229 (__v16si) __W,
2230 (__mmask16) __U);
2231 }
2232 
/* Zero-masking variant: unselected lanes are zeroed.  */
2233 extern __inline __m512i
2234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2235 _mm512_maskz_inserti32x8 (__mmask16 __U, __m512i __A, __m256i __B,
2236 const int __imm)
2237 {
2238 return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
2239 (__v8si) __B,
2240 __imm,
2241 (__v16si)
2242 _mm512_setzero_si512 (),
2243 (__mmask16) __U);
2244 }
2245
/* _mm512_insertf32x8 family: insert the 256-bit float vector __B into
   __A at the position selected by __imm.  Plain form (mask -1).  */
2246 extern __inline __m512
2247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2248 _mm512_insertf32x8 (__m512 __A, __m256 __B, const int __imm)
2249 {
2250 return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
2251 (__v8sf) __B,
2252 __imm,
2253 (__v16sf)
2254 _mm512_setzero_ps (),
2255 (__mmask16) -1);
2256 }
2257 
/* Merge-masking variant: unselected lanes come from __W.  */
2258 extern __inline __m512
2259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2260 _mm512_mask_insertf32x8 (__m512 __W, __mmask16 __U, __m512 __A,
2261 __m256 __B, const int __imm)
2262 {
2263 return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
2264 (__v8sf) __B,
2265 __imm,
2266 (__v16sf) __W,
2267 (__mmask16) __U);
2268 }
2269 
/* Zero-masking variant: unselected lanes are zeroed.  */
2270 extern __inline __m512
2271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2272 _mm512_maskz_insertf32x8 (__mmask16 __U, __m512 __A, __m256 __B,
2273 const int __imm)
2274 {
2275 return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
2276 (__v8sf) __B,
2277 __imm,
2278 (__v16sf)
2279 _mm512_setzero_ps (),
2280 (__mmask16) __U);
2281 }
2282
/* _mm512_inserti64x2 family: insert the 128-bit integer vector __B
   into __A at the position selected by __imm.  Plain form (mask -1).  */
2283 extern __inline __m512i
2284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2285 _mm512_inserti64x2 (__m512i __A, __m128i __B, const int __imm)
2286 {
2287 return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
2288 (__v2di) __B,
2289 __imm,
2290 (__v8di)
2291 _mm512_setzero_si512 (),
2292 (__mmask8) -1);
2293 }
2294 
/* Merge-masking variant: unselected lanes come from __W.  */
2295 extern __inline __m512i
2296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2297 _mm512_mask_inserti64x2 (__m512i __W, __mmask8 __U, __m512i __A,
2298 __m128i __B, const int __imm)
2299 {
2300 return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
2301 (__v2di) __B,
2302 __imm,
2303 (__v8di) __W,
2304 (__mmask8)
2305 __U);
2306 }
2307 
/* Zero-masking variant: unselected lanes are zeroed.  */
2308 extern __inline __m512i
2309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2310 _mm512_maskz_inserti64x2 (__mmask8 __U, __m512i __A, __m128i __B,
2311 const int __imm)
2312 {
2313 return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
2314 (__v2di) __B,
2315 __imm,
2316 (__v8di)
2317 _mm512_setzero_si512 (),
2318 (__mmask8)
2319 __U);
2320 }
2321
/* _mm512_insertf64x2 family: insert the 128-bit double vector __B
   into __A at the position selected by __imm.  Plain form (mask -1).  */
2322 extern __inline __m512d
2323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2324 _mm512_insertf64x2 (__m512d __A, __m128d __B, const int __imm)
2325 {
2326 return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
2327 (__v2df) __B,
2328 __imm,
2329 (__v8df)
2330 _mm512_setzero_pd (),
2331 (__mmask8) -1);
2332 }
2333 
/* Merge-masking variant: unselected lanes come from __W.  */
2334 extern __inline __m512d
2335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2336 _mm512_mask_insertf64x2 (__m512d __W, __mmask8 __U, __m512d __A,
2337 __m128d __B, const int __imm)
2338 {
2339 return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
2340 (__v2df) __B,
2341 __imm,
2342 (__v8df) __W,
2343 (__mmask8)
2344 __U);
2345 }
2346 
/* Zero-masking variant: unselected lanes are zeroed.  */
2347 extern __inline __m512d
2348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2349 _mm512_maskz_insertf64x2 (__mmask8 __U, __m512d __A, __m128d __B,
2350 const int __imm)
2351 {
2352 return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
2353 (__v2df) __B,
2354 __imm,
2355 (__v8df)
2356 _mm512_setzero_pd (),
2357 (__mmask8)
2358 __U);
2359 }
2360
/* fpclass family: classify each lane of __A against the category bits
   in __imm, producing a mask of matching lanes.  The masked forms AND
   the result with __U via the builtin's mask operand.  */
2361 extern __inline __mmask8
2362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2363 _mm512_mask_fpclass_pd_mask (__mmask8 __U, __m512d __A,
2364 const int __imm)
2365 {
2366 return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
2367 __imm, __U);
2368 }
2369 
/* Unmasked double-precision classify (mask -1: test every lane).  */
2370 extern __inline __mmask8
2371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2372 _mm512_fpclass_pd_mask (__m512d __A, const int __imm)
2373 {
2374 return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
2375 __imm,
2376 (__mmask8) -1);
2377 }
2378 
/* Masked single-precision classify (16 lanes, __mmask16).  */
2379 extern __inline __mmask16
2380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2381 _mm512_mask_fpclass_ps_mask (__mmask16 __U, __m512 __A,
2382 const int __imm)
2383 {
2384 return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
2385 __imm, __U);
2386 }
2387 
/* Unmasked single-precision classify (mask -1: test every lane).  */
2388 extern __inline __mmask16
2389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2390 _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
2391 {
2392 return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
2393 __imm,
2394 (__mmask16) -1);
2395 }
2396
2397 #else
/* Non-__OPTIMIZE__ macro forms.  These mirror the inline functions in
   the #if branch but must be macros so the immediate arguments reach
   the builtins as literal constants even without inlining.  */

/* 8-bit mask-register shift left/right by immediate Y.  */
2398 #define _kshiftli_mask8(X, Y) \
2399 ((__mmask8) __builtin_ia32_kshiftliqi ((__mmask8)(X), (__mmask8)(Y)))
2400 
2401 #define _kshiftri_mask8(X, Y) \
2402 ((__mmask8) __builtin_ia32_kshiftriqi ((__mmask8)(X), (__mmask8)(Y)))
2403 
/* Scalar VRANGESD/VRANGESS, current rounding direction.  Plain form
   passes mask -1; mask/maskz forms merge from W or zero.  */
2404 #define _mm_range_sd(A, B, C) \
2405 ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
2406 (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \
2407 (__mmask8) -1, _MM_FROUND_CUR_DIRECTION))
2408 
2409 #define _mm_mask_range_sd(W, U, A, B, C) \
2410 ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
2411 (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), \
2412 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
2413 
2414 #define _mm_maskz_range_sd(U, A, B, C) \
2415 ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
2416 (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \
2417 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
2418 
2419 #define _mm_range_ss(A, B, C) \
2420 ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \
2421 (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
2422 (__mmask8) -1, _MM_FROUND_CUR_DIRECTION))
2423 
2424 #define _mm_mask_range_ss(W, U, A, B, C) \
2425 ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \
2426 (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \
2427 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
2428 
2429 #define _mm_maskz_range_ss(U, A, B, C) \
2430 ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \
2431 (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
2432 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
2433 
/* Same scalar range operations with an explicit rounding argument R
   instead of _MM_FROUND_CUR_DIRECTION.  */
2434 #define _mm_range_round_sd(A, B, C, R) \
2435 ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
2436 (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \
2437 (__mmask8) -1, (R)))
2438 
2439 #define _mm_mask_range_round_sd(W, U, A, B, C, R) \
2440 ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
2441 (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), \
2442 (__mmask8)(U), (R)))
2443 
2444 #define _mm_maskz_range_round_sd(U, A, B, C, R) \
2445 ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
2446 (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \
2447 (__mmask8)(U), (R)))
2448 
2449 #define _mm_range_round_ss(A, B, C, R) \
2450 ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \
2451 (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
2452 (__mmask8) -1, (R)))
2453 
2454 #define _mm_mask_range_round_ss(W, U, A, B, C, R) \
2455 ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \
2456 (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \
2457 (__mmask8)(U), (R)))
2458 
2459 #define _mm_maskz_range_round_ss(U, A, B, C, R) \
2460 ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \
2461 (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
2462 (__mmask8)(U), (R)))
2463
/* Rounding-controlled conversions between 64-bit integers and
   floating point, macro forms.  cvtt* truncate; cvt* use the rounding
   mode B/R passed as the last builtin argument.  Each has plain
   (mask -1 with a zero pass-through), merge-mask (W) and zero-mask
   variants.  */
2464 #define _mm512_cvtt_roundpd_epi64(A, B) \
2465 ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di) \
2466 _mm512_setzero_si512 (), \
2467 -1, (B)))
2468 
2469 #define _mm512_mask_cvtt_roundpd_epi64(W, U, A, B) \
2470 ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di)(W), (U), (B)))
2471 
2472 #define _mm512_maskz_cvtt_roundpd_epi64(U, A, B) \
2473 ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
2474 
2475 #define _mm512_cvtt_roundpd_epu64(A, B) \
2476 ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
2477 
2478 #define _mm512_mask_cvtt_roundpd_epu64(W, U, A, B) \
2479 ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)(W), (U), (B)))
2480 
2481 #define _mm512_maskz_cvtt_roundpd_epu64(U, A, B) \
2482 ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
2483 
2484 #define _mm512_cvtt_roundps_epi64(A, B) \
2485 ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
2486 
2487 #define _mm512_mask_cvtt_roundps_epi64(W, U, A, B) \
2488 ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)(W), (U), (B)))
2489 
2490 #define _mm512_maskz_cvtt_roundps_epi64(U, A, B) \
2491 ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
2492 
2493 #define _mm512_cvtt_roundps_epu64(A, B) \
2494 ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
2495 
2496 #define _mm512_mask_cvtt_roundps_epu64(W, U, A, B) \
2497 ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)(W), (U), (B)))
2498 
2499 #define _mm512_maskz_cvtt_roundps_epu64(U, A, B) \
2500 ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
2501 
/* Non-truncating float -> signed/unsigned 64-bit conversions.  */
2502 #define _mm512_cvt_roundpd_epi64(A, B) \
2503 ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
2504 
2505 #define _mm512_mask_cvt_roundpd_epi64(W, U, A, B) \
2506 ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)(W), (U), (B)))
2507 
2508 #define _mm512_maskz_cvt_roundpd_epi64(U, A, B) \
2509 ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
2510 
2511 #define _mm512_cvt_roundpd_epu64(A, B) \
2512 ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
2513 
2514 #define _mm512_mask_cvt_roundpd_epu64(W, U, A, B) \
2515 ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)(W), (U), (B)))
2516 
2517 #define _mm512_maskz_cvt_roundpd_epu64(U, A, B) \
2518 ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
2519 
2520 #define _mm512_cvt_roundps_epi64(A, B) \
2521 ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
2522 
2523 #define _mm512_mask_cvt_roundps_epi64(W, U, A, B) \
2524 ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)(W), (U), (B)))
2525 
2526 #define _mm512_maskz_cvt_roundps_epi64(U, A, B) \
2527 ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
2528 
2529 #define _mm512_cvt_roundps_epu64(A, B) \
2530 ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
2531 
2532 #define _mm512_mask_cvt_roundps_epu64(W, U, A, B) \
2533 ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)(W), (U), (B)))
2534 
2535 #define _mm512_maskz_cvt_roundps_epu64(U, A, B) \
2536 ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
2537 
/* 64-bit integer -> float conversions with rounding control.  */
2538 #define _mm512_cvt_roundepi64_ps(A, B) \
2539 ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), -1, (B)))
2540 
2541 #define _mm512_mask_cvt_roundepi64_ps(W, U, A, B) \
2542 ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (W), (U), (B)))
2543 
2544 #define _mm512_maskz_cvt_roundepi64_ps(U, A, B) \
2545 ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), (U), (B)))
2546 
2547 #define _mm512_cvt_roundepu64_ps(A, B) \
2548 ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), -1, (B)))
2549 
2550 #define _mm512_mask_cvt_roundepu64_ps(W, U, A, B) \
2551 ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (W), (U), (B)))
2552 
2553 #define _mm512_maskz_cvt_roundepu64_ps(U, A, B) \
2554 ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), (U), (B)))
2555 
2556 #define _mm512_cvt_roundepi64_pd(A, B) \
2557 ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), -1, (B)))
2558 
2559 #define _mm512_mask_cvt_roundepi64_pd(W, U, A, B) \
2560 ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (W), (U), (B)))
2561 
2562 #define _mm512_maskz_cvt_roundepi64_pd(U, A, B) \
2563 ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), (U), (B)))
2564 
2565 #define _mm512_cvt_roundepu64_pd(A, B) \
2566 ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), -1, (B)))
2567 
2568 #define _mm512_mask_cvt_roundepu64_pd(W, U, A, B) \
2569 ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (W), (U), (B)))
2570 
2571 #define _mm512_maskz_cvt_roundepu64_pd(U, A, B) \
2572 ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), (U), (B)))
2573
/* Packed VREDUCEPD/VREDUCEPS macro forms: B selects the reduce
   operation; *_round_* variants take an explicit rounding/SAE
   argument R, the others use the builtin without rounding control.  */
2574 #define _mm512_reduce_pd(A, B) \
2575 ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
2576 (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)-1))
2577 
2578 #define _mm512_reduce_round_pd(A, B, R) \
2579 ((__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df)(__m512d)(A),\
2580 (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)-1, (R)))
2581 
2582 #define _mm512_mask_reduce_pd(W, U, A, B) \
2583 ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
2584 (int)(B), (__v8df)(__m512d)(W), (__mmask8)(U)))
2585 
2586 #define _mm512_mask_reduce_round_pd(W, U, A, B, R) \
2587 ((__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df)(__m512d)(A),\
2588 (int)(B), (__v8df)(__m512d)(W), (U), (R)))
2589 
2590 #define _mm512_maskz_reduce_pd(U, A, B) \
2591 ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
2592 (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)(U)))
2593 
2594 #define _mm512_maskz_reduce_round_pd(U, A, B, R) \
2595 ((__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df)(__m512d)(A),\
2596 (int)(B), (__v8df)_mm512_setzero_pd (), (U), (R)))
2597 
2598 #define _mm512_reduce_ps(A, B) \
2599 ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
2600 (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)-1))
2601 
2602 #define _mm512_reduce_round_ps(A, B, R) \
2603 ((__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf)(__m512)(A),\
2604 (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)-1, (R)))
2605 
2606 #define _mm512_mask_reduce_ps(W, U, A, B) \
2607 ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
2608 (int)(B), (__v16sf)(__m512)(W), (__mmask16)(U)))
2609 
2610 #define _mm512_mask_reduce_round_ps(W, U, A, B, R) \
2611 ((__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf)(__m512)(A),\
2612 (int)(B), (__v16sf)(__m512)(W), (U), (R)))
2613 
2614 #define _mm512_maskz_reduce_ps(U, A, B) \
2615 ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
2616 (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)(U)))
2617 
2618 #define _mm512_maskz_reduce_round_ps(U, A, B, R) \
2619 ((__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf)(__m512)(A),\
2620 (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), (R)))
2621
/* Macro forms of the extract intrinsics defined as inline functions
   in the #if branch: pull a 256-bit (f32x8/i32x8) or 128-bit
   (f64x2/i64x2) chunk selected by C out of X, with plain, merge-mask
   and zero-mask variants.  */
2622 #define _mm512_extractf32x8_ps(X, C) \
2623 ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \
2624 (int) (C), (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8)-1))
2625 
2626 #define _mm512_mask_extractf32x8_ps(W, U, X, C) \
2627 ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \
2628 (int) (C), (__v8sf)(__m256) (W), (__mmask8) (U)))
2629 
2630 #define _mm512_maskz_extractf32x8_ps(U, X, C) \
2631 ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \
2632 (int) (C), (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8) (U)))
2633 
2634 #define _mm512_extractf64x2_pd(X, C) \
2635 ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
2636 (int) (C), (__v2df)(__m128d) _mm_setzero_pd (), (__mmask8)-1))
2637 
2638 #define _mm512_mask_extractf64x2_pd(W, U, X, C) \
2639 ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
2640 (int) (C), (__v2df)(__m128d) (W), (__mmask8) (U)))
2641 
2642 #define _mm512_maskz_extractf64x2_pd(U, X, C) \
2643 ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
2644 (int) (C), (__v2df)(__m128d) _mm_setzero_pd (), (__mmask8) (U)))
2645 
2646 #define _mm512_extracti32x8_epi32(X, C) \
2647 ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \
2648 (int) (C), (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8)-1))
2649 
2650 #define _mm512_mask_extracti32x8_epi32(W, U, X, C) \
2651 ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \
2652 (int) (C), (__v8si)(__m256i) (W), (__mmask8) (U)))
2653 
2654 #define _mm512_maskz_extracti32x8_epi32(U, X, C) \
2655 ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \
2656 (int) (C), (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8) (U)))
2657 
2658 #define _mm512_extracti64x2_epi64(X, C) \
2659 ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
2660 (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8)-1))
2661 
2662 #define _mm512_mask_extracti64x2_epi64(W, U, X, C) \
2663 ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
2664 (int) (C), (__v2di)(__m128i) (W), (__mmask8) (U)))
2665 
2666 #define _mm512_maskz_extracti64x2_epi64(U, X, C) \
2667 ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
2668 (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
2669
/* Packed VRANGEPD/VRANGEPS macro forms.  The _round_ variants forward
   R as the rounding/SAE operand; the others use
   _MM_FROUND_CUR_DIRECTION.  */
2670 #define _mm512_range_pd(A, B, C) \
2671 ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
2672 (__v8df)(__m512d)(B), (int)(C), \
2673 (__v8df)_mm512_setzero_pd (), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
2674 
2675 #define _mm512_mask_range_pd(W, U, A, B, C) \
2676 ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
2677 (__v8df)(__m512d)(B), (int)(C), \
2678 (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
2679 
2680 #define _mm512_maskz_range_pd(U, A, B, C) \
2681 ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
2682 (__v8df)(__m512d)(B), (int)(C), \
2683 (__v8df)_mm512_setzero_pd (), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
2684 
2685 #define _mm512_range_ps(A, B, C) \
2686 ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
2687 (__v16sf)(__m512)(B), (int)(C), \
2688 (__v16sf)_mm512_setzero_ps (), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
2689 
2690 #define _mm512_mask_range_ps(W, U, A, B, C) \
2691 ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
2692 (__v16sf)(__m512)(B), (int)(C), \
2693 (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
2694 
2695 #define _mm512_maskz_range_ps(U, A, B, C) \
2696 ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
2697 (__v16sf)(__m512)(B), (int)(C), \
2698 (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
2699 
2700 #define _mm512_range_round_pd(A, B, C, R) \
2701 ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
2702 (__v8df)(__m512d)(B), (int)(C), \
2703 (__v8df)_mm512_setzero_pd (), (__mmask8)-1, (R)))
2704 
2705 #define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
2706 ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
2707 (__v8df)(__m512d)(B), (int)(C), \
2708 (__v8df)(__m512d)(W), (__mmask8)(U), (R)))
2709 
2710 #define _mm512_maskz_range_round_pd(U, A, B, C, R) \
2711 ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
2712 (__v8df)(__m512d)(B), (int)(C), \
2713 (__v8df)_mm512_setzero_pd (), (__mmask8)(U), (R)))
2714 
2715 #define _mm512_range_round_ps(A, B, C, R) \
2716 ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
2717 (__v16sf)(__m512)(B), (int)(C), \
2718 (__v16sf)_mm512_setzero_ps (), (__mmask16)-1, (R)))
2719 
2720 #define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
2721 ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
2722 (__v16sf)(__m512)(B), (int)(C), \
2723 (__v16sf)(__m512)(W), (__mmask16)(U), (R)))
2724 
2725 #define _mm512_maskz_range_round_ps(U, A, B, C, R) \
2726 ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
2727 (__v16sf)(__m512)(B), (int)(C), \
2728 (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), (R)))
2729
/* Macro forms of the insert intrinsics: place Y into X at position C.
   The plain f64x2/i64x2 forms pass X itself as the pass-through
   operand; harmless because the mask is all-ones.  */
2730 #define _mm512_insertf64x2(X, Y, C) \
2731 ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
2732 (__v2df)(__m128d) (Y), (int) (C), (__v8df)(__m512d) (X), \
2733 (__mmask8)-1))
2734 
2735 #define _mm512_mask_insertf64x2(W, U, X, Y, C) \
2736 ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
2737 (__v2df)(__m128d) (Y), (int) (C), (__v8df)(__m512d) (W), \
2738 (__mmask8) (U)))
2739 
2740 #define _mm512_maskz_insertf64x2(U, X, Y, C) \
2741 ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
2742 (__v2df)(__m128d) (Y), (int) (C), \
2743 (__v8df)(__m512d) _mm512_setzero_pd (), (__mmask8) (U)))
2744 
2745 #define _mm512_inserti64x2(X, Y, C) \
2746 ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\
2747 (__v2di)(__m128i) (Y), (int) (C), (__v8di)(__m512i) (X), (__mmask8)-1))
2748 
2749 #define _mm512_mask_inserti64x2(W, U, X, Y, C) \
2750 ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\
2751 (__v2di)(__m128i) (Y), (int) (C), (__v8di)(__m512i) (W), \
2752 (__mmask8) (U)))
2753 
2754 #define _mm512_maskz_inserti64x2(U, X, Y, C) \
2755 ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\
2756 (__v2di)(__m128i) (Y), (int) (C), \
2757 (__v8di)(__m512i) _mm512_setzero_si512 (), (__mmask8) (U)))
2758 
2759 #define _mm512_insertf32x8(X, Y, C) \
2760 ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \
2761 (__v8sf)(__m256) (Y), (int) (C),\
2762 (__v16sf)(__m512)_mm512_setzero_ps (),\
2763 (__mmask16)-1))
2764 
2765 #define _mm512_mask_insertf32x8(W, U, X, Y, C) \
2766 ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \
2767 (__v8sf)(__m256) (Y), (int) (C),\
2768 (__v16sf)(__m512)(W),\
2769 (__mmask16)(U)))
2770 
2771 #define _mm512_maskz_insertf32x8(U, X, Y, C) \
2772 ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \
2773 (__v8sf)(__m256) (Y), (int) (C),\
2774 (__v16sf)(__m512)_mm512_setzero_ps (),\
2775 (__mmask16)(U)))
2776 
2777 #define _mm512_inserti32x8(X, Y, C) \
2778 ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \
2779 (__v8si)(__m256i) (Y), (int) (C),\
2780 (__v16si)(__m512i)_mm512_setzero_si512 (),\
2781 (__mmask16)-1))
2782 
2783 #define _mm512_mask_inserti32x8(W, U, X, Y, C) \
2784 ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \
2785 (__v8si)(__m256i) (Y), (int) (C),\
2786 (__v16si)(__m512i)(W),\
2787 (__mmask16)(U)))
2788 
2789 #define _mm512_maskz_inserti32x8(U, X, Y, C) \
2790 ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \
2791 (__v8si)(__m256i) (Y), (int) (C),\
2792 (__v16si)(__m512i)_mm512_setzero_si512 (),\
2793 (__mmask16)(U)))
2794
/* Classify the lowest float/double element of X against the category
   bits in C; mask -1 so the single result bit is always produced.
   Fix: the stray line-continuation backslashes that followed the
   closing parens have been removed.  They silently glued the next
   (blank) line into each macro definition, and would swallow the
   following #define outright if that blank line were ever deleted.  */
2795 #define _mm_fpclass_ss_mask(X, C) \
2796 ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X), \
2797 (int) (C), (__mmask8) (-1)))
2798 
2799 #define _mm_fpclass_sd_mask(X, C) \
2800 ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), \
2801 (int) (C), (__mmask8) (-1)))
2802
/* Masked scalar and packed fpclass macro forms: classify against the
   category bits in C; the mask argument (U, or -1 for the unmasked
   packed forms) gates which lanes are tested.  */
2803 #define _mm_mask_fpclass_ss_mask(U, X, C) \
2804 ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X), \
2805 (int) (C), (__mmask8) (U)))
2806 
2807 #define _mm_mask_fpclass_sd_mask(U, X, C) \
2808 ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), \
2809 (int) (C), (__mmask8) (U)))
2810 
2811 #define _mm512_mask_fpclass_pd_mask(u, X, C) \
2812 ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \
2813 (int) (C), (__mmask8)(u)))
2814 
2815 #define _mm512_mask_fpclass_ps_mask(u, x, c) \
2816 ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x),\
2817 (int) (c),(__mmask16)(u)))
2818 
2819 #define _mm512_fpclass_pd_mask(X, C) \
2820 ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \
2821 (int) (C), (__mmask8)-1))
2822 
2823 #define _mm512_fpclass_ps_mask(x, c) \
2824 ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x),\
2825 (int) (c),(__mmask16)-1))
2826
/* Scalar VREDUCESD/VREDUCESS macro forms: reduce the lowest element
   of B per operation C, upper elements from A.  _round_ variants take
   an explicit rounding/SAE argument R.  */
2827 #define _mm_reduce_sd(A, B, C) \
2828 ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \
2829 (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \
2830 (__mmask8)-1))
2831 
2832 #define _mm_mask_reduce_sd(W, U, A, B, C) \
2833 ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \
2834 (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), (__mmask8)(U)))
2835 
2836 #define _mm_maskz_reduce_sd(U, A, B, C) \
2837 ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \
2838 (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \
2839 (__mmask8)(U)))
2840 
2841 #define _mm_reduce_round_sd(A, B, C, R) \
2842 ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \
2843 (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \
2844 (__mmask8)(-1), (int)(R)))
2845 
2846 #define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \
2847 ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \
2848 (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), \
2849 (__mmask8)(U), (int)(R)))
2850 
2851 #define _mm_maskz_reduce_round_sd(U, A, B, C, R) \
2852 ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \
2853 (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \
2854 (__mmask8)(U), (int)(R)))
2855 
2856 #define _mm_reduce_ss(A, B, C) \
2857 ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \
2858 (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
2859 (__mmask8)-1))
2860 
2861 #define _mm_mask_reduce_ss(W, U, A, B, C) \
2862 ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \
2863 (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), (__mmask8)(U)))
2864 
2865 #define _mm_maskz_reduce_ss(U, A, B, C) \
2866 ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \
2867 (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
2868 (__mmask8)(U)))
2869 
2870 #define _mm_reduce_round_ss(A, B, C, R) \
2871 ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \
2872 (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
2873 (__mmask8)(-1), (int)(R)))
2874 
2875 #define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \
2876 ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \
2877 (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \
2878 (__mmask8)(U), (int)(R)))
2879 
2880 #define _mm_maskz_reduce_round_ss(U, A, B, C, R) \
2881 ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \
2882 (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
2883 (__mmask8)(U), (int)(R)))
2884
2885
2886 #endif
2887
2888 #ifdef __DISABLE_AVX512DQ__
2889 #undef __DISABLE_AVX512DQ__
2890 #pragma GCC pop_options
2891 #endif /* __DISABLE_AVX512DQ__ */
2892
2893 #endif /* _AVX512DQINTRIN_H_INCLUDED */
2894