avx512dqintrin.h revision 1.3 1 /* Copyright (C) 2014-2017 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512DQINTRIN_H_INCLUDED
29 #define _AVX512DQINTRIN_H_INCLUDED
30
31 #ifndef __AVX512DQ__
32 #pragma GCC push_options
33 #pragma GCC target("avx512dq")
34 #define __DISABLE_AVX512DQ__
35 #endif /* __AVX512DQ__ */
36
37 extern __inline unsigned char
38 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
39 _ktest_mask8_u8 (__mmask8 __A, __mmask8 __B, unsigned char *__CF)
40 {
41 *__CF = (unsigned char) __builtin_ia32_ktestcqi (__A, __B);
42 return (unsigned char) __builtin_ia32_ktestzqi (__A, __B);
43 }
44
45 extern __inline unsigned char
46 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
47 _ktestz_mask8_u8 (__mmask8 __A, __mmask8 __B)
48 {
49 return (unsigned char) __builtin_ia32_ktestzqi (__A, __B);
50 }
51
52 extern __inline unsigned char
53 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
54 _ktestc_mask8_u8 (__mmask8 __A, __mmask8 __B)
55 {
56 return (unsigned char) __builtin_ia32_ktestcqi (__A, __B);
57 }
58
59 extern __inline unsigned char
60 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
61 _ktest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
62 {
63 *__CF = (unsigned char) __builtin_ia32_ktestchi (__A, __B);
64 return (unsigned char) __builtin_ia32_ktestzhi (__A, __B);
65 }
66
67 extern __inline unsigned char
68 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
69 _ktestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
70 {
71 return (unsigned char) __builtin_ia32_ktestzhi (__A, __B);
72 }
73
74 extern __inline unsigned char
75 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
76 _ktestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
77 {
78 return (unsigned char) __builtin_ia32_ktestchi (__A, __B);
79 }
80
81 extern __inline unsigned char
82 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
83 _kortest_mask8_u8 (__mmask8 __A, __mmask8 __B, unsigned char *__CF)
84 {
85 *__CF = (unsigned char) __builtin_ia32_kortestcqi (__A, __B);
86 return (unsigned char) __builtin_ia32_kortestzqi (__A, __B);
87 }
88
89 extern __inline unsigned char
90 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
91 _kortestz_mask8_u8 (__mmask8 __A, __mmask8 __B)
92 {
93 return (unsigned char) __builtin_ia32_kortestzqi (__A, __B);
94 }
95
96 extern __inline unsigned char
97 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
98 _kortestc_mask8_u8 (__mmask8 __A, __mmask8 __B)
99 {
100 return (unsigned char) __builtin_ia32_kortestcqi (__A, __B);
101 }
102
103 extern __inline __mmask8
104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
105 _kadd_mask8 (__mmask8 __A, __mmask8 __B)
106 {
107 return (__mmask8) __builtin_ia32_kaddqi ((__mmask8) __A, (__mmask8) __B);
108 }
109
110 extern __inline __mmask16
111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
112 _kadd_mask16 (__mmask16 __A, __mmask16 __B)
113 {
114 return (__mmask16) __builtin_ia32_kaddhi ((__mmask16) __A, (__mmask16) __B);
115 }
116
117 extern __inline unsigned int
118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
119 _cvtmask8_u32 (__mmask8 __A)
120 {
121 return (unsigned int) __builtin_ia32_kmovb ((__mmask8 ) __A);
122 }
123
124 extern __inline __mmask8
125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126 _cvtu32_mask8 (unsigned int __A)
127 {
128 return (__mmask8) __builtin_ia32_kmovb ((__mmask8) __A);
129 }
130
131 extern __inline __mmask8
132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
133 _load_mask8 (__mmask8 *__A)
134 {
135 return (__mmask8) __builtin_ia32_kmovb (*(__mmask8 *) __A);
136 }
137
138 extern __inline void
139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
140 _store_mask8 (__mmask8 *__A, __mmask8 __B)
141 {
142 *(__mmask8 *) __A = __builtin_ia32_kmovb (__B);
143 }
144
145 extern __inline __mmask8
146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
147 _knot_mask8 (__mmask8 __A)
148 {
149 return (__mmask8) __builtin_ia32_knotqi ((__mmask8) __A);
150 }
151
152 extern __inline __mmask8
153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
154 _kor_mask8 (__mmask8 __A, __mmask8 __B)
155 {
156 return (__mmask8) __builtin_ia32_korqi ((__mmask8) __A, (__mmask8) __B);
157 }
158
159 extern __inline __mmask8
160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
161 _kxnor_mask8 (__mmask8 __A, __mmask8 __B)
162 {
163 return (__mmask8) __builtin_ia32_kxnorqi ((__mmask8) __A, (__mmask8) __B);
164 }
165
166 extern __inline __mmask8
167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
168 _kxor_mask8 (__mmask8 __A, __mmask8 __B)
169 {
170 return (__mmask8) __builtin_ia32_kxorqi ((__mmask8) __A, (__mmask8) __B);
171 }
172
173 extern __inline __mmask8
174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
175 _kand_mask8 (__mmask8 __A, __mmask8 __B)
176 {
177 return (__mmask8) __builtin_ia32_kandqi ((__mmask8) __A, (__mmask8) __B);
178 }
179
180 extern __inline __mmask8
181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
182 _kandn_mask8 (__mmask8 __A, __mmask8 __B)
183 {
184 return (__mmask8) __builtin_ia32_kandnqi ((__mmask8) __A, (__mmask8) __B);
185 }
186
187 extern __inline __m512d
188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
189 _mm512_broadcast_f64x2 (__m128d __A)
190 {
191 return (__m512d)
192 __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
193 _mm512_undefined_pd (),
194 (__mmask8) -1);
195 }
196
197 extern __inline __m512d
198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
199 _mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A)
200 {
201 return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
202 __A,
203 (__v8df)
204 __O, __M);
205 }
206
207 extern __inline __m512d
208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
209 _mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
210 {
211 return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
212 __A,
213 (__v8df)
214 _mm512_setzero_ps (),
215 __M);
216 }
217
218 extern __inline __m512i
219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
220 _mm512_broadcast_i64x2 (__m128i __A)
221 {
222 return (__m512i)
223 __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
224 _mm512_undefined_epi32 (),
225 (__mmask8) -1);
226 }
227
228 extern __inline __m512i
229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
230 _mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A)
231 {
232 return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
233 __A,
234 (__v8di)
235 __O, __M);
236 }
237
238 extern __inline __m512i
239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
240 _mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
241 {
242 return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
243 __A,
244 (__v8di)
245 _mm512_setzero_si512 (),
246 __M);
247 }
248
249 extern __inline __m512
250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
251 _mm512_broadcast_f32x2 (__m128 __A)
252 {
253 return (__m512)
254 __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
255 (__v16sf)_mm512_undefined_ps (),
256 (__mmask16) -1);
257 }
258
259 extern __inline __m512
260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
261 _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
262 {
263 return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
264 (__v16sf)
265 __O, __M);
266 }
267
268 extern __inline __m512
269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
270 _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
271 {
272 return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
273 (__v16sf)
274 _mm512_setzero_ps (),
275 __M);
276 }
277
278 extern __inline __m512i
279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
280 _mm512_broadcast_i32x2 (__m128i __A)
281 {
282 return (__m512i)
283 __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
284 (__v16si)
285 _mm512_undefined_epi32 (),
286 (__mmask16) -1);
287 }
288
289 extern __inline __m512i
290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
291 _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
292 {
293 return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
294 __A,
295 (__v16si)
296 __O, __M);
297 }
298
299 extern __inline __m512i
300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
301 _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
302 {
303 return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
304 __A,
305 (__v16si)
306 _mm512_setzero_si512 (),
307 __M);
308 }
309
310 extern __inline __m512
311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
312 _mm512_broadcast_f32x8 (__m256 __A)
313 {
314 return (__m512)
315 __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
316 _mm512_undefined_ps (),
317 (__mmask16) -1);
318 }
319
320 extern __inline __m512
321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
322 _mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A)
323 {
324 return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
325 (__v16sf)__O,
326 __M);
327 }
328
329 extern __inline __m512
330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
331 _mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A)
332 {
333 return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
334 (__v16sf)
335 _mm512_setzero_ps (),
336 __M);
337 }
338
339 extern __inline __m512i
340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
341 _mm512_broadcast_i32x8 (__m256i __A)
342 {
343 return (__m512i)
344 __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
345 (__v16si)
346 _mm512_undefined_epi32 (),
347 (__mmask16) -1);
348 }
349
350 extern __inline __m512i
351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
352 _mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A)
353 {
354 return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
355 __A,
356 (__v16si)__O,
357 __M);
358 }
359
360 extern __inline __m512i
361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
362 _mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A)
363 {
364 return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
365 __A,
366 (__v16si)
367 _mm512_setzero_si512 (),
368 __M);
369 }
370
371 extern __inline __m512i
372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
373 _mm512_mullo_epi64 (__m512i __A, __m512i __B)
374 {
375 return (__m512i) ((__v8du) __A * (__v8du) __B);
376 }
377
378 extern __inline __m512i
379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
380 _mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
381 __m512i __B)
382 {
383 return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
384 (__v8di) __B,
385 (__v8di) __W,
386 (__mmask8) __U);
387 }
388
389 extern __inline __m512i
390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
391 _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
392 {
393 return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
394 (__v8di) __B,
395 (__v8di)
396 _mm512_setzero_si512 (),
397 (__mmask8) __U);
398 }
399
400 extern __inline __m512d
401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
402 _mm512_xor_pd (__m512d __A, __m512d __B)
403 {
404 return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
405 (__v8df) __B,
406 (__v8df)
407 _mm512_setzero_pd (),
408 (__mmask8) -1);
409 }
410
411 extern __inline __m512d
412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
413 _mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A,
414 __m512d __B)
415 {
416 return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
417 (__v8df) __B,
418 (__v8df) __W,
419 (__mmask8) __U);
420 }
421
422 extern __inline __m512d
423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
424 _mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B)
425 {
426 return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
427 (__v8df) __B,
428 (__v8df)
429 _mm512_setzero_pd (),
430 (__mmask8) __U);
431 }
432
433 extern __inline __m512
434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
435 _mm512_xor_ps (__m512 __A, __m512 __B)
436 {
437 return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
438 (__v16sf) __B,
439 (__v16sf)
440 _mm512_setzero_ps (),
441 (__mmask16) -1);
442 }
443
444 extern __inline __m512
445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
446 _mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
447 {
448 return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
449 (__v16sf) __B,
450 (__v16sf) __W,
451 (__mmask16) __U);
452 }
453
454 extern __inline __m512
455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
456 _mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B)
457 {
458 return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
459 (__v16sf) __B,
460 (__v16sf)
461 _mm512_setzero_ps (),
462 (__mmask16) __U);
463 }
464
465 extern __inline __m512d
466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
467 _mm512_or_pd (__m512d __A, __m512d __B)
468 {
469 return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
470 (__v8df) __B,
471 (__v8df)
472 _mm512_setzero_pd (),
473 (__mmask8) -1);
474 }
475
476 extern __inline __m512d
477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
478 _mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
479 {
480 return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
481 (__v8df) __B,
482 (__v8df) __W,
483 (__mmask8) __U);
484 }
485
486 extern __inline __m512d
487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
488 _mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B)
489 {
490 return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
491 (__v8df) __B,
492 (__v8df)
493 _mm512_setzero_pd (),
494 (__mmask8) __U);
495 }
496
497 extern __inline __m512
498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
499 _mm512_or_ps (__m512 __A, __m512 __B)
500 {
501 return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
502 (__v16sf) __B,
503 (__v16sf)
504 _mm512_setzero_ps (),
505 (__mmask16) -1);
506 }
507
508 extern __inline __m512
509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
510 _mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
511 {
512 return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
513 (__v16sf) __B,
514 (__v16sf) __W,
515 (__mmask16) __U);
516 }
517
518 extern __inline __m512
519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
520 _mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B)
521 {
522 return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
523 (__v16sf) __B,
524 (__v16sf)
525 _mm512_setzero_ps (),
526 (__mmask16) __U);
527 }
528
529 extern __inline __m512d
530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
531 _mm512_and_pd (__m512d __A, __m512d __B)
532 {
533 return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
534 (__v8df) __B,
535 (__v8df)
536 _mm512_setzero_pd (),
537 (__mmask8) -1);
538 }
539
540 extern __inline __m512d
541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
542 _mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A,
543 __m512d __B)
544 {
545 return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
546 (__v8df) __B,
547 (__v8df) __W,
548 (__mmask8) __U);
549 }
550
551 extern __inline __m512d
552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
553 _mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B)
554 {
555 return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
556 (__v8df) __B,
557 (__v8df)
558 _mm512_setzero_pd (),
559 (__mmask8) __U);
560 }
561
562 extern __inline __m512
563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
564 _mm512_and_ps (__m512 __A, __m512 __B)
565 {
566 return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
567 (__v16sf) __B,
568 (__v16sf)
569 _mm512_setzero_ps (),
570 (__mmask16) -1);
571 }
572
573 extern __inline __m512
574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
575 _mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
576 {
577 return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
578 (__v16sf) __B,
579 (__v16sf) __W,
580 (__mmask16) __U);
581 }
582
583 extern __inline __m512
584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
585 _mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B)
586 {
587 return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
588 (__v16sf) __B,
589 (__v16sf)
590 _mm512_setzero_ps (),
591 (__mmask16) __U);
592 }
593
594 extern __inline __m512d
595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
596 _mm512_andnot_pd (__m512d __A, __m512d __B)
597 {
598 return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
599 (__v8df) __B,
600 (__v8df)
601 _mm512_setzero_pd (),
602 (__mmask8) -1);
603 }
604
605 extern __inline __m512d
606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
607 _mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A,
608 __m512d __B)
609 {
610 return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
611 (__v8df) __B,
612 (__v8df) __W,
613 (__mmask8) __U);
614 }
615
616 extern __inline __m512d
617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
618 _mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B)
619 {
620 return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
621 (__v8df) __B,
622 (__v8df)
623 _mm512_setzero_pd (),
624 (__mmask8) __U);
625 }
626
627 extern __inline __m512
628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
629 _mm512_andnot_ps (__m512 __A, __m512 __B)
630 {
631 return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
632 (__v16sf) __B,
633 (__v16sf)
634 _mm512_setzero_ps (),
635 (__mmask16) -1);
636 }
637
638 extern __inline __m512
639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
640 _mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A,
641 __m512 __B)
642 {
643 return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
644 (__v16sf) __B,
645 (__v16sf) __W,
646 (__mmask16) __U);
647 }
648
649 extern __inline __m512
650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
651 _mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B)
652 {
653 return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
654 (__v16sf) __B,
655 (__v16sf)
656 _mm512_setzero_ps (),
657 (__mmask16) __U);
658 }
659
660 extern __inline __mmask16
661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
662 _mm512_movepi32_mask (__m512i __A)
663 {
664 return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
665 }
666
667 extern __inline __mmask8
668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
669 _mm512_movepi64_mask (__m512i __A)
670 {
671 return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
672 }
673
674 extern __inline __m512i
675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
676 _mm512_movm_epi32 (__mmask16 __A)
677 {
678 return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
679 }
680
681 extern __inline __m512i
682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
683 _mm512_movm_epi64 (__mmask8 __A)
684 {
685 return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
686 }
687
688 extern __inline __m512i
689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
690 _mm512_cvttpd_epi64 (__m512d __A)
691 {
692 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
693 (__v8di)
694 _mm512_setzero_si512 (),
695 (__mmask8) -1,
696 _MM_FROUND_CUR_DIRECTION);
697 }
698
699 extern __inline __m512i
700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
701 _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
702 {
703 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
704 (__v8di) __W,
705 (__mmask8) __U,
706 _MM_FROUND_CUR_DIRECTION);
707 }
708
709 extern __inline __m512i
710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711 _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A)
712 {
713 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
714 (__v8di)
715 _mm512_setzero_si512 (),
716 (__mmask8) __U,
717 _MM_FROUND_CUR_DIRECTION);
718 }
719
720 extern __inline __m512i
721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722 _mm512_cvttpd_epu64 (__m512d __A)
723 {
724 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
725 (__v8di)
726 _mm512_setzero_si512 (),
727 (__mmask8) -1,
728 _MM_FROUND_CUR_DIRECTION);
729 }
730
731 extern __inline __m512i
732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
733 _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
734 {
735 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
736 (__v8di) __W,
737 (__mmask8) __U,
738 _MM_FROUND_CUR_DIRECTION);
739 }
740
741 extern __inline __m512i
742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
743 _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A)
744 {
745 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
746 (__v8di)
747 _mm512_setzero_si512 (),
748 (__mmask8) __U,
749 _MM_FROUND_CUR_DIRECTION);
750 }
751
752 extern __inline __m512i
753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
754 _mm512_cvttps_epi64 (__m256 __A)
755 {
756 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
757 (__v8di)
758 _mm512_setzero_si512 (),
759 (__mmask8) -1,
760 _MM_FROUND_CUR_DIRECTION);
761 }
762
763 extern __inline __m512i
764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
765 _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
766 {
767 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
768 (__v8di) __W,
769 (__mmask8) __U,
770 _MM_FROUND_CUR_DIRECTION);
771 }
772
773 extern __inline __m512i
774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
775 _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A)
776 {
777 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
778 (__v8di)
779 _mm512_setzero_si512 (),
780 (__mmask8) __U,
781 _MM_FROUND_CUR_DIRECTION);
782 }
783
784 extern __inline __m512i
785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786 _mm512_cvttps_epu64 (__m256 __A)
787 {
788 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
789 (__v8di)
790 _mm512_setzero_si512 (),
791 (__mmask8) -1,
792 _MM_FROUND_CUR_DIRECTION);
793 }
794
795 extern __inline __m512i
796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797 _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
798 {
799 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
800 (__v8di) __W,
801 (__mmask8) __U,
802 _MM_FROUND_CUR_DIRECTION);
803 }
804
805 extern __inline __m512i
806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
807 _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A)
808 {
809 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
810 (__v8di)
811 _mm512_setzero_si512 (),
812 (__mmask8) __U,
813 _MM_FROUND_CUR_DIRECTION);
814 }
815
816 extern __inline __m512i
817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
818 _mm512_cvtpd_epi64 (__m512d __A)
819 {
820 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
821 (__v8di)
822 _mm512_setzero_si512 (),
823 (__mmask8) -1,
824 _MM_FROUND_CUR_DIRECTION);
825 }
826
827 extern __inline __m512i
828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
829 _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
830 {
831 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
832 (__v8di) __W,
833 (__mmask8) __U,
834 _MM_FROUND_CUR_DIRECTION);
835 }
836
837 extern __inline __m512i
838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
839 _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A)
840 {
841 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
842 (__v8di)
843 _mm512_setzero_si512 (),
844 (__mmask8) __U,
845 _MM_FROUND_CUR_DIRECTION);
846 }
847
848 extern __inline __m512i
849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
850 _mm512_cvtpd_epu64 (__m512d __A)
851 {
852 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
853 (__v8di)
854 _mm512_setzero_si512 (),
855 (__mmask8) -1,
856 _MM_FROUND_CUR_DIRECTION);
857 }
858
859 extern __inline __m512i
860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
861 _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
862 {
863 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
864 (__v8di) __W,
865 (__mmask8) __U,
866 _MM_FROUND_CUR_DIRECTION);
867 }
868
869 extern __inline __m512i
870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
871 _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A)
872 {
873 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
874 (__v8di)
875 _mm512_setzero_si512 (),
876 (__mmask8) __U,
877 _MM_FROUND_CUR_DIRECTION);
878 }
879
880 extern __inline __m512i
881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882 _mm512_cvtps_epi64 (__m256 __A)
883 {
884 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
885 (__v8di)
886 _mm512_setzero_si512 (),
887 (__mmask8) -1,
888 _MM_FROUND_CUR_DIRECTION);
889 }
890
891 extern __inline __m512i
892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
893 _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
894 {
895 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
896 (__v8di) __W,
897 (__mmask8) __U,
898 _MM_FROUND_CUR_DIRECTION);
899 }
900
901 extern __inline __m512i
902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
903 _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A)
904 {
905 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
906 (__v8di)
907 _mm512_setzero_si512 (),
908 (__mmask8) __U,
909 _MM_FROUND_CUR_DIRECTION);
910 }
911
912 extern __inline __m512i
913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
914 _mm512_cvtps_epu64 (__m256 __A)
915 {
916 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
917 (__v8di)
918 _mm512_setzero_si512 (),
919 (__mmask8) -1,
920 _MM_FROUND_CUR_DIRECTION);
921 }
922
923 extern __inline __m512i
924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
925 _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
926 {
927 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
928 (__v8di) __W,
929 (__mmask8) __U,
930 _MM_FROUND_CUR_DIRECTION);
931 }
932
933 extern __inline __m512i
934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
935 _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A)
936 {
937 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
938 (__v8di)
939 _mm512_setzero_si512 (),
940 (__mmask8) __U,
941 _MM_FROUND_CUR_DIRECTION);
942 }
943
944 extern __inline __m256
945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
946 _mm512_cvtepi64_ps (__m512i __A)
947 {
948 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
949 (__v8sf)
950 _mm256_setzero_ps (),
951 (__mmask8) -1,
952 _MM_FROUND_CUR_DIRECTION);
953 }
954
955 extern __inline __m256
956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
957 _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A)
958 {
959 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
960 (__v8sf) __W,
961 (__mmask8) __U,
962 _MM_FROUND_CUR_DIRECTION);
963 }
964
965 extern __inline __m256
966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
967 _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A)
968 {
969 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
970 (__v8sf)
971 _mm256_setzero_ps (),
972 (__mmask8) __U,
973 _MM_FROUND_CUR_DIRECTION);
974 }
975
976 extern __inline __m256
977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
978 _mm512_cvtepu64_ps (__m512i __A)
979 {
980 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
981 (__v8sf)
982 _mm256_setzero_ps (),
983 (__mmask8) -1,
984 _MM_FROUND_CUR_DIRECTION);
985 }
986
987 extern __inline __m256
988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
989 _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A)
990 {
991 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
992 (__v8sf) __W,
993 (__mmask8) __U,
994 _MM_FROUND_CUR_DIRECTION);
995 }
996
997 extern __inline __m256
998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
999 _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A)
1000 {
1001 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1002 (__v8sf)
1003 _mm256_setzero_ps (),
1004 (__mmask8) __U,
1005 _MM_FROUND_CUR_DIRECTION);
1006 }
1007
1008 extern __inline __m512d
1009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1010 _mm512_cvtepi64_pd (__m512i __A)
1011 {
1012 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1013 (__v8df)
1014 _mm512_setzero_pd (),
1015 (__mmask8) -1,
1016 _MM_FROUND_CUR_DIRECTION);
1017 }
1018
1019 extern __inline __m512d
1020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1021 _mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A)
1022 {
1023 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1024 (__v8df) __W,
1025 (__mmask8) __U,
1026 _MM_FROUND_CUR_DIRECTION);
1027 }
1028
1029 extern __inline __m512d
1030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1031 _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A)
1032 {
1033 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1034 (__v8df)
1035 _mm512_setzero_pd (),
1036 (__mmask8) __U,
1037 _MM_FROUND_CUR_DIRECTION);
1038 }
1039
/* Unmasked form: forwards __A (as __v8di) to
   __builtin_ia32_cvtuqq2pd512_mask with a _mm512_setzero_pd () vector,
   an all-ones mask ((__mmask8) -1) and _MM_FROUND_CUR_DIRECTION.  */
1040 extern __inline __m512d
1041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1042 _mm512_cvtepu64_pd (__m512i __A)
1043 {
1044 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1045 (__v8df)
1046 _mm512_setzero_pd (),
1047 (__mmask8) -1,
1048 _MM_FROUND_CUR_DIRECTION);
1049 }
1050
/* Forwards __A (as __v8di), __W (as __v8df) and mask __U to
   __builtin_ia32_cvtuqq2pd512_mask with _MM_FROUND_CUR_DIRECTION.
   Presumably lanes not selected by __U keep __W's value -- confirm.  */
1051 extern __inline __m512d
1052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1053 _mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A)
1054 {
1055 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1056 (__v8df) __W,
1057 (__mmask8) __U,
1058 _MM_FROUND_CUR_DIRECTION);
1059 }
1060
/* Forwards __A (as __v8di), a _mm512_setzero_pd () vector and mask __U
   to __builtin_ia32_cvtuqq2pd512_mask with _MM_FROUND_CUR_DIRECTION.
   Presumably lanes not selected by __U come out as zero -- confirm.  */
1061 extern __inline __m512d
1062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1063 _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A)
1064 {
1065 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1066 (__v8df)
1067 _mm512_setzero_pd (),
1068 (__mmask8) __U,
1069 _MM_FROUND_CUR_DIRECTION);
1070 }
1071
1072 #ifdef __OPTIMIZE__
/* Function form for __OPTIMIZE__ builds (the #else branch supplies a
   macro of the same name).  Calls __builtin_ia32_kshiftliqi on __A;
   note that the count __B is cast to __mmask8 before the call.  */
1073 extern __inline __mmask8
1074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1075 _kshiftli_mask8 (__mmask8 __A, unsigned int __B)
1076 {
1077 return (__mmask8) __builtin_ia32_kshiftliqi ((__mmask8) __A, (__mmask8) __B);
1078 }
1079
/* Function form for __OPTIMIZE__ builds (the #else branch supplies a
   macro of the same name).  Calls __builtin_ia32_kshiftriqi on __A;
   note that the count __B is cast to __mmask8 before the call.  */
1080 extern __inline __mmask8
1081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1082 _kshiftri_mask8 (__mmask8 __A, unsigned int __B)
1083 {
1084 return (__mmask8) __builtin_ia32_kshiftriqi ((__mmask8) __A, (__mmask8) __B);
1085 }
1086
/* Unmasked form: forwards __A and __B (as __v8df) and the selector __C
   to __builtin_ia32_rangepd512_mask with a _mm512_setzero_pd () vector,
   an all-ones mask ((__mmask8) -1) and _MM_FROUND_CUR_DIRECTION.
   NOTE(review): __C presumably has to be a compile-time constant.  */
1087 extern __inline __m512d
1088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1089 _mm512_range_pd (__m512d __A, __m512d __B, int __C)
1090 {
1091 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
1092 (__v8df) __B, __C,
1093 (__v8df)
1094 _mm512_setzero_pd (),
1095 (__mmask8) -1,
1096 _MM_FROUND_CUR_DIRECTION);
1097 }
1098
/* Forwards __A, __B (as __v8df), selector __C, __W (as __v8df) and mask
   __U to __builtin_ia32_rangepd512_mask with _MM_FROUND_CUR_DIRECTION.
   Presumably lanes not selected by __U keep __W's value -- confirm.  */
1099 extern __inline __m512d
1100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1101 _mm512_mask_range_pd (__m512d __W, __mmask8 __U,
1102 __m512d __A, __m512d __B, int __C)
1103 {
1104 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
1105 (__v8df) __B, __C,
1106 (__v8df) __W,
1107 (__mmask8) __U,
1108 _MM_FROUND_CUR_DIRECTION);
1109 }
1110
/* Forwards __A, __B (as __v8df), selector __C, a _mm512_setzero_pd ()
   vector and mask __U to __builtin_ia32_rangepd512_mask with
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes not selected by __U come
   out as zero -- confirm.  */
1111 extern __inline __m512d
1112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1113 _mm512_maskz_range_pd (__mmask8 __U, __m512d __A, __m512d __B, int __C)
1114 {
1115 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
1116 (__v8df) __B, __C,
1117 (__v8df)
1118 _mm512_setzero_pd (),
1119 (__mmask8) __U,
1120 _MM_FROUND_CUR_DIRECTION);
1121 }
1122
/* Unmasked form: forwards __A and __B (as __v16sf) and the selector __C
   to __builtin_ia32_rangeps512_mask with a _mm512_setzero_ps () vector,
   an all-ones mask ((__mmask16) -1) and _MM_FROUND_CUR_DIRECTION.  */
1123 extern __inline __m512
1124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1125 _mm512_range_ps (__m512 __A, __m512 __B, int __C)
1126 {
1127 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
1128 (__v16sf) __B, __C,
1129 (__v16sf)
1130 _mm512_setzero_ps (),
1131 (__mmask16) -1,
1132 _MM_FROUND_CUR_DIRECTION);
1133 }
1134
/* Forwards __A, __B (as __v16sf), selector __C, __W (as __v16sf) and
   mask __U to __builtin_ia32_rangeps512_mask with
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes not selected by __U keep
   __W's value -- confirm.  */
1135 extern __inline __m512
1136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1137 _mm512_mask_range_ps (__m512 __W, __mmask16 __U,
1138 __m512 __A, __m512 __B, int __C)
1139 {
1140 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
1141 (__v16sf) __B, __C,
1142 (__v16sf) __W,
1143 (__mmask16) __U,
1144 _MM_FROUND_CUR_DIRECTION);
1145 }
1146
/* Forwards __A, __B (as __v16sf), selector __C, a _mm512_setzero_ps ()
   vector and mask __U to __builtin_ia32_rangeps512_mask with
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes not selected by __U come
   out as zero -- confirm.  */
1147 extern __inline __m512
1148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1149 _mm512_maskz_range_ps (__mmask16 __U, __m512 __A, __m512 __B, int __C)
1150 {
1151 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
1152 (__v16sf) __B, __C,
1153 (__v16sf)
1154 _mm512_setzero_ps (),
1155 (__mmask16) __U,
1156 _MM_FROUND_CUR_DIRECTION);
1157 }
1158
/* Thin wrapper: passes __A and __B (as __v2df) plus the integer __C
   straight to __builtin_ia32_reducesd and returns the result as
   __m128d.  Defined only inside the __OPTIMIZE__ branch.  */
1159 extern __inline __m128d
1160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1161 _mm_reduce_sd (__m128d __A, __m128d __B, int __C)
1162 {
1163 return (__m128d) __builtin_ia32_reducesd ((__v2df) __A,
1164 (__v2df) __B, __C);
1165 }
1166
/* Thin wrapper: passes __A and __B (as __v4sf) plus the integer __C
   straight to __builtin_ia32_reducess and returns the result as
   __m128.  Defined only inside the __OPTIMIZE__ branch.  */
1167 extern __inline __m128
1168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1169 _mm_reduce_ss (__m128 __A, __m128 __B, int __C)
1170 {
1171 return (__m128) __builtin_ia32_reducess ((__v4sf) __A,
1172 (__v4sf) __B, __C);
1173 }
1174
/* Function form for __OPTIMIZE__ builds (the #else branch supplies a
   macro of the same name).  Calls __builtin_ia32_rangesd128_round on
   __A, __B (as __v2df) and __C with _MM_FROUND_CUR_DIRECTION.  */
1175 extern __inline __m128d
1176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1177 _mm_range_sd (__m128d __A, __m128d __B, int __C)
1178 {
1179 return (__m128d) __builtin_ia32_rangesd128_round ((__v2df) __A,
1180 (__v2df) __B, __C,
1181 _MM_FROUND_CUR_DIRECTION);
1182 }
1183
1184
/* Function form for __OPTIMIZE__ builds (the #else branch supplies a
   macro of the same name).  Calls __builtin_ia32_rangess128_round on
   __A, __B (as __v4sf) and __C with _MM_FROUND_CUR_DIRECTION.  */
1185 extern __inline __m128
1186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1187 _mm_range_ss (__m128 __A, __m128 __B, int __C)
1188 {
1189 return (__m128) __builtin_ia32_rangess128_round ((__v4sf) __A,
1190 (__v4sf) __B, __C,
1191 _MM_FROUND_CUR_DIRECTION);
1192 }
1193
/* Same builtin call as _mm_range_sd, except that the caller-supplied
   __R is passed as the last argument where _mm_range_sd passes
   _MM_FROUND_CUR_DIRECTION.  */
1194 extern __inline __m128d
1195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1196 _mm_range_round_sd (__m128d __A, __m128d __B, int __C, const int __R)
1197 {
1198 return (__m128d) __builtin_ia32_rangesd128_round ((__v2df) __A,
1199 (__v2df) __B, __C,
1200 __R);
1201 }
1202
/* Same builtin call as _mm_range_ss, except that the caller-supplied
   __R is passed as the last argument where _mm_range_ss passes
   _MM_FROUND_CUR_DIRECTION.  */
1203 extern __inline __m128
1204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1205 _mm_range_round_ss (__m128 __A, __m128 __B, int __C, const int __R)
1206 {
1207 return (__m128) __builtin_ia32_rangess128_round ((__v4sf) __A,
1208 (__v4sf) __B, __C,
1209 __R);
1210 }
1211
/* Returns, as an __mmask8, the result of __builtin_ia32_fpclassss
   applied to __A (as __v4sf) and __imm.  NOTE(review): __imm presumably
   selects the categories to test for -- confirm.  */
1212 extern __inline __mmask8
1213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1214 _mm_fpclass_ss_mask (__m128 __A, const int __imm)
1215 {
1216 return (__mmask8) __builtin_ia32_fpclassss ((__v4sf) __A, __imm);
1217 }
1218
/* Returns, as an __mmask8, the result of __builtin_ia32_fpclasssd
   applied to __A (as __v2df) and __imm.  NOTE(review): __imm presumably
   selects the categories to test for -- confirm.  */
1219 extern __inline __mmask8
1220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1221 _mm_fpclass_sd_mask (__m128d __A, const int __imm)
1222 {
1223 return (__mmask8) __builtin_ia32_fpclasssd ((__v2df) __A, __imm);
1224 }
1225
/* Function form for __OPTIMIZE__ builds (a macro of the same name is in
   the #else branch).  Unmasked: forwards __A (as __v8df) to
   __builtin_ia32_cvttpd2qq512_mask with a _mm512_setzero_si512 ()
   vector, an all-ones mask ((__mmask8) -1) and __R as last argument.  */
1226 extern __inline __m512i
1227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1228 _mm512_cvtt_roundpd_epi64 (__m512d __A, const int __R)
1229 {
1230 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
1231 (__v8di)
1232 _mm512_setzero_si512 (),
1233 (__mmask8) -1,
1234 __R);
1235 }
1236
/* Function form for __OPTIMIZE__ builds.  Forwards __A (as __v8df), __W
   (as __v8di), mask __U and __R to __builtin_ia32_cvttpd2qq512_mask.
   Presumably lanes not selected by __U keep __W's value -- confirm.  */
1237 extern __inline __m512i
1238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1239 _mm512_mask_cvtt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
1240 const int __R)
1241 {
1242 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
1243 (__v8di) __W,
1244 (__mmask8) __U,
1245 __R);
1246 }
1247
/* Function form for __OPTIMIZE__ builds.  Forwards __A (as __v8df), a
   _mm512_setzero_si512 () vector, mask __U and __R to
   __builtin_ia32_cvttpd2qq512_mask.  Presumably lanes not selected by
   __U come out as zero -- confirm.  */
1248 extern __inline __m512i
1249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1250 _mm512_maskz_cvtt_roundpd_epi64 (__mmask8 __U, __m512d __A,
1251 const int __R)
1252 {
1253 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
1254 (__v8di)
1255 _mm512_setzero_si512 (),
1256 (__mmask8) __U,
1257 __R);
1258 }
1259
/* Function form for __OPTIMIZE__ builds (a macro of the same name is in
   the #else branch).  Unmasked: forwards __A (as __v8df) to
   __builtin_ia32_cvttpd2uqq512_mask with a _mm512_setzero_si512 ()
   vector, an all-ones mask ((__mmask8) -1) and __R as last argument.  */
1260 extern __inline __m512i
1261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1262 _mm512_cvtt_roundpd_epu64 (__m512d __A, const int __R)
1263 {
1264 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
1265 (__v8di)
1266 _mm512_setzero_si512 (),
1267 (__mmask8) -1,
1268 __R);
1269 }
1270
/* Function form for __OPTIMIZE__ builds.  Forwards __A (as __v8df), __W
   (as __v8di), mask __U and __R to __builtin_ia32_cvttpd2uqq512_mask.
   Presumably lanes not selected by __U keep __W's value -- confirm.  */
1271 extern __inline __m512i
1272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1273 _mm512_mask_cvtt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
1274 const int __R)
1275 {
1276 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
1277 (__v8di) __W,
1278 (__mmask8) __U,
1279 __R);
1280 }
1281
/* Function form for __OPTIMIZE__ builds.  Forwards __A (as __v8df), a
   _mm512_setzero_si512 () vector, mask __U and __R to
   __builtin_ia32_cvttpd2uqq512_mask.  Presumably lanes not selected by
   __U come out as zero -- confirm.  */
1282 extern __inline __m512i
1283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1284 _mm512_maskz_cvtt_roundpd_epu64 (__mmask8 __U, __m512d __A,
1285 const int __R)
1286 {
1287 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
1288 (__v8di)
1289 _mm512_setzero_si512 (),
1290 (__mmask8) __U,
1291 __R);
1292 }
1293
/* Function form for __OPTIMIZE__ builds (a macro of the same name is in
   the #else branch).  Unmasked: forwards the 256-bit __A (as __v8sf) to
   __builtin_ia32_cvttps2qq512_mask with a _mm512_setzero_si512 ()
   vector, an all-ones mask ((__mmask8) -1) and __R as last argument.  */
1294 extern __inline __m512i
1295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1296 _mm512_cvtt_roundps_epi64 (__m256 __A, const int __R)
1297 {
1298 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
1299 (__v8di)
1300 _mm512_setzero_si512 (),
1301 (__mmask8) -1,
1302 __R);
1303 }
1304
/* Function form for __OPTIMIZE__ builds.  Forwards __A (as __v8sf), __W
   (as __v8di), mask __U and __R to __builtin_ia32_cvttps2qq512_mask.
   Presumably lanes not selected by __U keep __W's value -- confirm.  */
1305 extern __inline __m512i
1306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1307 _mm512_mask_cvtt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
1308 const int __R)
1309 {
1310 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
1311 (__v8di) __W,
1312 (__mmask8) __U,
1313 __R);
1314 }
1315
/* Function form for __OPTIMIZE__ builds.  Forwards __A (as __v8sf), a
   _mm512_setzero_si512 () vector, mask __U and __R to
   __builtin_ia32_cvttps2qq512_mask.  Presumably lanes not selected by
   __U come out as zero -- confirm.  */
1316 extern __inline __m512i
1317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1318 _mm512_maskz_cvtt_roundps_epi64 (__mmask8 __U, __m256 __A,
1319 const int __R)
1320 {
1321 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
1322 (__v8di)
1323 _mm512_setzero_si512 (),
1324 (__mmask8) __U,
1325 __R);
1326 }
1327
/* Unmasked form (__OPTIMIZE__ branch): forwards the 256-bit __A (as
   __v8sf) to __builtin_ia32_cvttps2uqq512_mask with a
   _mm512_setzero_si512 () vector, an all-ones mask ((__mmask8) -1) and
   __R as the last argument.  */
1328 extern __inline __m512i
1329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1330 _mm512_cvtt_roundps_epu64 (__m256 __A, const int __R)
1331 {
1332 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
1333 (__v8di)
1334 _mm512_setzero_si512 (),
1335 (__mmask8) -1,
1336 __R);
1337 }
1338
/* Forwards __A (as __v8sf), __W (as __v8di), mask __U and __R to
   __builtin_ia32_cvttps2uqq512_mask.  Presumably lanes not selected by
   __U keep __W's value -- confirm.  */
1339 extern __inline __m512i
1340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1341 _mm512_mask_cvtt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
1342 const int __R)
1343 {
1344 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
1345 (__v8di) __W,
1346 (__mmask8) __U,
1347 __R);
1348 }
1349
/* Forwards __A (as __v8sf), a _mm512_setzero_si512 () vector, mask __U
   and __R to __builtin_ia32_cvttps2uqq512_mask.  Presumably lanes not
   selected by __U come out as zero -- confirm.  */
1350 extern __inline __m512i
1351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1352 _mm512_maskz_cvtt_roundps_epu64 (__mmask8 __U, __m256 __A,
1353 const int __R)
1354 {
1355 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
1356 (__v8di)
1357 _mm512_setzero_si512 (),
1358 (__mmask8) __U,
1359 __R);
1360 }
1361
/* Unmasked form (__OPTIMIZE__ branch): forwards __A (as __v8df) to
   __builtin_ia32_cvtpd2qq512_mask with a _mm512_setzero_si512 ()
   vector, an all-ones mask ((__mmask8) -1) and __R as last argument.  */
1362 extern __inline __m512i
1363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1364 _mm512_cvt_roundpd_epi64 (__m512d __A, const int __R)
1365 {
1366 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
1367 (__v8di)
1368 _mm512_setzero_si512 (),
1369 (__mmask8) -1,
1370 __R);
1371 }
1372
/* Forwards __A (as __v8df), __W (as __v8di), mask __U and __R to
   __builtin_ia32_cvtpd2qq512_mask.  Presumably lanes not selected by
   __U keep __W's value -- confirm.  */
1373 extern __inline __m512i
1374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1375 _mm512_mask_cvt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
1376 const int __R)
1377 {
1378 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
1379 (__v8di) __W,
1380 (__mmask8) __U,
1381 __R);
1382 }
1383
/* Forwards __A (as __v8df), a _mm512_setzero_si512 () vector, mask __U
   and __R to __builtin_ia32_cvtpd2qq512_mask.  Presumably lanes not
   selected by __U come out as zero -- confirm.  */
1384 extern __inline __m512i
1385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1386 _mm512_maskz_cvt_roundpd_epi64 (__mmask8 __U, __m512d __A,
1387 const int __R)
1388 {
1389 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
1390 (__v8di)
1391 _mm512_setzero_si512 (),
1392 (__mmask8) __U,
1393 __R);
1394 }
1395
/* Unmasked form (__OPTIMIZE__ branch): forwards __A (as __v8df) to
   __builtin_ia32_cvtpd2uqq512_mask with a _mm512_setzero_si512 ()
   vector, an all-ones mask ((__mmask8) -1) and __R as last argument.  */
1396 extern __inline __m512i
1397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1398 _mm512_cvt_roundpd_epu64 (__m512d __A, const int __R)
1399 {
1400 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
1401 (__v8di)
1402 _mm512_setzero_si512 (),
1403 (__mmask8) -1,
1404 __R);
1405 }
1406
/* Forwards __A (as __v8df), __W (as __v8di), mask __U and __R to
   __builtin_ia32_cvtpd2uqq512_mask.  Presumably lanes not selected by
   __U keep __W's value -- confirm.  */
1407 extern __inline __m512i
1408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1409 _mm512_mask_cvt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
1410 const int __R)
1411 {
1412 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
1413 (__v8di) __W,
1414 (__mmask8) __U,
1415 __R);
1416 }
1417
/* Forwards __A (as __v8df), a _mm512_setzero_si512 () vector, mask __U
   and __R to __builtin_ia32_cvtpd2uqq512_mask.  Presumably lanes not
   selected by __U come out as zero -- confirm.  */
1418 extern __inline __m512i
1419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1420 _mm512_maskz_cvt_roundpd_epu64 (__mmask8 __U, __m512d __A,
1421 const int __R)
1422 {
1423 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
1424 (__v8di)
1425 _mm512_setzero_si512 (),
1426 (__mmask8) __U,
1427 __R);
1428 }
1429
/* Unmasked form (__OPTIMIZE__ branch): forwards the 256-bit __A (as
   __v8sf) to __builtin_ia32_cvtps2qq512_mask with a
   _mm512_setzero_si512 () vector, an all-ones mask ((__mmask8) -1) and
   __R as the last argument.  */
1430 extern __inline __m512i
1431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1432 _mm512_cvt_roundps_epi64 (__m256 __A, const int __R)
1433 {
1434 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
1435 (__v8di)
1436 _mm512_setzero_si512 (),
1437 (__mmask8) -1,
1438 __R);
1439 }
1440
/* Forwards __A (as __v8sf), __W (as __v8di), mask __U and __R to
   __builtin_ia32_cvtps2qq512_mask.  Presumably lanes not selected by
   __U keep __W's value -- confirm.  */
1441 extern __inline __m512i
1442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1443 _mm512_mask_cvt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
1444 const int __R)
1445 {
1446 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
1447 (__v8di) __W,
1448 (__mmask8) __U,
1449 __R);
1450 }
1451
/* Forwards __A (as __v8sf), a _mm512_setzero_si512 () vector, mask __U
   and __R to __builtin_ia32_cvtps2qq512_mask.  Presumably lanes not
   selected by __U come out as zero -- confirm.  */
1452 extern __inline __m512i
1453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1454 _mm512_maskz_cvt_roundps_epi64 (__mmask8 __U, __m256 __A,
1455 const int __R)
1456 {
1457 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
1458 (__v8di)
1459 _mm512_setzero_si512 (),
1460 (__mmask8) __U,
1461 __R);
1462 }
1463
/* Unmasked form (__OPTIMIZE__ branch): forwards the 256-bit __A (as
   __v8sf) to __builtin_ia32_cvtps2uqq512_mask with a
   _mm512_setzero_si512 () vector, an all-ones mask ((__mmask8) -1) and
   __R as the last argument.  */
1464 extern __inline __m512i
1465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1466 _mm512_cvt_roundps_epu64 (__m256 __A, const int __R)
1467 {
1468 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
1469 (__v8di)
1470 _mm512_setzero_si512 (),
1471 (__mmask8) -1,
1472 __R);
1473 }
1474
/* Forwards __A (as __v8sf), __W (as __v8di), mask __U and __R to
   __builtin_ia32_cvtps2uqq512_mask.  Presumably lanes not selected by
   __U keep __W's value -- confirm.  */
1475 extern __inline __m512i
1476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1477 _mm512_mask_cvt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
1478 const int __R)
1479 {
1480 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
1481 (__v8di) __W,
1482 (__mmask8) __U,
1483 __R);
1484 }
1485
/* Forwards __A (as __v8sf), a _mm512_setzero_si512 () vector, mask __U
   and __R to __builtin_ia32_cvtps2uqq512_mask.  Presumably lanes not
   selected by __U come out as zero -- confirm.  */
1486 extern __inline __m512i
1487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1488 _mm512_maskz_cvt_roundps_epu64 (__mmask8 __U, __m256 __A,
1489 const int __R)
1490 {
1491 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
1492 (__v8di)
1493 _mm512_setzero_si512 (),
1494 (__mmask8) __U,
1495 __R);
1496 }
1497
/* Unmasked form (__OPTIMIZE__ branch): forwards __A (as __v8di) to
   __builtin_ia32_cvtqq2ps512_mask with a _mm256_setzero_ps () vector,
   an all-ones mask ((__mmask8) -1) and __R as the last argument.  The
   result is a 256-bit __m256.  */
1498 extern __inline __m256
1499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1500 _mm512_cvt_roundepi64_ps (__m512i __A, const int __R)
1501 {
1502 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
1503 (__v8sf)
1504 _mm256_setzero_ps (),
1505 (__mmask8) -1,
1506 __R);
1507 }
1508
/* Forwards __A (as __v8di), __W (as __v8sf), mask __U and __R to
   __builtin_ia32_cvtqq2ps512_mask.  Presumably lanes not selected by
   __U keep __W's value -- confirm.  */
1509 extern __inline __m256
1510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1511 _mm512_mask_cvt_roundepi64_ps (__m256 __W, __mmask8 __U, __m512i __A,
1512 const int __R)
1513 {
1514 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
1515 (__v8sf) __W,
1516 (__mmask8) __U,
1517 __R);
1518 }
1519
/* Forwards __A (as __v8di), a _mm256_setzero_ps () vector, mask __U and
   __R to __builtin_ia32_cvtqq2ps512_mask.  Presumably lanes not
   selected by __U come out as zero -- confirm.  */
1520 extern __inline __m256
1521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1522 _mm512_maskz_cvt_roundepi64_ps (__mmask8 __U, __m512i __A,
1523 const int __R)
1524 {
1525 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
1526 (__v8sf)
1527 _mm256_setzero_ps (),
1528 (__mmask8) __U,
1529 __R);
1530 }
1531
/* Unmasked form (__OPTIMIZE__ branch): forwards __A (as __v8di) to
   __builtin_ia32_cvtuqq2ps512_mask with a _mm256_setzero_ps () vector,
   an all-ones mask ((__mmask8) -1) and __R as the last argument.  The
   result is a 256-bit __m256.  */
1532 extern __inline __m256
1533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1534 _mm512_cvt_roundepu64_ps (__m512i __A, const int __R)
1535 {
1536 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1537 (__v8sf)
1538 _mm256_setzero_ps (),
1539 (__mmask8) -1,
1540 __R);
1541 }
1542
/* Same builtin call as _mm512_mask_cvtepu64_ps, but with the
   caller-supplied __R as the last argument instead of
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes not selected by __U keep
   __W's value -- confirm.  */
1543 extern __inline __m256
1544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1545 _mm512_mask_cvt_roundepu64_ps (__m256 __W, __mmask8 __U, __m512i __A,
1546 const int __R)
1547 {
1548 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1549 (__v8sf) __W,
1550 (__mmask8) __U,
1551 __R);
1552 }
1553
/* Same builtin call as _mm512_maskz_cvtepu64_ps, but with the
   caller-supplied __R as the last argument instead of
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes not selected by __U come
   out as zero -- confirm.  */
1554 extern __inline __m256
1555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1556 _mm512_maskz_cvt_roundepu64_ps (__mmask8 __U, __m512i __A,
1557 const int __R)
1558 {
1559 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1560 (__v8sf)
1561 _mm256_setzero_ps (),
1562 (__mmask8) __U,
1563 __R);
1564 }
1565
/* Same builtin call as _mm512_cvtepi64_pd (all-ones mask, zero
   vector), but with the caller-supplied __R as the last argument
   instead of _MM_FROUND_CUR_DIRECTION.  */
1566 extern __inline __m512d
1567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1568 _mm512_cvt_roundepi64_pd (__m512i __A, const int __R)
1569 {
1570 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1571 (__v8df)
1572 _mm512_setzero_pd (),
1573 (__mmask8) -1,
1574 __R);
1575 }
1576
/* Same builtin call as _mm512_mask_cvtepi64_pd, but with the
   caller-supplied __R as the last argument instead of
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes not selected by __U keep
   __W's value -- confirm.  */
1577 extern __inline __m512d
1578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1579 _mm512_mask_cvt_roundepi64_pd (__m512d __W, __mmask8 __U, __m512i __A,
1580 const int __R)
1581 {
1582 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1583 (__v8df) __W,
1584 (__mmask8) __U,
1585 __R);
1586 }
1587
/* Same builtin call as _mm512_maskz_cvtepi64_pd, but with the
   caller-supplied __R as the last argument instead of
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes not selected by __U come
   out as zero -- confirm.  */
1588 extern __inline __m512d
1589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1590 _mm512_maskz_cvt_roundepi64_pd (__mmask8 __U, __m512i __A,
1591 const int __R)
1592 {
1593 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1594 (__v8df)
1595 _mm512_setzero_pd (),
1596 (__mmask8) __U,
1597 __R);
1598 }
1599
/* Same builtin call as _mm512_cvtepu64_pd (all-ones mask, zero
   vector), but with the caller-supplied __R as the last argument
   instead of _MM_FROUND_CUR_DIRECTION.  */
1600 extern __inline __m512d
1601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1602 _mm512_cvt_roundepu64_pd (__m512i __A, const int __R)
1603 {
1604 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1605 (__v8df)
1606 _mm512_setzero_pd (),
1607 (__mmask8) -1,
1608 __R);
1609 }
1610
/* Same builtin call as _mm512_mask_cvtepu64_pd, but with the
   caller-supplied __R as the last argument instead of
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes not selected by __U keep
   __W's value -- confirm.  */
1611 extern __inline __m512d
1612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1613 _mm512_mask_cvt_roundepu64_pd (__m512d __W, __mmask8 __U, __m512i __A,
1614 const int __R)
1615 {
1616 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1617 (__v8df) __W,
1618 (__mmask8) __U,
1619 __R);
1620 }
1621
/* Same builtin call as _mm512_maskz_cvtepu64_pd, but with the
   caller-supplied __R as the last argument instead of
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes not selected by __U come
   out as zero -- confirm.  */
1622 extern __inline __m512d
1623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1624 _mm512_maskz_cvt_roundepu64_pd (__mmask8 __U, __m512i __A,
1625 const int __R)
1626 {
1627 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1628 (__v8df)
1629 _mm512_setzero_pd (),
1630 (__mmask8) __U,
1631 __R);
1632 }
1633
/* Unmasked form (__OPTIMIZE__ branch): forwards __A (as __v8df) and the
   integer __B to __builtin_ia32_reducepd512_mask with a
   _mm512_setzero_pd () vector and an all-ones mask ((__mmask8) -1).
   No rounding argument is passed to this builtin.  */
1634 extern __inline __m512d
1635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1636 _mm512_reduce_pd (__m512d __A, int __B)
1637 {
1638 return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
1639 (__v8df)
1640 _mm512_setzero_pd (),
1641 (__mmask8) -1);
1642 }
1643
/* Forwards __A (as __v8df), the integer __B, __W (as __v8df) and mask
   __U to __builtin_ia32_reducepd512_mask.  Presumably lanes not
   selected by __U keep __W's value -- confirm.  */
1644 extern __inline __m512d
1645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1646 _mm512_mask_reduce_pd (__m512d __W, __mmask8 __U, __m512d __A, int __B)
1647 {
1648 return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
1649 (__v8df) __W,
1650 (__mmask8) __U);
1651 }
1652
/* Forwards __A (as __v8df), the integer __B, a _mm512_setzero_pd ()
   vector and mask __U to __builtin_ia32_reducepd512_mask.  Presumably
   lanes not selected by __U come out as zero -- confirm.  */
1653 extern __inline __m512d
1654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1655 _mm512_maskz_reduce_pd (__mmask8 __U, __m512d __A, int __B)
1656 {
1657 return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
1658 (__v8df)
1659 _mm512_setzero_pd (),
1660 (__mmask8) __U);
1661 }
1662
/* Unmasked form (__OPTIMIZE__ branch): forwards __A (as __v16sf) and
   the integer __B to __builtin_ia32_reduceps512_mask with a
   _mm512_setzero_ps () vector and an all-ones mask ((__mmask16) -1).  */
1663 extern __inline __m512
1664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1665 _mm512_reduce_ps (__m512 __A, int __B)
1666 {
1667 return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
1668 (__v16sf)
1669 _mm512_setzero_ps (),
1670 (__mmask16) -1);
1671 }
1672
/* Forwards __A (as __v16sf), the integer __B, __W (as __v16sf) and the
   16-bit mask __U to __builtin_ia32_reduceps512_mask.  Presumably lanes
   not selected by __U keep __W's value -- confirm.  */
1673 extern __inline __m512
1674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1675 _mm512_mask_reduce_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B)
1676 {
1677 return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
1678 (__v16sf) __W,
1679 (__mmask16) __U);
1680 }
1681
/* Forwards __A (as __v16sf), the integer __B, a _mm512_setzero_ps ()
   vector and the 16-bit mask __U to __builtin_ia32_reduceps512_mask.
   Presumably lanes not selected by __U come out as zero -- confirm.  */
1682 extern __inline __m512
1683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1684 _mm512_maskz_reduce_ps (__mmask16 __U, __m512 __A, int __B)
1685 {
1686 return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
1687 (__v16sf)
1688 _mm512_setzero_ps (),
1689 (__mmask16) __U);
1690 }
1691
/* Unmasked form (__OPTIMIZE__ branch): forwards __A (as __v16sf) and
   __imm to __builtin_ia32_extractf32x8_mask with a _mm256_setzero_ps ()
   vector and an all-ones mask ((__mmask8) -1); returns a __m256.
   NOTE(review): __imm presumably picks which part of __A is returned.  */
1692 extern __inline __m256
1693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1694 _mm512_extractf32x8_ps (__m512 __A, const int __imm)
1695 {
1696 return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
1697 __imm,
1698 (__v8sf)
1699 _mm256_setzero_ps (),
1700 (__mmask8) -1);
1701 }
1702
/* Forwards __A (as __v16sf), __imm, __W (as __v8sf) and mask __U to
   __builtin_ia32_extractf32x8_mask.  Presumably lanes not selected by
   __U keep __W's value -- confirm.  */
1703 extern __inline __m256
1704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705 _mm512_mask_extractf32x8_ps (__m256 __W, __mmask8 __U, __m512 __A,
1706 const int __imm)
1707 {
1708 return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
1709 __imm,
1710 (__v8sf) __W,
1711 (__mmask8) __U);
1712 }
1713
/* Forwards __A (as __v16sf), __imm, a _mm256_setzero_ps () vector and
   mask __U to __builtin_ia32_extractf32x8_mask.  Presumably lanes not
   selected by __U come out as zero -- confirm.  */
1714 extern __inline __m256
1715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1716 _mm512_maskz_extractf32x8_ps (__mmask8 __U, __m512 __A,
1717 const int __imm)
1718 {
1719 return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
1720 __imm,
1721 (__v8sf)
1722 _mm256_setzero_ps (),
1723 (__mmask8) __U);
1724 }
1725
/* Unmasked form (__OPTIMIZE__ branch): forwards __A (as __v8df) and
   __imm to __builtin_ia32_extractf64x2_512_mask with a
   _mm_setzero_pd () vector and an all-ones mask ((__mmask8) -1);
   returns a __m128d.  */
1726 extern __inline __m128d
1727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1728 _mm512_extractf64x2_pd (__m512d __A, const int __imm)
1729 {
1730 return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
1731 __imm,
1732 (__v2df)
1733 _mm_setzero_pd (),
1734 (__mmask8) -1);
1735 }
1736
/* Forwards __A (as __v8df), __imm, __W (as __v2df) and mask __U to
   __builtin_ia32_extractf64x2_512_mask.  Presumably lanes not selected
   by __U keep __W's value -- confirm.  */
1737 extern __inline __m128d
1738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1739 _mm512_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m512d __A,
1740 const int __imm)
1741 {
1742 return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
1743 __imm,
1744 (__v2df) __W,
1745 (__mmask8)
1746 __U);
1747 }
1748
/* Forwards __A (as __v8df), __imm, a _mm_setzero_pd () vector and mask
   __U to __builtin_ia32_extractf64x2_512_mask.  Presumably lanes not
   selected by __U come out as zero -- confirm.  */
1749 extern __inline __m128d
1750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1751 _mm512_maskz_extractf64x2_pd (__mmask8 __U, __m512d __A,
1752 const int __imm)
1753 {
1754 return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
1755 __imm,
1756 (__v2df)
1757 _mm_setzero_pd (),
1758 (__mmask8)
1759 __U);
1760 }
1761
/* Unmasked form (__OPTIMIZE__ branch): forwards __A (as __v16si) and
   __imm to __builtin_ia32_extracti32x8_mask with a
   _mm256_setzero_si256 () vector and an all-ones mask ((__mmask8) -1);
   returns a __m256i.  */
1762 extern __inline __m256i
1763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1764 _mm512_extracti32x8_epi32 (__m512i __A, const int __imm)
1765 {
1766 return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
1767 __imm,
1768 (__v8si)
1769 _mm256_setzero_si256 (),
1770 (__mmask8) -1);
1771 }
1772
/* Forwards __A (as __v16si), __imm, __W (as __v8si) and mask __U to
   __builtin_ia32_extracti32x8_mask.  Presumably lanes not selected by
   __U keep __W's value -- confirm.  */
1773 extern __inline __m256i
1774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1775 _mm512_mask_extracti32x8_epi32 (__m256i __W, __mmask8 __U, __m512i __A,
1776 const int __imm)
1777 {
1778 return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
1779 __imm,
1780 (__v8si) __W,
1781 (__mmask8) __U);
1782 }
1783
/* Forwards __A (as __v16si), __imm, a _mm256_setzero_si256 () vector
   and mask __U to __builtin_ia32_extracti32x8_mask.  Presumably lanes
   not selected by __U come out as zero -- confirm.  */
1784 extern __inline __m256i
1785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1786 _mm512_maskz_extracti32x8_epi32 (__mmask8 __U, __m512i __A,
1787 const int __imm)
1788 {
1789 return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
1790 __imm,
1791 (__v8si)
1792 _mm256_setzero_si256 (),
1793 (__mmask8) __U);
1794 }
1795
/* Unmasked form (__OPTIMIZE__ branch): forwards __A (as __v8di) and
   __imm to __builtin_ia32_extracti64x2_512_mask with a
   _mm_setzero_si128 () vector and an all-ones mask ((__mmask8) -1);
   returns a __m128i.  */
1796 extern __inline __m128i
1797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1798 _mm512_extracti64x2_epi64 (__m512i __A, const int __imm)
1799 {
1800 return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
1801 __imm,
1802 (__v2di)
1803 _mm_setzero_si128 (),
1804 (__mmask8) -1);
1805 }
1806
/* Forwards __A (as __v8di), __imm, __W (as __v2di) and mask __U to
   __builtin_ia32_extracti64x2_512_mask.  Presumably lanes not selected
   by __U keep __W's value -- confirm.  */
1807 extern __inline __m128i
1808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1809 _mm512_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m512i __A,
1810 const int __imm)
1811 {
1812 return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
1813 __imm,
1814 (__v2di) __W,
1815 (__mmask8)
1816 __U);
1817 }
1818
/* Forwards __A (as __v8di), __imm, a _mm_setzero_si128 () vector and
   mask __U to __builtin_ia32_extracti64x2_512_mask.  Presumably lanes
   not selected by __U come out as zero -- confirm.  */
1819 extern __inline __m128i
1820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1821 _mm512_maskz_extracti64x2_epi64 (__mmask8 __U, __m512i __A,
1822 const int __imm)
1823 {
1824 return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
1825 __imm,
1826 (__v2di)
1827 _mm_setzero_si128 (),
1828 (__mmask8)
1829 __U);
1830 }
1831
/* Same builtin call as _mm512_range_pd (all-ones mask, zero vector),
   but with the caller-supplied __R as the last argument instead of
   _MM_FROUND_CUR_DIRECTION.  */
1832 extern __inline __m512d
1833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1834 _mm512_range_round_pd (__m512d __A, __m512d __B, int __C,
1835 const int __R)
1836 {
1837 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
1838 (__v8df) __B, __C,
1839 (__v8df)
1840 _mm512_setzero_pd (),
1841 (__mmask8) -1,
1842 __R);
1843 }
1844
/* Same builtin call as _mm512_mask_range_pd, but with the
   caller-supplied __R as the last argument instead of
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes not selected by __U keep
   __W's value -- confirm.  */
1845 extern __inline __m512d
1846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1847 _mm512_mask_range_round_pd (__m512d __W, __mmask8 __U,
1848 __m512d __A, __m512d __B, int __C,
1849 const int __R)
1850 {
1851 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
1852 (__v8df) __B, __C,
1853 (__v8df) __W,
1854 (__mmask8) __U,
1855 __R);
1856 }
1857
/* Same builtin call as _mm512_maskz_range_pd, but with the
   caller-supplied __R as the last argument instead of
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes not selected by __U come
   out as zero -- confirm.  */
1858 extern __inline __m512d
1859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1860 _mm512_maskz_range_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
1861 int __C, const int __R)
1862 {
1863 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
1864 (__v8df) __B, __C,
1865 (__v8df)
1866 _mm512_setzero_pd (),
1867 (__mmask8) __U,
1868 __R);
1869 }
1870
/* Same builtin call as _mm512_range_ps (all-ones 16-bit mask, zero
   vector), but with the caller-supplied __R as the last argument
   instead of _MM_FROUND_CUR_DIRECTION.  */
1871 extern __inline __m512
1872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1873 _mm512_range_round_ps (__m512 __A, __m512 __B, int __C, const int __R)
1874 {
1875 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
1876 (__v16sf) __B, __C,
1877 (__v16sf)
1878 _mm512_setzero_ps (),
1879 (__mmask16) -1,
1880 __R);
1881 }
1882
/* Same builtin call as _mm512_mask_range_ps, but with the
   caller-supplied __R as the last argument instead of
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes not selected by __U keep
   __W's value -- confirm.  */
1883 extern __inline __m512
1884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1885 _mm512_mask_range_round_ps (__m512 __W, __mmask16 __U,
1886 __m512 __A, __m512 __B, int __C,
1887 const int __R)
1888 {
1889 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
1890 (__v16sf) __B, __C,
1891 (__v16sf) __W,
1892 (__mmask16) __U,
1893 __R);
1894 }
1895
/* Same builtin call as _mm512_maskz_range_ps, but with the
   caller-supplied __R as the last argument instead of
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes not selected by __U come
   out as zero -- confirm.  */
1896 extern __inline __m512
1897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1898 _mm512_maskz_range_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
1899 int __C, const int __R)
1900 {
1901 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
1902 (__v16sf) __B, __C,
1903 (__v16sf)
1904 _mm512_setzero_ps (),
1905 (__mmask16) __U,
1906 __R);
1907 }
1908
/* Unmasked form (__OPTIMIZE__ branch): forwards __A (as __v16si), the
   256-bit __B (as __v8si) and __imm to __builtin_ia32_inserti32x8_mask
   with a _mm512_setzero_si512 () vector and an all-ones mask
   ((__mmask16) -1).  NOTE(review): __imm presumably picks where __B is
   placed inside __A -- confirm.  */
1909 extern __inline __m512i
1910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1911 _mm512_inserti32x8 (__m512i __A, __m256i __B, const int __imm)
1912 {
1913 return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
1914 (__v8si) __B,
1915 __imm,
1916 (__v16si)
1917 _mm512_setzero_si512 (),
1918 (__mmask16) -1);
1919 }
1920
/* Forwards __A (as __v16si), __B (as __v8si), __imm, __W (as __v16si)
   and the 16-bit mask __U to __builtin_ia32_inserti32x8_mask.
   Presumably lanes not selected by __U keep __W's value -- confirm.  */
1921 extern __inline __m512i
1922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1923 _mm512_mask_inserti32x8 (__m512i __W, __mmask16 __U, __m512i __A,
1924 __m256i __B, const int __imm)
1925 {
1926 return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
1927 (__v8si) __B,
1928 __imm,
1929 (__v16si) __W,
1930 (__mmask16) __U);
1931 }
1932
/* Forwards __A (as __v16si), __B (as __v8si), __imm, a
   _mm512_setzero_si512 () vector and the 16-bit mask __U to
   __builtin_ia32_inserti32x8_mask.  Presumably lanes not selected by
   __U come out as zero -- confirm.  */
1933 extern __inline __m512i
1934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1935 _mm512_maskz_inserti32x8 (__mmask16 __U, __m512i __A, __m256i __B,
1936 const int __imm)
1937 {
1938 return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
1939 (__v8si) __B,
1940 __imm,
1941 (__v16si)
1942 _mm512_setzero_si512 (),
1943 (__mmask16) __U);
1944 }
1945
/* Unmasked form (__OPTIMIZE__ branch): forwards __A (as __v16sf), the
   256-bit __B (as __v8sf) and __imm to __builtin_ia32_insertf32x8_mask
   with a _mm512_setzero_ps () vector and an all-ones mask
   ((__mmask16) -1).  */
1946 extern __inline __m512
1947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1948 _mm512_insertf32x8 (__m512 __A, __m256 __B, const int __imm)
1949 {
1950 return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
1951 (__v8sf) __B,
1952 __imm,
1953 (__v16sf)
1954 _mm512_setzero_ps (),
1955 (__mmask16) -1);
1956 }
1957
/* Forwards __A (as __v16sf), __B (as __v8sf), __imm, __W (as __v16sf)
   and the 16-bit mask __U to __builtin_ia32_insertf32x8_mask.
   Presumably lanes not selected by __U keep __W's value -- confirm.  */
1958 extern __inline __m512
1959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1960 _mm512_mask_insertf32x8 (__m512 __W, __mmask16 __U, __m512 __A,
1961 __m256 __B, const int __imm)
1962 {
1963 return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
1964 (__v8sf) __B,
1965 __imm,
1966 (__v16sf) __W,
1967 (__mmask16) __U);
1968 }
1969
/* Forwards __A (as __v16sf), __B (as __v8sf), __imm, a
   _mm512_setzero_ps () vector and the 16-bit mask __U to
   __builtin_ia32_insertf32x8_mask.  Presumably lanes not selected by
   __U come out as zero -- confirm.  */
1970 extern __inline __m512
1971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1972 _mm512_maskz_insertf32x8 (__mmask16 __U, __m512 __A, __m256 __B,
1973 const int __imm)
1974 {
1975 return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
1976 (__v8sf) __B,
1977 __imm,
1978 (__v16sf)
1979 _mm512_setzero_ps (),
1980 (__mmask16) __U);
1981 }
1982
/* Unmasked form (__OPTIMIZE__ branch): forwards __A (as __v8di), the
   128-bit __B (as __v2di) and __imm to
   __builtin_ia32_inserti64x2_512_mask with a _mm512_setzero_si512 ()
   vector and an all-ones mask ((__mmask8) -1).  */
1983 extern __inline __m512i
1984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1985 _mm512_inserti64x2 (__m512i __A, __m128i __B, const int __imm)
1986 {
1987 return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
1988 (__v2di) __B,
1989 __imm,
1990 (__v8di)
1991 _mm512_setzero_si512 (),
1992 (__mmask8) -1);
1993 }
1994
/* Forwards __A (as __v8di), __B (as __v2di), __imm, __W (as __v8di) and
   mask __U to __builtin_ia32_inserti64x2_512_mask.  Presumably lanes
   not selected by __U keep __W's value -- confirm.  */
1995 extern __inline __m512i
1996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1997 _mm512_mask_inserti64x2 (__m512i __W, __mmask8 __U, __m512i __A,
1998 __m128i __B, const int __imm)
1999 {
2000 return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
2001 (__v2di) __B,
2002 __imm,
2003 (__v8di) __W,
2004 (__mmask8)
2005 __U);
2006 }
2007
/* Forwards __A (as __v8di), __B (as __v2di), __imm, a
   _mm512_setzero_si512 () vector and mask __U to
   __builtin_ia32_inserti64x2_512_mask.  Presumably lanes not selected
   by __U come out as zero -- confirm.  */
2008 extern __inline __m512i
2009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2010 _mm512_maskz_inserti64x2 (__mmask8 __U, __m512i __A, __m128i __B,
2011 const int __imm)
2012 {
2013 return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
2014 (__v2di) __B,
2015 __imm,
2016 (__v8di)
2017 _mm512_setzero_si512 (),
2018 (__mmask8)
2019 __U);
2020 }
2021
/* Unmasked form (__OPTIMIZE__ branch): forwards __A (as __v8df), the
   128-bit __B (as __v2df) and __imm to
   __builtin_ia32_insertf64x2_512_mask with a _mm512_setzero_pd ()
   vector and an all-ones mask ((__mmask8) -1).  */
2022 extern __inline __m512d
2023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2024 _mm512_insertf64x2 (__m512d __A, __m128d __B, const int __imm)
2025 {
2026 return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
2027 (__v2df) __B,
2028 __imm,
2029 (__v8df)
2030 _mm512_setzero_pd (),
2031 (__mmask8) -1);
2032 }
2033
/* Forwards __A (as __v8df), __B (as __v2df), __imm, __W (as __v8df) and
   mask __U to __builtin_ia32_insertf64x2_512_mask.  Presumably lanes
   not selected by __U keep __W's value -- confirm.  */
2034 extern __inline __m512d
2035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2036 _mm512_mask_insertf64x2 (__m512d __W, __mmask8 __U, __m512d __A,
2037 __m128d __B, const int __imm)
2038 {
2039 return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
2040 (__v2df) __B,
2041 __imm,
2042 (__v8df) __W,
2043 (__mmask8)
2044 __U);
2045 }
2046
/* Forwards __A (as __v8df), __B (as __v2df), __imm, a
   _mm512_setzero_pd () vector and mask __U to
   __builtin_ia32_insertf64x2_512_mask.  Presumably lanes not selected
   by __U come out as zero -- confirm.  */
2047 extern __inline __m512d
2048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2049 _mm512_maskz_insertf64x2 (__mmask8 __U, __m512d __A, __m128d __B,
2050 const int __imm)
2051 {
2052 return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
2053 (__v2df) __B,
2054 __imm,
2055 (__v8df)
2056 _mm512_setzero_pd (),
2057 (__mmask8)
2058 __U);
2059 }
2060
/* Returns, as an __mmask8, the result of
   __builtin_ia32_fpclasspd512_mask applied to __A (as __v8df), __imm
   and the caller's mask __U (passed without a cast).  NOTE(review):
   presumably result bits are only set where __U is set -- confirm.  */
2061 extern __inline __mmask8
2062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2063 _mm512_mask_fpclass_pd_mask (__mmask8 __U, __m512d __A,
2064 const int __imm)
2065 {
2066 return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
2067 __imm, __U);
2068 }
2069
/* Unmasked form: same builtin as _mm512_mask_fpclass_pd_mask, called
   with an all-ones mask ((__mmask8) -1) in place of a caller-supplied
   one.  */
2070 extern __inline __mmask8
2071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2072 _mm512_fpclass_pd_mask (__m512d __A, const int __imm)
2073 {
2074 return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
2075 __imm,
2076 (__mmask8) -1);
2077 }
2078
/* Returns, as an __mmask16, the result of
   __builtin_ia32_fpclassps512_mask applied to __A (as __v16sf), __imm
   and the caller's mask __U (passed without a cast).  NOTE(review):
   presumably result bits are only set where __U is set -- confirm.  */
2079 extern __inline __mmask16
2080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2081 _mm512_mask_fpclass_ps_mask (__mmask16 __U, __m512 __A,
2082 const int __imm)
2083 {
2084 return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
2085 __imm, __U);
2086 }
2087
/* Unmasked form: same builtin as _mm512_mask_fpclass_ps_mask, called
   with an all-ones mask ((__mmask16) -1) in place of a caller-supplied
   one.  */
2088 extern __inline __mmask16
2089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2090 _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
2091 {
2092 return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
2093 __imm,
2094 (__mmask16) -1);
2095 }
2096
2097 #else
/* Macro form of _kshiftli_mask8, used when __OPTIMIZE__ is not defined:
   casts both X and Y to __mmask8 and calls __builtin_ia32_kshiftliqi.  */
2098 #define _kshiftli_mask8(X, Y) \
2099 ((__mmask8) __builtin_ia32_kshiftliqi ((__mmask8)(X), (__mmask8)(Y)))
2100
/* Macro form of _kshiftri_mask8, used when __OPTIMIZE__ is not defined:
   casts both X and Y to __mmask8 and calls __builtin_ia32_kshiftriqi.  */
2101 #define _kshiftri_mask8(X, Y) \
2102 ((__mmask8) __builtin_ia32_kshiftriqi ((__mmask8)(X), (__mmask8)(Y)))
2103
/* Scalar range macros.  A and B are the two vector operands, C is
   cast to int, and _MM_FROUND_CUR_DIRECTION is supplied as the last
   argument of the *_round builtin.  */
#define _mm_range_sd(A, B, C)                                           \
  ((__m128d) __builtin_ia32_rangesd128_round (                          \
     (__v2df)(__m128d)(A),                                              \
     (__v2df)(__m128d)(B),                                              \
     (int)(C),                                                          \
     _MM_FROUND_CUR_DIRECTION))

#define _mm_range_ss(A, B, C)                                           \
  ((__m128) __builtin_ia32_rangess128_round (                           \
     (__v4sf)(__m128)(A),                                               \
     (__v4sf)(__m128)(B),                                               \
     (int)(C),                                                          \
     _MM_FROUND_CUR_DIRECTION))
2113
/* Scalar range macros with a caller-supplied last argument R, which
   is forwarded unchanged to the *_round builtin.  */
#define _mm_range_round_sd(A, B, C, R)                                  \
  ((__m128d) __builtin_ia32_rangesd128_round (                          \
     (__v2df)(__m128d)(A),                                              \
     (__v2df)(__m128d)(B),                                              \
     (int)(C),                                                          \
     (R)))

#define _mm_range_round_ss(A, B, C, R)                                  \
  ((__m128) __builtin_ia32_rangess128_round (                           \
     (__v4sf)(__m128)(A),                                               \
     (__v4sf)(__m128)(B),                                               \
     (int)(C),                                                          \
     (R)))
2121
/* Wrappers for __builtin_ia32_cvttpd2qq512_mask.  B is forwarded
   unchanged as the builtin's last argument.  The plain form passes a
   zeroed __v8di and mask -1; mask_ passes W and U; maskz_ passes a
   zeroed __v8di and U.  */
#define _mm512_cvtt_roundpd_epi64(A, B)                                 \
  ((__m512i) __builtin_ia32_cvttpd2qq512_mask (                         \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, B)                      \
  ((__m512i) __builtin_ia32_cvttpd2qq512_mask (                         \
     (A), (__v8di)(W), (U), (B)))

#define _mm512_maskz_cvtt_roundpd_epi64(U, A, B)                        \
  ((__m512i) __builtin_ia32_cvttpd2qq512_mask (                         \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2132
/* Wrappers for __builtin_ia32_cvttpd2uqq512_mask.  B is forwarded
   unchanged as the builtin's last argument.  The plain form passes a
   zeroed __v8di and mask -1; mask_ passes W and U; maskz_ passes a
   zeroed __v8di and U.  */
#define _mm512_cvtt_roundpd_epu64(A, B)                                 \
  ((__m512i) __builtin_ia32_cvttpd2uqq512_mask (                        \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, B)                      \
  ((__m512i) __builtin_ia32_cvttpd2uqq512_mask (                        \
     (A), (__v8di)(W), (U), (B)))

#define _mm512_maskz_cvtt_roundpd_epu64(U, A, B)                        \
  ((__m512i) __builtin_ia32_cvttpd2uqq512_mask (                        \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2141
/* Wrappers for __builtin_ia32_cvttps2qq512_mask.  B is forwarded
   unchanged as the builtin's last argument.  The plain form passes a
   zeroed __v8di and mask -1; mask_ passes W and U; maskz_ passes a
   zeroed __v8di and U.  */
#define _mm512_cvtt_roundps_epi64(A, B)                                 \
  ((__m512i) __builtin_ia32_cvttps2qq512_mask (                         \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvtt_roundps_epi64(W, U, A, B)                      \
  ((__m512i) __builtin_ia32_cvttps2qq512_mask (                         \
     (A), (__v8di)(W), (U), (B)))

#define _mm512_maskz_cvtt_roundps_epi64(U, A, B)                        \
  ((__m512i) __builtin_ia32_cvttps2qq512_mask (                         \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2150
/* Wrappers for __builtin_ia32_cvttps2uqq512_mask.  B is forwarded
   unchanged as the builtin's last argument.  The plain form passes a
   zeroed __v8di and mask -1; mask_ passes W and U; maskz_ passes a
   zeroed __v8di and U.  */
#define _mm512_cvtt_roundps_epu64(A, B)                                 \
  ((__m512i) __builtin_ia32_cvttps2uqq512_mask (                        \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvtt_roundps_epu64(W, U, A, B)                      \
  ((__m512i) __builtin_ia32_cvttps2uqq512_mask (                        \
     (A), (__v8di)(W), (U), (B)))

#define _mm512_maskz_cvtt_roundps_epu64(U, A, B)                        \
  ((__m512i) __builtin_ia32_cvttps2uqq512_mask (                        \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2159
/* Wrappers for __builtin_ia32_cvtpd2qq512_mask.  B is forwarded
   unchanged as the builtin's last argument.  The plain form passes a
   zeroed __v8di and mask -1; mask_ passes W and U; maskz_ passes a
   zeroed __v8di and U.  */
#define _mm512_cvt_roundpd_epi64(A, B)                                  \
  ((__m512i) __builtin_ia32_cvtpd2qq512_mask (                          \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvt_roundpd_epi64(W, U, A, B)                       \
  ((__m512i) __builtin_ia32_cvtpd2qq512_mask (                          \
     (A), (__v8di)(W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_epi64(U, A, B)                         \
  ((__m512i) __builtin_ia32_cvtpd2qq512_mask (                          \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2168
/* Wrappers for __builtin_ia32_cvtpd2uqq512_mask.  B is forwarded
   unchanged as the builtin's last argument.  The plain form passes a
   zeroed __v8di and mask -1; mask_ passes W and U; maskz_ passes a
   zeroed __v8di and U.  */
#define _mm512_cvt_roundpd_epu64(A, B)                                  \
  ((__m512i) __builtin_ia32_cvtpd2uqq512_mask (                         \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvt_roundpd_epu64(W, U, A, B)                       \
  ((__m512i) __builtin_ia32_cvtpd2uqq512_mask (                         \
     (A), (__v8di)(W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_epu64(U, A, B)                         \
  ((__m512i) __builtin_ia32_cvtpd2uqq512_mask (                         \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2177
/* Wrappers for __builtin_ia32_cvtps2qq512_mask.  B is forwarded
   unchanged as the builtin's last argument.  The plain form passes a
   zeroed __v8di and mask -1; mask_ passes W and U; maskz_ passes a
   zeroed __v8di and U.  */
#define _mm512_cvt_roundps_epi64(A, B)                                  \
  ((__m512i) __builtin_ia32_cvtps2qq512_mask (                          \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvt_roundps_epi64(W, U, A, B)                       \
  ((__m512i) __builtin_ia32_cvtps2qq512_mask (                          \
     (A), (__v8di)(W), (U), (B)))

#define _mm512_maskz_cvt_roundps_epi64(U, A, B)                         \
  ((__m512i) __builtin_ia32_cvtps2qq512_mask (                          \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2186
/* Wrappers for __builtin_ia32_cvtps2uqq512_mask.  B is forwarded
   unchanged as the builtin's last argument.  The plain form passes a
   zeroed __v8di and mask -1; mask_ passes W and U; maskz_ passes a
   zeroed __v8di and U.  */
#define _mm512_cvt_roundps_epu64(A, B)                                  \
  ((__m512i) __builtin_ia32_cvtps2uqq512_mask (                         \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvt_roundps_epu64(W, U, A, B)                       \
  ((__m512i) __builtin_ia32_cvtps2uqq512_mask (                         \
     (A), (__v8di)(W), (U), (B)))

#define _mm512_maskz_cvt_roundps_epu64(U, A, B)                         \
  ((__m512i) __builtin_ia32_cvtps2uqq512_mask (                         \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2195
/* Wrappers for __builtin_ia32_cvtqq2ps512_mask; the result is cast to
   __m256.  A is cast to __v8di and B is forwarded unchanged as the
   last argument.  The plain form passes a zeroed __v8sf and mask -1;
   mask_ passes W (uncast) and U; maskz_ passes a zeroed __v8sf and
   U.  */
#define _mm512_cvt_roundepi64_ps(A, B)                                  \
  ((__m256) __builtin_ia32_cvtqq2ps512_mask (                           \
     (__v8di)(A), (__v8sf) _mm256_setzero_ps (), -1, (B)))

#define _mm512_mask_cvt_roundepi64_ps(W, U, A, B)                       \
  ((__m256) __builtin_ia32_cvtqq2ps512_mask (                           \
     (__v8di)(A), (W), (U), (B)))

#define _mm512_maskz_cvt_roundepi64_ps(U, A, B)                         \
  ((__m256) __builtin_ia32_cvtqq2ps512_mask (                           \
     (__v8di)(A), (__v8sf) _mm256_setzero_ps (), (U), (B)))
2204
/* Wrappers for __builtin_ia32_cvtuqq2ps512_mask; the result is cast
   to __m256.  A is cast to __v8di and B is forwarded unchanged as the
   last argument.  The plain form passes a zeroed __v8sf and mask -1;
   mask_ passes W (uncast) and U; maskz_ passes a zeroed __v8sf and
   U.  */
#define _mm512_cvt_roundepu64_ps(A, B)                                  \
  ((__m256) __builtin_ia32_cvtuqq2ps512_mask (                          \
     (__v8di)(A), (__v8sf) _mm256_setzero_ps (), -1, (B)))

#define _mm512_mask_cvt_roundepu64_ps(W, U, A, B)                       \
  ((__m256) __builtin_ia32_cvtuqq2ps512_mask (                          \
     (__v8di)(A), (W), (U), (B)))

#define _mm512_maskz_cvt_roundepu64_ps(U, A, B)                         \
  ((__m256) __builtin_ia32_cvtuqq2ps512_mask (                          \
     (__v8di)(A), (__v8sf) _mm256_setzero_ps (), (U), (B)))
2213
/* Wrappers for __builtin_ia32_cvtqq2pd512_mask; the result is cast to
   __m512d.  A is cast to __v8di and B is forwarded unchanged as the
   last argument.  The plain form passes a zeroed __v8df and mask -1;
   mask_ passes W (uncast) and U; maskz_ passes a zeroed __v8df and
   U.  */
#define _mm512_cvt_roundepi64_pd(A, B)                                  \
  ((__m512d) __builtin_ia32_cvtqq2pd512_mask (                          \
     (__v8di)(A), (__v8df) _mm512_setzero_pd (), -1, (B)))

#define _mm512_mask_cvt_roundepi64_pd(W, U, A, B)                       \
  ((__m512d) __builtin_ia32_cvtqq2pd512_mask (                          \
     (__v8di)(A), (W), (U), (B)))

#define _mm512_maskz_cvt_roundepi64_pd(U, A, B)                         \
  ((__m512d) __builtin_ia32_cvtqq2pd512_mask (                          \
     (__v8di)(A), (__v8df) _mm512_setzero_pd (), (U), (B)))
2222
/* Wrappers for __builtin_ia32_cvtuqq2pd512_mask; the result is cast
   to __m512d.  A is cast to __v8di and B is forwarded unchanged as
   the last argument.  The plain form passes a zeroed __v8df and mask
   -1; mask_ passes W (uncast) and U; maskz_ passes a zeroed __v8df
   and U.  */
#define _mm512_cvt_roundepu64_pd(A, B)                                  \
  ((__m512d) __builtin_ia32_cvtuqq2pd512_mask (                         \
     (__v8di)(A), (__v8df) _mm512_setzero_pd (), -1, (B)))

#define _mm512_mask_cvt_roundepu64_pd(W, U, A, B)                       \
  ((__m512d) __builtin_ia32_cvtuqq2pd512_mask (                         \
     (__v8di)(A), (W), (U), (B)))

#define _mm512_maskz_cvt_roundepu64_pd(U, A, B)                         \
  ((__m512d) __builtin_ia32_cvtuqq2pd512_mask (                         \
     (__v8di)(A), (__v8df) _mm512_setzero_pd (), (U), (B)))
2231
/* Wrappers for __builtin_ia32_reducepd512_mask.  A is the vector
   operand and B is cast to int.  The plain form passes a zeroed
   __v8df and an all-ones __mmask8; mask_ passes W and U; maskz_
   passes a zeroed __v8df and U.  */
#define _mm512_reduce_pd(A, B)                                          \
  ((__m512d) __builtin_ia32_reducepd512_mask (                          \
     (__v8df)(__m512d)(A),                                              \
     (int)(B),                                                          \
     (__v8df) _mm512_setzero_pd (),                                     \
     (__mmask8)-1))

#define _mm512_mask_reduce_pd(W, U, A, B)                               \
  ((__m512d) __builtin_ia32_reducepd512_mask (                          \
     (__v8df)(__m512d)(A),                                              \
     (int)(B),                                                          \
     (__v8df)(__m512d)(W),                                              \
     (__mmask8)(U)))

#define _mm512_maskz_reduce_pd(U, A, B)                                 \
  ((__m512d) __builtin_ia32_reducepd512_mask (                          \
     (__v8df)(__m512d)(A),                                              \
     (int)(B),                                                          \
     (__v8df) _mm512_setzero_pd (),                                     \
     (__mmask8)(U)))
2243
/* Wrappers for __builtin_ia32_reduceps512_mask.  A is the vector
   operand and B is cast to int.  The plain form passes a zeroed
   __v16sf and an all-ones __mmask16; mask_ passes W and U; maskz_
   passes a zeroed __v16sf and U.  */
#define _mm512_reduce_ps(A, B)                                          \
  ((__m512) __builtin_ia32_reduceps512_mask (                           \
     (__v16sf)(__m512)(A),                                              \
     (int)(B),                                                          \
     (__v16sf) _mm512_setzero_ps (),                                    \
     (__mmask16)-1))

#define _mm512_mask_reduce_ps(W, U, A, B)                               \
  ((__m512) __builtin_ia32_reduceps512_mask (                           \
     (__v16sf)(__m512)(A),                                              \
     (int)(B),                                                          \
     (__v16sf)(__m512)(W),                                              \
     (__mmask16)(U)))

#define _mm512_maskz_reduce_ps(U, A, B)                                 \
  ((__m512) __builtin_ia32_reduceps512_mask (                           \
     (__v16sf)(__m512)(A),                                              \
     (int)(B),                                                          \
     (__v16sf) _mm512_setzero_ps (),                                    \
     (__mmask16)(U)))
2255
/* Wrappers for __builtin_ia32_extractf32x8_mask; the result is cast
   to __m256.  X is the 512-bit source and C is cast to int.  The
   plain form passes a zeroed __v8sf and an all-ones __mmask8; mask_
   passes W and U; maskz_ passes a zeroed __v8sf and U.  */
#define _mm512_extractf32x8_ps(X, C)                                    \
  ((__m256) __builtin_ia32_extractf32x8_mask (                          \
     (__v16sf)(__m512) (X),                                             \
     (int) (C),                                                         \
     (__v8sf)(__m256) _mm256_setzero_ps (),                             \
     (__mmask8)-1))

#define _mm512_mask_extractf32x8_ps(W, U, X, C)                         \
  ((__m256) __builtin_ia32_extractf32x8_mask (                          \
     (__v16sf)(__m512) (X),                                             \
     (int) (C),                                                         \
     (__v8sf)(__m256) (W),                                              \
     (__mmask8) (U)))

#define _mm512_maskz_extractf32x8_ps(U, X, C)                           \
  ((__m256) __builtin_ia32_extractf32x8_mask (                          \
     (__v16sf)(__m512) (X),                                             \
     (int) (C),                                                         \
     (__v8sf)(__m256) _mm256_setzero_ps (),                             \
     (__mmask8) (U)))
2267
/* Wrappers for __builtin_ia32_extractf64x2_512_mask; the result is
   cast to __m128d.  X is the 512-bit source and C is cast to int.
   The plain form passes a zeroed __v2df and an all-ones __mmask8;
   mask_ passes W and U; maskz_ passes a zeroed __v2df and U.  */
#define _mm512_extractf64x2_pd(X, C)                                    \
  ((__m128d) __builtin_ia32_extractf64x2_512_mask (                     \
     (__v8df)(__m512d) (X),                                             \
     (int) (C),                                                         \
     (__v2df)(__m128d) _mm_setzero_pd (),                               \
     (__mmask8)-1))

#define _mm512_mask_extractf64x2_pd(W, U, X, C)                         \
  ((__m128d) __builtin_ia32_extractf64x2_512_mask (                     \
     (__v8df)(__m512d) (X),                                             \
     (int) (C),                                                         \
     (__v2df)(__m128d) (W),                                             \
     (__mmask8) (U)))

#define _mm512_maskz_extractf64x2_pd(U, X, C)                           \
  ((__m128d) __builtin_ia32_extractf64x2_512_mask (                     \
     (__v8df)(__m512d) (X),                                             \
     (int) (C),                                                         \
     (__v2df)(__m128d) _mm_setzero_pd (),                               \
     (__mmask8) (U)))
2279
/* Wrappers for __builtin_ia32_extracti32x8_mask; the result is cast
   to __m256i.  X is the 512-bit source and C is cast to int.  The
   plain form passes a zeroed __v8si and an all-ones __mmask8; mask_
   passes W and U; maskz_ passes a zeroed __v8si and U.  */
#define _mm512_extracti32x8_epi32(X, C)                                 \
  ((__m256i) __builtin_ia32_extracti32x8_mask (                         \
     (__v16si)(__m512i) (X),                                            \
     (int) (C),                                                         \
     (__v8si)(__m256i) _mm256_setzero_si256 (),                         \
     (__mmask8)-1))

#define _mm512_mask_extracti32x8_epi32(W, U, X, C)                      \
  ((__m256i) __builtin_ia32_extracti32x8_mask (                         \
     (__v16si)(__m512i) (X),                                            \
     (int) (C),                                                         \
     (__v8si)(__m256i) (W),                                             \
     (__mmask8) (U)))

#define _mm512_maskz_extracti32x8_epi32(U, X, C)                        \
  ((__m256i) __builtin_ia32_extracti32x8_mask (                         \
     (__v16si)(__m512i) (X),                                            \
     (int) (C),                                                         \
     (__v8si)(__m256i) _mm256_setzero_si256 (),                         \
     (__mmask8) (U)))
2291
/* Wrappers for __builtin_ia32_extracti64x2_512_mask; the result is
   cast to __m128i.  X is the 512-bit source and C is cast to int.
   The plain form passes a zeroed __v2di and an all-ones __mmask8;
   mask_ passes W and U; maskz_ passes a zeroed __v2di and U.  */
#define _mm512_extracti64x2_epi64(X, C)                                 \
  ((__m128i) __builtin_ia32_extracti64x2_512_mask (                     \
     (__v8di)(__m512i) (X),                                             \
     (int) (C),                                                         \
     (__v2di)(__m128i) _mm_setzero_si128 (),                            \
     (__mmask8)-1))

#define _mm512_mask_extracti64x2_epi64(W, U, X, C)                      \
  ((__m128i) __builtin_ia32_extracti64x2_512_mask (                     \
     (__v8di)(__m512i) (X),                                             \
     (int) (C),                                                         \
     (__v2di)(__m128i) (W),                                             \
     (__mmask8) (U)))

#define _mm512_maskz_extracti64x2_epi64(U, X, C)                        \
  ((__m128i) __builtin_ia32_extracti64x2_512_mask (                     \
     (__v8di)(__m512i) (X),                                             \
     (int) (C),                                                         \
     (__v2di)(__m128i) _mm_setzero_si128 (),                            \
     (__mmask8) (U)))
2303
/* Wrappers for __builtin_ia32_rangepd512_mask that supply
   _MM_FROUND_CUR_DIRECTION as the final argument.  A and B are the
   vector operands and C is cast to int.  The plain form passes a
   zeroed __v8df and an all-ones __mmask8; mask_ passes W and U;
   maskz_ passes a zeroed __v8df and U.  */
#define _mm512_range_pd(A, B, C)                                        \
  ((__m512d) __builtin_ia32_rangepd512_mask (                           \
     (__v8df)(__m512d)(A),                                              \
     (__v8df)(__m512d)(B),                                              \
     (int)(C),                                                          \
     (__v8df) _mm512_setzero_pd (),                                     \
     (__mmask8)-1,                                                      \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_range_pd(W, U, A, B, C)                             \
  ((__m512d) __builtin_ia32_rangepd512_mask (                           \
     (__v8df)(__m512d)(A),                                              \
     (__v8df)(__m512d)(B),                                              \
     (int)(C),                                                          \
     (__v8df)(__m512d)(W),                                              \
     (__mmask8)(U),                                                     \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_range_pd(U, A, B, C)                               \
  ((__m512d) __builtin_ia32_rangepd512_mask (                           \
     (__v8df)(__m512d)(A),                                              \
     (__v8df)(__m512d)(B),                                              \
     (int)(C),                                                          \
     (__v8df) _mm512_setzero_pd (),                                     \
     (__mmask8)(U),                                                     \
     _MM_FROUND_CUR_DIRECTION))
2318
/* Wrappers for __builtin_ia32_rangeps512_mask that supply
   _MM_FROUND_CUR_DIRECTION as the final argument.  A and B are the
   vector operands and C is cast to int.  The plain form passes a
   zeroed __v16sf and an all-ones __mmask16; mask_ passes W and U;
   maskz_ passes a zeroed __v16sf and U.  */
#define _mm512_range_ps(A, B, C)                                        \
  ((__m512) __builtin_ia32_rangeps512_mask (                            \
     (__v16sf)(__m512)(A),                                              \
     (__v16sf)(__m512)(B),                                              \
     (int)(C),                                                          \
     (__v16sf) _mm512_setzero_ps (),                                    \
     (__mmask16)-1,                                                     \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_range_ps(W, U, A, B, C)                             \
  ((__m512) __builtin_ia32_rangeps512_mask (                            \
     (__v16sf)(__m512)(A),                                              \
     (__v16sf)(__m512)(B),                                              \
     (int)(C),                                                          \
     (__v16sf)(__m512)(W),                                              \
     (__mmask16)(U),                                                    \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_range_ps(U, A, B, C)                               \
  ((__m512) __builtin_ia32_rangeps512_mask (                            \
     (__v16sf)(__m512)(A),                                              \
     (__v16sf)(__m512)(B),                                              \
     (int)(C),                                                          \
     (__v16sf) _mm512_setzero_ps (),                                    \
     (__mmask16)(U),                                                    \
     _MM_FROUND_CUR_DIRECTION))
2333
/* Wrappers for __builtin_ia32_rangepd512_mask with a caller-supplied
   final argument R, forwarded unchanged.  A and B are the vector
   operands and C is cast to int.  The plain form passes a zeroed
   __v8df and an all-ones __mmask8; mask_ passes W and U; maskz_
   passes a zeroed __v8df and U.  */
#define _mm512_range_round_pd(A, B, C, R)                               \
  ((__m512d) __builtin_ia32_rangepd512_mask (                           \
     (__v8df)(__m512d)(A),                                              \
     (__v8df)(__m512d)(B),                                              \
     (int)(C),                                                          \
     (__v8df) _mm512_setzero_pd (),                                     \
     (__mmask8)-1,                                                      \
     (R)))

#define _mm512_mask_range_round_pd(W, U, A, B, C, R)                    \
  ((__m512d) __builtin_ia32_rangepd512_mask (                           \
     (__v8df)(__m512d)(A),                                              \
     (__v8df)(__m512d)(B),                                              \
     (int)(C),                                                          \
     (__v8df)(__m512d)(W),                                              \
     (__mmask8)(U),                                                     \
     (R)))

#define _mm512_maskz_range_round_pd(U, A, B, C, R)                      \
  ((__m512d) __builtin_ia32_rangepd512_mask (                           \
     (__v8df)(__m512d)(A),                                              \
     (__v8df)(__m512d)(B),                                              \
     (int)(C),                                                          \
     (__v8df) _mm512_setzero_pd (),                                     \
     (__mmask8)(U),                                                     \
     (R)))
2348
/* Wrappers for __builtin_ia32_rangeps512_mask with a caller-supplied
   final argument R, forwarded unchanged.  A and B are the vector
   operands and C is cast to int.  The plain form passes a zeroed
   __v16sf and an all-ones __mmask16; mask_ passes W and U; maskz_
   passes a zeroed __v16sf and U.  */
#define _mm512_range_round_ps(A, B, C, R)                               \
  ((__m512) __builtin_ia32_rangeps512_mask (                            \
     (__v16sf)(__m512)(A),                                              \
     (__v16sf)(__m512)(B),                                              \
     (int)(C),                                                          \
     (__v16sf) _mm512_setzero_ps (),                                    \
     (__mmask16)-1,                                                     \
     (R)))

#define _mm512_mask_range_round_ps(W, U, A, B, C, R)                    \
  ((__m512) __builtin_ia32_rangeps512_mask (                            \
     (__v16sf)(__m512)(A),                                              \
     (__v16sf)(__m512)(B),                                              \
     (int)(C),                                                          \
     (__v16sf)(__m512)(W),                                              \
     (__mmask16)(U),                                                    \
     (R)))

#define _mm512_maskz_range_round_ps(U, A, B, C, R)                      \
  ((__m512) __builtin_ia32_rangeps512_mask (                            \
     (__v16sf)(__m512)(A),                                              \
     (__v16sf)(__m512)(B),                                              \
     (int)(C),                                                          \
     (__v16sf) _mm512_setzero_ps (),                                    \
     (__mmask16)(U),                                                    \
     (R)))
2363
/* Unmasked insertf64x2: wraps __builtin_ia32_insertf64x2_512_mask
   with an all-ones __mmask8.  X is the 512-bit source, Y the 128-bit
   vector to insert and C is cast to int.

   The fourth operand is a zeroed vector rather than a second copy of
   X, so that the macro argument X is expanded (and therefore
   evaluated) only once; _mm512_insertf32x8 uses the same scheme.
   NOTE(review): this relies on the fourth operand being unused when
   every mask bit is set -- confirm against the builtin's expander.  */
#define _mm512_insertf64x2(X, Y, C)                                     \
  ((__m512d) __builtin_ia32_insertf64x2_512_mask (                      \
     (__v8df)(__m512d) (X),                                             \
     (__v2df)(__m128d) (Y),                                             \
     (int) (C),                                                         \
     (__v8df)(__m512d) _mm512_setzero_pd (),                            \
     (__mmask8)-1))
2368
/* Masked forms of insertf64x2 built on
   __builtin_ia32_insertf64x2_512_mask.  X is the 512-bit source, Y
   the 128-bit vector to insert and C is cast to int.  mask_ passes W
   and U as the last two operands; maskz_ passes a zeroed vector and
   U.  */
#define _mm512_mask_insertf64x2(W, U, X, Y, C)                          \
  ((__m512d) __builtin_ia32_insertf64x2_512_mask (                      \
     (__v8df)(__m512d) (X),                                             \
     (__v2df)(__m128d) (Y),                                             \
     (int) (C),                                                         \
     (__v8df)(__m512d) (W),                                             \
     (__mmask8) (U)))

#define _mm512_maskz_insertf64x2(U, X, Y, C)                            \
  ((__m512d) __builtin_ia32_insertf64x2_512_mask (                      \
     (__v8df)(__m512d) (X),                                             \
     (__v2df)(__m128d) (Y),                                             \
     (int) (C),                                                         \
     (__v8df)(__m512d) _mm512_setzero_pd (),                            \
     (__mmask8) (U)))
2378
/* Unmasked inserti64x2: wraps __builtin_ia32_inserti64x2_512_mask
   with an all-ones __mmask8.  X is the 512-bit source, Y the 128-bit
   vector to insert and C is cast to int.

   The fourth operand is a zeroed vector rather than a second copy of
   X, so that the macro argument X is expanded (and therefore
   evaluated) only once; _mm512_inserti32x8 uses the same scheme.
   NOTE(review): this relies on the fourth operand being unused when
   every mask bit is set -- confirm against the builtin's expander.  */
#define _mm512_inserti64x2(X, Y, C)                                     \
  ((__m512i) __builtin_ia32_inserti64x2_512_mask (                      \
     (__v8di)(__m512i) (X),                                             \
     (__v2di)(__m128i) (Y),                                             \
     (int) (C),                                                         \
     (__v8di)(__m512i) _mm512_setzero_si512 (),                         \
     (__mmask8)-1))
2382
/* Masked forms of inserti64x2 built on
   __builtin_ia32_inserti64x2_512_mask.  X is the 512-bit source, Y
   the 128-bit vector to insert and C is cast to int.  mask_ passes W
   and U as the last two operands; maskz_ passes a zeroed vector and
   U.  */
#define _mm512_mask_inserti64x2(W, U, X, Y, C)                          \
  ((__m512i) __builtin_ia32_inserti64x2_512_mask (                      \
     (__v8di)(__m512i) (X),                                             \
     (__v2di)(__m128i) (Y),                                             \
     (int) (C),                                                         \
     (__v8di)(__m512i) (W),                                             \
     (__mmask8) (U)))

#define _mm512_maskz_inserti64x2(U, X, Y, C)                            \
  ((__m512i) __builtin_ia32_inserti64x2_512_mask (                      \
     (__v8di)(__m512i) (X),                                             \
     (__v2di)(__m128i) (Y),                                             \
     (int) (C),                                                         \
     (__v8di)(__m512i) _mm512_setzero_si512 (),                         \
     (__mmask8) (U)))
2392
/* Wrappers for __builtin_ia32_insertf32x8_mask.  X is the 512-bit
   source, Y the 256-bit vector to insert and C is cast to int.  The
   plain form passes a zeroed __v16sf and an all-ones __mmask16; mask_
   passes W and U; maskz_ passes a zeroed __v16sf and U.  */
#define _mm512_insertf32x8(X, Y, C)                                     \
  ((__m512) __builtin_ia32_insertf32x8_mask (                           \
     (__v16sf)(__m512) (X),                                             \
     (__v8sf)(__m256) (Y),                                              \
     (int) (C),                                                         \
     (__v16sf)(__m512) _mm512_setzero_ps (),                            \
     (__mmask16)-1))

#define _mm512_mask_insertf32x8(W, U, X, Y, C)                          \
  ((__m512) __builtin_ia32_insertf32x8_mask (                           \
     (__v16sf)(__m512) (X),                                             \
     (__v8sf)(__m256) (Y),                                              \
     (int) (C),                                                         \
     (__v16sf)(__m512)(W),                                              \
     (__mmask16)(U)))

#define _mm512_maskz_insertf32x8(U, X, Y, C)                            \
  ((__m512) __builtin_ia32_insertf32x8_mask (                           \
     (__v16sf)(__m512) (X),                                             \
     (__v8sf)(__m256) (Y),                                              \
     (int) (C),                                                         \
     (__v16sf)(__m512) _mm512_setzero_ps (),                            \
     (__mmask16)(U)))
2410
/* Wrappers for __builtin_ia32_inserti32x8_mask.  X is the 512-bit
   source, Y the 256-bit vector to insert and C is cast to int.  The
   plain form passes a zeroed __v16si and an all-ones __mmask16; mask_
   passes W and U; maskz_ passes a zeroed __v16si and U.  */
#define _mm512_inserti32x8(X, Y, C)                                     \
  ((__m512i) __builtin_ia32_inserti32x8_mask (                          \
     (__v16si)(__m512i) (X),                                            \
     (__v8si)(__m256i) (Y),                                             \
     (int) (C),                                                         \
     (__v16si)(__m512i) _mm512_setzero_si512 (),                        \
     (__mmask16)-1))

#define _mm512_mask_inserti32x8(W, U, X, Y, C)                          \
  ((__m512i) __builtin_ia32_inserti32x8_mask (                          \
     (__v16si)(__m512i) (X),                                            \
     (__v8si)(__m256i) (Y),                                             \
     (int) (C),                                                         \
     (__v16si)(__m512i)(W),                                             \
     (__mmask16)(U)))

#define _mm512_maskz_inserti32x8(U, X, Y, C)                            \
  ((__m512i) __builtin_ia32_inserti32x8_mask (                          \
     (__v16si)(__m512i) (X),                                            \
     (__v8si)(__m256i) (Y),                                             \
     (int) (C),                                                         \
     (__v16si)(__m512i) _mm512_setzero_si512 (),                        \
     (__mmask16)(U)))
2428
/* Scalar fpclass macros: wrap __builtin_ia32_fpclassss and
   __builtin_ia32_fpclasssd.  X is the vector operand and C is cast to
   int; the result is cast to __mmask8.

   Each definition ends without a trailing backslash, so the macro
   body cannot absorb whatever line follows it.  */
#define _mm_fpclass_ss_mask(X, C)                                       \
  ((__mmask8) __builtin_ia32_fpclassss ((__v4sf) (__m128) (X), (int) (C)))

#define _mm_fpclass_sd_mask(X, C)                                       \
  ((__mmask8) __builtin_ia32_fpclasssd ((__v2df) (__m128d) (X), (int) (C)))
2434
/* Macro form of the masked 8 x double fpclass.  X is the vector
   operand, C is cast to int and u is cast to __mmask8 before being
   passed to __builtin_ia32_fpclasspd512_mask.  */
#define _mm512_mask_fpclass_pd_mask(u, X, C)                            \
  ((__mmask8) __builtin_ia32_fpclasspd512_mask (                        \
     (__v8df) (__m512d) (X),                                            \
     (int) (C),                                                         \
     (__mmask8)(u)))
2438
/* Macro form of the masked 16 x float fpclass.  X is the vector
   operand and C is cast to int.

   The mask U is cast to __mmask16, matching the __mmask16 parameter
   of the inline definition of this intrinsic; a cast to __mmask8
   would discard mask bits 8-15.  */
#define _mm512_mask_fpclass_ps_mask(U, X, C)                            \
  ((__mmask16) __builtin_ia32_fpclassps512_mask (                       \
     (__v16sf) (__m512) (X),                                            \
     (int) (C),                                                         \
     (__mmask16) (U)))
2442
/* Macro form of the unmasked 8 x double fpclass.  X is the vector
   operand and C is cast to int; an all-ones __mmask8 is passed to
   __builtin_ia32_fpclasspd512_mask.  */
#define _mm512_fpclass_pd_mask(X, C)                                    \
  ((__mmask8) __builtin_ia32_fpclasspd512_mask (                        \
     (__v8df) (__m512d) (X),                                            \
     (int) (C),                                                         \
     (__mmask8)-1))
2446
/* Macro form of the unmasked 16 x float fpclass.  X is the vector
   operand and C is cast to int.

   The all-ones mask is built as (__mmask16)-1, matching the inline
   definition of this intrinsic; (__mmask8)-1 is only 0xff and would
   leave mask bits 8-15 clear.  */
#define _mm512_fpclass_ps_mask(X, C)                                    \
  ((__mmask16) __builtin_ia32_fpclassps512_mask (                       \
     (__v16sf) (__m512) (X),                                            \
     (int) (C),                                                         \
     (__mmask16)-1))
2450
/* Scalar double reduce: wraps __builtin_ia32_reducesd.  A and B are
   the two vector operands and C is cast to int.

   The definition ends without a trailing backslash, so the macro
   body cannot absorb whatever line follows it.  */
#define _mm_reduce_sd(A, B, C)                                          \
  ((__m128d) __builtin_ia32_reducesd ((__v2df)(__m128d)(A),             \
                                      (__v2df)(__m128d)(B),             \
                                      (int)(C)))
2454
/* Scalar float reduce: wraps __builtin_ia32_reducess.  A and B are
   the two vector operands and C is cast to int.

   The second operand is B, as in _mm_reduce_sd; passing A in both
   slots would silently ignore the caller's B.  The definition ends
   without a trailing backslash, so the macro body cannot absorb
   whatever line follows it.  */
#define _mm_reduce_ss(A, B, C)                                          \
  ((__m128) __builtin_ia32_reducess ((__v4sf)(__m128)(A),               \
                                     (__v4sf)(__m128)(B),               \
                                     (int)(C)))
2458
2459 #endif
2460
2461 #ifdef __DISABLE_AVX512DQ__
2462 #undef __DISABLE_AVX512DQ__
2463 #pragma GCC pop_options
2464 #endif /* __DISABLE_AVX512DQ__ */
2465
2466 #endif /* _AVX512DQINTRIN_H_INCLUDED */
2467