/* Copyright (C) 2014-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512VLINTRIN_H_INCLUDED
29 #define _AVX512VLINTRIN_H_INCLUDED
30
31 #ifndef __AVX512VL__
32 #pragma GCC push_options
33 #pragma GCC target("avx512vl")
34 #define __DISABLE_AVX512VL__
35 #endif /* __AVX512VL__ */
36
/* Internal data types for implementing the intrinsics.  */
typedef unsigned int __mmask32;

/* Integer vector types of 16 and 32 bytes, each declared with
   __may_alias__ and __aligned__ (1).  */
typedef int __v4si_u
  __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
typedef int __v8si_u
  __attribute__ ((__vector_size__ (32), __may_alias__, __aligned__ (1)));
typedef long long __v2di_u
  __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
typedef long long __v4di_u
  __attribute__ ((__vector_size__ (32), __may_alias__, __aligned__ (1)));
47
48 extern __inline __m256d
49 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
50 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
51 {
52 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
53 (__v4df) __W,
54 (__mmask8) __U);
55 }
56
57 extern __inline __m256d
58 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
59 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
60 {
61 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
62 (__v4df)
63 _mm256_setzero_pd (),
64 (__mmask8) __U);
65 }
66
67 extern __inline __m128d
68 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
69 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
70 {
71 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
72 (__v2df) __W,
73 (__mmask8) __U);
74 }
75
76 extern __inline __m128d
77 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
78 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
79 {
80 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
81 (__v2df)
82 _mm_setzero_pd (),
83 (__mmask8) __U);
84 }
85
86 extern __inline __m256d
87 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
88 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
89 {
90 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
91 (__v4df) __W,
92 (__mmask8) __U);
93 }
94
95 extern __inline __m256d
96 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
97 _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
98 {
99 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
100 (__v4df)
101 _mm256_setzero_pd (),
102 (__mmask8) __U);
103 }
104
105 extern __inline __m128d
106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
107 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
108 {
109 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
110 (__v2df) __W,
111 (__mmask8) __U);
112 }
113
114 extern __inline __m128d
115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
116 _mm_maskz_load_pd (__mmask8 __U, void const *__P)
117 {
118 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
119 (__v2df)
120 _mm_setzero_pd (),
121 (__mmask8) __U);
122 }
123
124 extern __inline void
125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
127 {
128 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
129 (__v4df) __A,
130 (__mmask8) __U);
131 }
132
133 extern __inline void
134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
135 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
136 {
137 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
138 (__v2df) __A,
139 (__mmask8) __U);
140 }
141
142 extern __inline __m256
143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
144 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
145 {
146 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
147 (__v8sf) __W,
148 (__mmask8) __U);
149 }
150
151 extern __inline __m256
152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
153 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
154 {
155 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
156 (__v8sf)
157 _mm256_setzero_ps (),
158 (__mmask8) __U);
159 }
160
161 extern __inline __m128
162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
163 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
164 {
165 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
166 (__v4sf) __W,
167 (__mmask8) __U);
168 }
169
170 extern __inline __m128
171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
172 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
173 {
174 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
175 (__v4sf)
176 _mm_setzero_ps (),
177 (__mmask8) __U);
178 }
179
180 extern __inline __m256
181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
182 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
183 {
184 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
185 (__v8sf) __W,
186 (__mmask8) __U);
187 }
188
189 extern __inline __m256
190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
191 _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
192 {
193 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
194 (__v8sf)
195 _mm256_setzero_ps (),
196 (__mmask8) __U);
197 }
198
199 extern __inline __m128
200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
201 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
202 {
203 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
204 (__v4sf) __W,
205 (__mmask8) __U);
206 }
207
208 extern __inline __m128
209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
210 _mm_maskz_load_ps (__mmask8 __U, void const *__P)
211 {
212 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
213 (__v4sf)
214 _mm_setzero_ps (),
215 (__mmask8) __U);
216 }
217
218 extern __inline void
219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
220 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
221 {
222 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
223 (__v8sf) __A,
224 (__mmask8) __U);
225 }
226
227 extern __inline void
228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
229 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
230 {
231 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
232 (__v4sf) __A,
233 (__mmask8) __U);
234 }
235
236 extern __inline __m256i
237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
238 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
239 {
240 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
241 (__v4di) __W,
242 (__mmask8) __U);
243 }
244
245 extern __inline __m256i
246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
247 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
248 {
249 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
250 (__v4di)
251 _mm256_setzero_si256 (),
252 (__mmask8) __U);
253 }
254
255 extern __inline __m128i
256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
257 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
258 {
259 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
260 (__v2di) __W,
261 (__mmask8) __U);
262 }
263
264 extern __inline __m128i
265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
266 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
267 {
268 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
269 (__v2di)
270 _mm_setzero_si128 (),
271 (__mmask8) __U);
272 }
273
274 extern __inline __m256i
275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
276 _mm256_load_epi64 (void const *__P)
277 {
278 return (__m256i) (*(const __v4di *) __P);
279 }
280
281 extern __inline __m256i
282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
283 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
284 {
285 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
286 (__v4di) __W,
287 (__mmask8)
288 __U);
289 }
290
291 extern __inline __m256i
292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
293 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
294 {
295 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
296 (__v4di)
297 _mm256_setzero_si256 (),
298 (__mmask8)
299 __U);
300 }
301
302 extern __inline __m128i
303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
304 _mm_load_epi64 (void const *__P)
305 {
306 return (__m128i) (*(const __v2di *) __P);
307 }
308
309 extern __inline __m128i
310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
311 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
312 {
313 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
314 (__v2di) __W,
315 (__mmask8)
316 __U);
317 }
318
319 extern __inline __m128i
320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
321 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
322 {
323 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
324 (__v2di)
325 _mm_setzero_si128 (),
326 (__mmask8)
327 __U);
328 }
329
330 extern __inline void
331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
332 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
333 {
334 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
335 (__v4di) __A,
336 (__mmask8) __U);
337 }
338
339 extern __inline void
340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
341 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
342 {
343 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
344 (__v2di) __A,
345 (__mmask8) __U);
346 }
347
348 extern __inline __m256i
349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
350 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
351 {
352 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
353 (__v8si) __W,
354 (__mmask8) __U);
355 }
356
357 extern __inline __m256i
358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
359 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
360 {
361 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
362 (__v8si)
363 _mm256_setzero_si256 (),
364 (__mmask8) __U);
365 }
366
367 extern __inline __m128i
368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
369 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
370 {
371 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
372 (__v4si) __W,
373 (__mmask8) __U);
374 }
375
376 extern __inline __m128i
377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
378 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
379 {
380 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
381 (__v4si)
382 _mm_setzero_si128 (),
383 (__mmask8) __U);
384 }
385
386 extern __inline __m256i
387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
388 _mm256_load_epi32 (void const *__P)
389 {
390 return (__m256i) (*(const __v8si *) __P);
391 }
392
393 extern __inline __m256i
394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
395 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
396 {
397 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
398 (__v8si) __W,
399 (__mmask8)
400 __U);
401 }
402
403 extern __inline __m256i
404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
405 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
406 {
407 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
408 (__v8si)
409 _mm256_setzero_si256 (),
410 (__mmask8)
411 __U);
412 }
413
414 extern __inline __m128i
415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
416 _mm_load_epi32 (void const *__P)
417 {
418 return (__m128i) (*(const __v4si *) __P);
419 }
420
421 extern __inline __m128i
422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
423 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
424 {
425 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
426 (__v4si) __W,
427 (__mmask8)
428 __U);
429 }
430
431 extern __inline __m128i
432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
433 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
434 {
435 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
436 (__v4si)
437 _mm_setzero_si128 (),
438 (__mmask8)
439 __U);
440 }
441
442 extern __inline void
443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
444 _mm256_store_epi32 (void *__P, __m256i __A)
445 {
446 *(__v8si *) __P = (__v8si) __A;
447 }
448
449 extern __inline void
450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
451 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
452 {
453 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
454 (__v8si) __A,
455 (__mmask8) __U);
456 }
457
458 extern __inline void
459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
460 _mm_store_epi32 (void *__P, __m128i __A)
461 {
462 *(__v4si *) __P = (__v4si) __A;
463 }
464
465 extern __inline void
466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
467 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
468 {
469 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
470 (__v4si) __A,
471 (__mmask8) __U);
472 }
473
474 extern __inline __m128d
475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
476 _mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
477 {
478 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
479 (__v2df) __B,
480 (__v2df) __W,
481 (__mmask8) __U);
482 }
483
484 extern __inline __m128d
485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
486 _mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
487 {
488 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
489 (__v2df) __B,
490 (__v2df)
491 _mm_setzero_pd (),
492 (__mmask8) __U);
493 }
494
495 extern __inline __m256d
496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
497 _mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
498 __m256d __B)
499 {
500 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
501 (__v4df) __B,
502 (__v4df) __W,
503 (__mmask8) __U);
504 }
505
506 extern __inline __m256d
507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
508 _mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
509 {
510 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
511 (__v4df) __B,
512 (__v4df)
513 _mm256_setzero_pd (),
514 (__mmask8) __U);
515 }
516
517 extern __inline __m128
518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
519 _mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
520 {
521 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
522 (__v4sf) __B,
523 (__v4sf) __W,
524 (__mmask8) __U);
525 }
526
527 extern __inline __m128
528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
529 _mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
530 {
531 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
532 (__v4sf) __B,
533 (__v4sf)
534 _mm_setzero_ps (),
535 (__mmask8) __U);
536 }
537
538 extern __inline __m256
539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
540 _mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
541 {
542 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
543 (__v8sf) __B,
544 (__v8sf) __W,
545 (__mmask8) __U);
546 }
547
548 extern __inline __m256
549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
550 _mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
551 {
552 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
553 (__v8sf) __B,
554 (__v8sf)
555 _mm256_setzero_ps (),
556 (__mmask8) __U);
557 }
558
559 extern __inline __m128d
560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
561 _mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
562 {
563 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
564 (__v2df) __B,
565 (__v2df) __W,
566 (__mmask8) __U);
567 }
568
569 extern __inline __m128d
570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
571 _mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
572 {
573 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
574 (__v2df) __B,
575 (__v2df)
576 _mm_setzero_pd (),
577 (__mmask8) __U);
578 }
579
580 extern __inline __m256d
581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
582 _mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
583 __m256d __B)
584 {
585 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
586 (__v4df) __B,
587 (__v4df) __W,
588 (__mmask8) __U);
589 }
590
591 extern __inline __m256d
592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
593 _mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
594 {
595 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
596 (__v4df) __B,
597 (__v4df)
598 _mm256_setzero_pd (),
599 (__mmask8) __U);
600 }
601
602 extern __inline __m128
603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
604 _mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
605 {
606 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
607 (__v4sf) __B,
608 (__v4sf) __W,
609 (__mmask8) __U);
610 }
611
612 extern __inline __m128
613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
614 _mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
615 {
616 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
617 (__v4sf) __B,
618 (__v4sf)
619 _mm_setzero_ps (),
620 (__mmask8) __U);
621 }
622
623 extern __inline __m256
624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
625 _mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
626 {
627 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
628 (__v8sf) __B,
629 (__v8sf) __W,
630 (__mmask8) __U);
631 }
632
633 extern __inline __m256
634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
635 _mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
636 {
637 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
638 (__v8sf) __B,
639 (__v8sf)
640 _mm256_setzero_ps (),
641 (__mmask8) __U);
642 }
643
644 extern __inline void
645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
646 _mm256_store_epi64 (void *__P, __m256i __A)
647 {
648 *(__m256i *) __P = __A;
649 }
650
651 extern __inline void
652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
653 _mm_store_epi64 (void *__P, __m128i __A)
654 {
655 *(__m128i *) __P = __A;
656 }
657
658 extern __inline __m256d
659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
660 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
661 {
662 return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
663 (__v4df) __W,
664 (__mmask8) __U);
665 }
666
667 extern __inline __m256d
668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
669 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
670 {
671 return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
672 (__v4df)
673 _mm256_setzero_pd (),
674 (__mmask8) __U);
675 }
676
677 extern __inline __m128d
678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
679 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
680 {
681 return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
682 (__v2df) __W,
683 (__mmask8) __U);
684 }
685
686 extern __inline __m128d
687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
688 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
689 {
690 return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
691 (__v2df)
692 _mm_setzero_pd (),
693 (__mmask8) __U);
694 }
695
696 extern __inline void
697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
698 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
699 {
700 __builtin_ia32_storeupd256_mask ((double *) __P,
701 (__v4df) __A,
702 (__mmask8) __U);
703 }
704
705 extern __inline void
706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
707 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
708 {
709 __builtin_ia32_storeupd128_mask ((double *) __P,
710 (__v2df) __A,
711 (__mmask8) __U);
712 }
713
714 extern __inline __m256
715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
716 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
717 {
718 return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
719 (__v8sf) __W,
720 (__mmask8) __U);
721 }
722
723 extern __inline __m256
724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
725 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
726 {
727 return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
728 (__v8sf)
729 _mm256_setzero_ps (),
730 (__mmask8) __U);
731 }
732
733 extern __inline __m128
734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
735 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
736 {
737 return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
738 (__v4sf) __W,
739 (__mmask8) __U);
740 }
741
742 extern __inline __m128
743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
744 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
745 {
746 return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
747 (__v4sf)
748 _mm_setzero_ps (),
749 (__mmask8) __U);
750 }
751
752 extern __inline void
753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
754 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
755 {
756 __builtin_ia32_storeups256_mask ((float *) __P,
757 (__v8sf) __A,
758 (__mmask8) __U);
759 }
760
761 extern __inline void
762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
763 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
764 {
765 __builtin_ia32_storeups128_mask ((float *) __P,
766 (__v4sf) __A,
767 (__mmask8) __U);
768 }
769
770 extern __inline __m256i
771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
772 _mm256_loadu_epi64 (void const *__P)
773 {
774 return (__m256i) (*(const __v4di_u *) __P);
775 }
776
777 extern __inline __m256i
778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
779 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
780 {
781 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
782 (__v4di) __W,
783 (__mmask8) __U);
784 }
785
786 extern __inline __m256i
787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
788 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
789 {
790 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
791 (__v4di)
792 _mm256_setzero_si256 (),
793 (__mmask8) __U);
794 }
795
796 extern __inline __m128i
797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
798 _mm_loadu_epi64 (void const *__P)
799 {
800 return (__m128i) (*(const __v2di_u *) __P);
801 }
802
803 extern __inline __m128i
804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
805 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
806 {
807 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
808 (__v2di) __W,
809 (__mmask8) __U);
810 }
811
812 extern __inline __m128i
813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
814 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
815 {
816 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
817 (__v2di)
818 _mm_setzero_si128 (),
819 (__mmask8) __U);
820 }
821
822 extern __inline void
823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
824 _mm256_storeu_epi64 (void *__P, __m256i __A)
825 {
826 *(__m256i_u *) __P = (__m256i_u) __A;
827 }
828
829 extern __inline void
830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
831 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
832 {
833 __builtin_ia32_storedqudi256_mask ((long long *) __P,
834 (__v4di) __A,
835 (__mmask8) __U);
836 }
837
838 extern __inline void
839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
840 _mm_storeu_epi64 (void *__P, __m128i __A)
841 {
842 *(__m128i_u *) __P = (__m128i_u) __A;
843 }
844
845 extern __inline void
846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
847 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
848 {
849 __builtin_ia32_storedqudi128_mask ((long long *) __P,
850 (__v2di) __A,
851 (__mmask8) __U);
852 }
853
854 extern __inline __m256i
855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
856 _mm256_loadu_epi32 (void const *__P)
857 {
858 return (__m256i) (*(const __v8si_u *) __P);
859 }
860
861 extern __inline __m256i
862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
863 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
864 {
865 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
866 (__v8si) __W,
867 (__mmask8) __U);
868 }
869
870 extern __inline __m256i
871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
872 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
873 {
874 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
875 (__v8si)
876 _mm256_setzero_si256 (),
877 (__mmask8) __U);
878 }
879
880 extern __inline __m128i
881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882 _mm_loadu_epi32 (void const *__P)
883 {
884 return (__m128i) (*(const __v4si_u *) __P);
885 }
886
887 extern __inline __m128i
888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
889 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
890 {
891 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
892 (__v4si) __W,
893 (__mmask8) __U);
894 }
895
896 extern __inline __m128i
897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
898 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
899 {
900 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
901 (__v4si)
902 _mm_setzero_si128 (),
903 (__mmask8) __U);
904 }
905
906 extern __inline void
907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
908 _mm256_storeu_epi32 (void *__P, __m256i __A)
909 {
910 *(__m256i_u *) __P = (__m256i_u) __A;
911 }
912
913 extern __inline void
914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
915 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
916 {
917 __builtin_ia32_storedqusi256_mask ((int *) __P,
918 (__v8si) __A,
919 (__mmask8) __U);
920 }
921
922 extern __inline void
923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
924 _mm_storeu_epi32 (void *__P, __m128i __A)
925 {
926 *(__m128i_u *) __P = (__m128i_u) __A;
927 }
928
929 extern __inline void
930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
931 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
932 {
933 __builtin_ia32_storedqusi128_mask ((int *) __P,
934 (__v4si) __A,
935 (__mmask8) __U);
936 }
937
938 extern __inline __m256i
939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
940 _mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
941 {
942 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
943 (__v8si) __W,
944 (__mmask8) __U);
945 }
946
947 extern __inline __m256i
948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
949 _mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
950 {
951 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
952 (__v8si)
953 _mm256_setzero_si256 (),
954 (__mmask8) __U);
955 }
956
957 extern __inline __m128i
958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
959 _mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
960 {
961 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
962 (__v4si) __W,
963 (__mmask8) __U);
964 }
965
966 extern __inline __m128i
967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
968 _mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
969 {
970 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
971 (__v4si)
972 _mm_setzero_si128 (),
973 (__mmask8) __U);
974 }
975
976 extern __inline __m256i
977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
978 _mm256_abs_epi64 (__m256i __A)
979 {
980 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
981 (__v4di)
982 _mm256_setzero_si256 (),
983 (__mmask8) -1);
984 }
985
986 extern __inline __m256i
987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
988 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
989 {
990 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
991 (__v4di) __W,
992 (__mmask8) __U);
993 }
994
995 extern __inline __m256i
996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
997 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
998 {
999 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
1000 (__v4di)
1001 _mm256_setzero_si256 (),
1002 (__mmask8) __U);
1003 }
1004
1005 extern __inline __m128i
1006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1007 _mm_abs_epi64 (__m128i __A)
1008 {
1009 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
1010 (__v2di)
1011 _mm_setzero_si128 (),
1012 (__mmask8) -1);
1013 }
1014
1015 extern __inline __m128i
1016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1017 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
1018 {
1019 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
1020 (__v2di) __W,
1021 (__mmask8) __U);
1022 }
1023
1024 extern __inline __m128i
1025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1026 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
1027 {
1028 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
1029 (__v2di)
1030 _mm_setzero_si128 (),
1031 (__mmask8) __U);
1032 }
1033
1034 extern __inline __m128i
1035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1036 _mm256_cvtpd_epu32 (__m256d __A)
1037 {
1038 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
1039 (__v4si)
1040 _mm_setzero_si128 (),
1041 (__mmask8) -1);
1042 }
1043
1044 extern __inline __m128i
1045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1046 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
1047 {
1048 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
1049 (__v4si) __W,
1050 (__mmask8) __U);
1051 }
1052
1053 extern __inline __m128i
1054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1055 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
1056 {
1057 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
1058 (__v4si)
1059 _mm_setzero_si128 (),
1060 (__mmask8) __U);
1061 }
1062
1063 extern __inline __m128i
1064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1065 _mm_cvtpd_epu32 (__m128d __A)
1066 {
1067 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1068 (__v4si)
1069 _mm_setzero_si128 (),
1070 (__mmask8) -1);
1071 }
1072
1073 extern __inline __m128i
1074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1075 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
1076 {
1077 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1078 (__v4si) __W,
1079 (__mmask8) __U);
1080 }
1081
1082 extern __inline __m128i
1083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1084 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
1085 {
1086 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1087 (__v4si)
1088 _mm_setzero_si128 (),
1089 (__mmask8) __U);
1090 }
1091
1092 extern __inline __m256i
1093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1094 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
1095 {
1096 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1097 (__v8si) __W,
1098 (__mmask8) __U);
1099 }
1100
1101 extern __inline __m256i
1102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1103 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
1104 {
1105 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1106 (__v8si)
1107 _mm256_setzero_si256 (),
1108 (__mmask8) __U);
1109 }
1110
1111 extern __inline __m128i
1112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1113 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
1114 {
1115 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1116 (__v4si) __W,
1117 (__mmask8) __U);
1118 }
1119
1120 extern __inline __m128i
1121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1122 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
1123 {
1124 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1125 (__v4si)
1126 _mm_setzero_si128 (),
1127 (__mmask8) __U);
1128 }
1129
1130 extern __inline __m256i
1131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1132 _mm256_cvttps_epu32 (__m256 __A)
1133 {
1134 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1135 (__v8si)
1136 _mm256_setzero_si256 (),
1137 (__mmask8) -1);
1138 }
1139
1140 extern __inline __m256i
1141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1142 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
1143 {
1144 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1145 (__v8si) __W,
1146 (__mmask8) __U);
1147 }
1148
1149 extern __inline __m256i
1150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1151 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
1152 {
1153 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1154 (__v8si)
1155 _mm256_setzero_si256 (),
1156 (__mmask8) __U);
1157 }
1158
1159 extern __inline __m128i
1160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1161 _mm_cvttps_epu32 (__m128 __A)
1162 {
1163 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1164 (__v4si)
1165 _mm_setzero_si128 (),
1166 (__mmask8) -1);
1167 }
1168
1169 extern __inline __m128i
1170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1171 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
1172 {
1173 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1174 (__v4si) __W,
1175 (__mmask8) __U);
1176 }
1177
1178 extern __inline __m128i
1179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1180 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
1181 {
1182 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1183 (__v4si)
1184 _mm_setzero_si128 (),
1185 (__mmask8) __U);
1186 }
1187
1188 extern __inline __m128i
1189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1190 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1191 {
1192 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1193 (__v4si) __W,
1194 (__mmask8) __U);
1195 }
1196
1197 extern __inline __m128i
1198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1199 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
1200 {
1201 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1202 (__v4si)
1203 _mm_setzero_si128 (),
1204 (__mmask8) __U);
1205 }
1206
1207 extern __inline __m128i
1208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1209 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1210 {
1211 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1212 (__v4si) __W,
1213 (__mmask8) __U);
1214 }
1215
1216 extern __inline __m128i
1217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1218 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
1219 {
1220 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1221 (__v4si)
1222 _mm_setzero_si128 (),
1223 (__mmask8) __U);
1224 }
1225
1226 extern __inline __m128i
1227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1228 _mm256_cvttpd_epu32 (__m256d __A)
1229 {
1230 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1231 (__v4si)
1232 _mm_setzero_si128 (),
1233 (__mmask8) -1);
1234 }
1235
1236 extern __inline __m128i
1237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1238 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
1239 {
1240 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1241 (__v4si) __W,
1242 (__mmask8) __U);
1243 }
1244
1245 extern __inline __m128i
1246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1247 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
1248 {
1249 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1250 (__v4si)
1251 _mm_setzero_si128 (),
1252 (__mmask8) __U);
1253 }
1254
1255 extern __inline __m128i
1256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1257 _mm_cvttpd_epu32 (__m128d __A)
1258 {
1259 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1260 (__v4si)
1261 _mm_setzero_si128 (),
1262 (__mmask8) -1);
1263 }
1264
1265 extern __inline __m128i
1266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1267 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
1268 {
1269 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1270 (__v4si) __W,
1271 (__mmask8) __U);
1272 }
1273
1274 extern __inline __m128i
1275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1276 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
1277 {
1278 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1279 (__v4si)
1280 _mm_setzero_si128 (),
1281 (__mmask8) __U);
1282 }
1283
1284 extern __inline __m128i
1285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1286 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1287 {
1288 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1289 (__v4si) __W,
1290 (__mmask8) __U);
1291 }
1292
1293 extern __inline __m128i
1294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1295 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
1296 {
1297 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1298 (__v4si)
1299 _mm_setzero_si128 (),
1300 (__mmask8) __U);
1301 }
1302
1303 extern __inline __m128i
1304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1305 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1306 {
1307 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1308 (__v4si) __W,
1309 (__mmask8) __U);
1310 }
1311
1312 extern __inline __m128i
1313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1314 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
1315 {
1316 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1317 (__v4si)
1318 _mm_setzero_si128 (),
1319 (__mmask8) __U);
1320 }
1321
1322 extern __inline __m256d
1323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1324 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1325 {
1326 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1327 (__v4df) __W,
1328 (__mmask8) __U);
1329 }
1330
1331 extern __inline __m256d
1332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1333 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1334 {
1335 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1336 (__v4df)
1337 _mm256_setzero_pd (),
1338 (__mmask8) __U);
1339 }
1340
1341 extern __inline __m128d
1342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1343 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1344 {
1345 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1346 (__v2df) __W,
1347 (__mmask8) __U);
1348 }
1349
1350 extern __inline __m128d
1351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1352 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1353 {
1354 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1355 (__v2df)
1356 _mm_setzero_pd (),
1357 (__mmask8) __U);
1358 }
1359
1360 extern __inline __m256d
1361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1362 _mm256_cvtepu32_pd (__m128i __A)
1363 {
1364 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1365 (__v4df)
1366 _mm256_setzero_pd (),
1367 (__mmask8) -1);
1368 }
1369
1370 extern __inline __m256d
1371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1372 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1373 {
1374 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1375 (__v4df) __W,
1376 (__mmask8) __U);
1377 }
1378
1379 extern __inline __m256d
1380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1381 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1382 {
1383 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1384 (__v4df)
1385 _mm256_setzero_pd (),
1386 (__mmask8) __U);
1387 }
1388
1389 extern __inline __m128d
1390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1391 _mm_cvtepu32_pd (__m128i __A)
1392 {
1393 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1394 (__v2df)
1395 _mm_setzero_pd (),
1396 (__mmask8) -1);
1397 }
1398
1399 extern __inline __m128d
1400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1401 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1402 {
1403 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1404 (__v2df) __W,
1405 (__mmask8) __U);
1406 }
1407
1408 extern __inline __m128d
1409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1410 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1411 {
1412 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1413 (__v2df)
1414 _mm_setzero_pd (),
1415 (__mmask8) __U);
1416 }
1417
1418 extern __inline __m256
1419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1420 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1421 {
1422 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1423 (__v8sf) __W,
1424 (__mmask8) __U);
1425 }
1426
1427 extern __inline __m256
1428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1429 _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
1430 {
1431 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1432 (__v8sf)
1433 _mm256_setzero_ps (),
1434 (__mmask8) __U);
1435 }
1436
1437 extern __inline __m128
1438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1439 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1440 {
1441 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1442 (__v4sf) __W,
1443 (__mmask8) __U);
1444 }
1445
1446 extern __inline __m128
1447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1448 _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
1449 {
1450 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1451 (__v4sf)
1452 _mm_setzero_ps (),
1453 (__mmask8) __U);
1454 }
1455
1456 extern __inline __m256
1457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1458 _mm256_cvtepu32_ps (__m256i __A)
1459 {
1460 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1461 (__v8sf)
1462 _mm256_setzero_ps (),
1463 (__mmask8) -1);
1464 }
1465
1466 extern __inline __m256
1467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1468 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1469 {
1470 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1471 (__v8sf) __W,
1472 (__mmask8) __U);
1473 }
1474
1475 extern __inline __m256
1476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1477 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
1478 {
1479 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1480 (__v8sf)
1481 _mm256_setzero_ps (),
1482 (__mmask8) __U);
1483 }
1484
1485 extern __inline __m128
1486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1487 _mm_cvtepu32_ps (__m128i __A)
1488 {
1489 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1490 (__v4sf)
1491 _mm_setzero_ps (),
1492 (__mmask8) -1);
1493 }
1494
1495 extern __inline __m128
1496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1497 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1498 {
1499 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1500 (__v4sf) __W,
1501 (__mmask8) __U);
1502 }
1503
1504 extern __inline __m128
1505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1506 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
1507 {
1508 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1509 (__v4sf)
1510 _mm_setzero_ps (),
1511 (__mmask8) __U);
1512 }
1513
1514 extern __inline __m256d
1515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1516 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
1517 {
1518 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1519 (__v4df) __W,
1520 (__mmask8) __U);
1521 }
1522
1523 extern __inline __m256d
1524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1525 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1526 {
1527 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1528 (__v4df)
1529 _mm256_setzero_pd (),
1530 (__mmask8) __U);
1531 }
1532
1533 extern __inline __m128d
1534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1535 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
1536 {
1537 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1538 (__v2df) __W,
1539 (__mmask8) __U);
1540 }
1541
1542 extern __inline __m128d
1543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1544 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1545 {
1546 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1547 (__v2df)
1548 _mm_setzero_pd (),
1549 (__mmask8) __U);
1550 }
1551
1552 extern __inline __m128i
1553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1554 _mm_cvtepi32_epi8 (__m128i __A)
1555 {
1556 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1557 (__v16qi)
1558 _mm_undefined_si128 (),
1559 (__mmask8) -1);
1560 }
1561
1562 extern __inline void
1563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1564 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1565 {
1566 __builtin_ia32_pmovdb128mem_mask ((unsigned int *) __P, (__v4si) __A, __M);
1567 }
1568
1569 extern __inline __m128i
1570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1571 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1572 {
1573 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1574 (__v16qi) __O, __M);
1575 }
1576
1577 extern __inline __m128i
1578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1579 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
1580 {
1581 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1582 (__v16qi)
1583 _mm_setzero_si128 (),
1584 __M);
1585 }
1586
1587 extern __inline __m128i
1588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1589 _mm256_cvtepi32_epi8 (__m256i __A)
1590 {
1591 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1592 (__v16qi)
1593 _mm_undefined_si128 (),
1594 (__mmask8) -1);
1595 }
1596
1597 extern __inline __m128i
1598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1599 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1600 {
1601 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1602 (__v16qi) __O, __M);
1603 }
1604
1605 extern __inline void
1606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1607 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1608 {
1609 __builtin_ia32_pmovdb256mem_mask ((unsigned long long *) __P, (__v8si) __A, __M);
1610 }
1611
1612 extern __inline __m128i
1613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1614 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
1615 {
1616 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1617 (__v16qi)
1618 _mm_setzero_si128 (),
1619 __M);
1620 }
1621
1622 extern __inline __m128i
1623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1624 _mm_cvtsepi32_epi8 (__m128i __A)
1625 {
1626 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1627 (__v16qi)
1628 _mm_undefined_si128 (),
1629 (__mmask8) -1);
1630 }
1631
1632 extern __inline void
1633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1634 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1635 {
1636 __builtin_ia32_pmovsdb128mem_mask ((unsigned int *) __P, (__v4si) __A, __M);
1637 }
1638
1639 extern __inline __m128i
1640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1641 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1642 {
1643 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1644 (__v16qi) __O, __M);
1645 }
1646
1647 extern __inline __m128i
1648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1649 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
1650 {
1651 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1652 (__v16qi)
1653 _mm_setzero_si128 (),
1654 __M);
1655 }
1656
1657 extern __inline __m128i
1658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1659 _mm256_cvtsepi32_epi8 (__m256i __A)
1660 {
1661 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1662 (__v16qi)
1663 _mm_undefined_si128 (),
1664 (__mmask8) -1);
1665 }
1666
1667 extern __inline void
1668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1669 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1670 {
1671 __builtin_ia32_pmovsdb256mem_mask ((unsigned long long *) __P, (__v8si) __A, __M);
1672 }
1673
1674 extern __inline __m128i
1675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1676 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1677 {
1678 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1679 (__v16qi) __O, __M);
1680 }
1681
1682 extern __inline __m128i
1683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1684 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
1685 {
1686 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1687 (__v16qi)
1688 _mm_setzero_si128 (),
1689 __M);
1690 }
1691
1692 extern __inline __m128i
1693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1694 _mm_cvtusepi32_epi8 (__m128i __A)
1695 {
1696 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1697 (__v16qi)
1698 _mm_undefined_si128 (),
1699 (__mmask8) -1);
1700 }
1701
1702 extern __inline void
1703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1704 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1705 {
1706 __builtin_ia32_pmovusdb128mem_mask ((unsigned int *) __P, (__v4si) __A, __M);
1707 }
1708
1709 extern __inline __m128i
1710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1711 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1712 {
1713 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1714 (__v16qi) __O,
1715 __M);
1716 }
1717
1718 extern __inline __m128i
1719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1720 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
1721 {
1722 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1723 (__v16qi)
1724 _mm_setzero_si128 (),
1725 __M);
1726 }
1727
1728 extern __inline __m128i
1729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1730 _mm256_cvtusepi32_epi8 (__m256i __A)
1731 {
1732 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1733 (__v16qi)
1734 _mm_undefined_si128 (),
1735 (__mmask8) -1);
1736 }
1737
1738 extern __inline void
1739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1740 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1741 {
1742 __builtin_ia32_pmovusdb256mem_mask ((unsigned long long *) __P, (__v8si) __A, __M);
1743 }
1744
1745 extern __inline __m128i
1746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1747 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1748 {
1749 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1750 (__v16qi) __O,
1751 __M);
1752 }
1753
1754 extern __inline __m128i
1755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1756 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
1757 {
1758 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1759 (__v16qi)
1760 _mm_setzero_si128 (),
1761 __M);
1762 }
1763
1764 extern __inline __m128i
1765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1766 _mm_cvtepi32_epi16 (__m128i __A)
1767 {
1768 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1769 (__v8hi)
1770 _mm_setzero_si128 (),
1771 (__mmask8) -1);
1772 }
1773
1774 extern __inline void
1775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1776 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1777 {
1778 __builtin_ia32_pmovdw128mem_mask ((unsigned long long *) __P, (__v4si) __A, __M);
1779 }
1780
1781 extern __inline __m128i
1782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1783 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1784 {
1785 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1786 (__v8hi) __O, __M);
1787 }
1788
1789 extern __inline __m128i
1790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1791 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
1792 {
1793 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1794 (__v8hi)
1795 _mm_setzero_si128 (),
1796 __M);
1797 }
1798
1799 extern __inline __m128i
1800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1801 _mm256_cvtepi32_epi16 (__m256i __A)
1802 {
1803 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1804 (__v8hi)
1805 _mm_setzero_si128 (),
1806 (__mmask8) -1);
1807 }
1808
1809 extern __inline void
1810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1811 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1812 {
1813 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1814 }
1815
1816 extern __inline __m128i
1817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1818 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1819 {
1820 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1821 (__v8hi) __O, __M);
1822 }
1823
1824 extern __inline __m128i
1825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1826 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
1827 {
1828 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1829 (__v8hi)
1830 _mm_setzero_si128 (),
1831 __M);
1832 }
1833
1834 extern __inline __m128i
1835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1836 _mm_cvtsepi32_epi16 (__m128i __A)
1837 {
1838 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1839 (__v8hi)
1840 _mm_setzero_si128 (),
1841 (__mmask8) -1);
1842 }
1843
1844 extern __inline void
1845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1846 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1847 {
1848 __builtin_ia32_pmovsdw128mem_mask ((unsigned long long *) __P, (__v4si) __A, __M);
1849 }
1850
1851 extern __inline __m128i
1852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1853 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1854 {
1855 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1856 (__v8hi)__O,
1857 __M);
1858 }
1859
1860 extern __inline __m128i
1861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1862 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
1863 {
1864 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1865 (__v8hi)
1866 _mm_setzero_si128 (),
1867 __M);
1868 }
1869
1870 extern __inline __m128i
1871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1872 _mm256_cvtsepi32_epi16 (__m256i __A)
1873 {
1874 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1875 (__v8hi)
1876 _mm_undefined_si128 (),
1877 (__mmask8) -1);
1878 }
1879
1880 extern __inline void
1881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1882 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1883 {
1884 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1885 }
1886
1887 extern __inline __m128i
1888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1889 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1890 {
1891 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1892 (__v8hi) __O, __M);
1893 }
1894
1895 extern __inline __m128i
1896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1897 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
1898 {
1899 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1900 (__v8hi)
1901 _mm_setzero_si128 (),
1902 __M);
1903 }
1904
1905 extern __inline __m128i
1906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1907 _mm_cvtusepi32_epi16 (__m128i __A)
1908 {
1909 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1910 (__v8hi)
1911 _mm_undefined_si128 (),
1912 (__mmask8) -1);
1913 }
1914
1915 extern __inline void
1916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1917 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1918 {
1919 __builtin_ia32_pmovusdw128mem_mask ((unsigned long long *) __P, (__v4si) __A, __M);
1920 }
1921
1922 extern __inline __m128i
1923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1924 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1925 {
1926 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1927 (__v8hi) __O, __M);
1928 }
1929
1930 extern __inline __m128i
1931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1932 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
1933 {
1934 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1935 (__v8hi)
1936 _mm_setzero_si128 (),
1937 __M);
1938 }
1939
1940 extern __inline __m128i
1941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1942 _mm256_cvtusepi32_epi16 (__m256i __A)
1943 {
1944 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1945 (__v8hi)
1946 _mm_undefined_si128 (),
1947 (__mmask8) -1);
1948 }
1949
1950 extern __inline void
1951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1952 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1953 {
1954 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1955 }
1956
1957 extern __inline __m128i
1958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1959 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1960 {
1961 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1962 (__v8hi) __O, __M);
1963 }
1964
1965 extern __inline __m128i
1966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1967 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
1968 {
1969 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1970 (__v8hi)
1971 _mm_setzero_si128 (),
1972 __M);
1973 }
1974
1975 extern __inline __m128i
1976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1977 _mm_cvtepi64_epi8 (__m128i __A)
1978 {
1979 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1980 (__v16qi)
1981 _mm_undefined_si128 (),
1982 (__mmask8) -1);
1983 }
1984
1985 extern __inline void
1986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1987 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1988 {
1989 __builtin_ia32_pmovqb128mem_mask ((unsigned short *) __P, (__v2di) __A, __M);
1990 }
1991
1992 extern __inline __m128i
1993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1994 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1995 {
1996 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1997 (__v16qi) __O, __M);
1998 }
1999
2000 extern __inline __m128i
2001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2002 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
2003 {
2004 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
2005 (__v16qi)
2006 _mm_setzero_si128 (),
2007 __M);
2008 }
2009
2010 extern __inline __m128i
2011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2012 _mm256_cvtepi64_epi8 (__m256i __A)
2013 {
2014 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
2015 (__v16qi)
2016 _mm_undefined_si128 (),
2017 (__mmask8) -1);
2018 }
2019
2020 extern __inline void
2021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2022 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2023 {
2024 __builtin_ia32_pmovqb256mem_mask ((unsigned int *) __P, (__v4di) __A, __M);
2025 }
2026
2027 extern __inline __m128i
2028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2029 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2030 {
2031 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
2032 (__v16qi) __O, __M);
2033 }
2034
2035 extern __inline __m128i
2036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2037 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
2038 {
2039 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
2040 (__v16qi)
2041 _mm_setzero_si128 (),
2042 __M);
2043 }
2044
2045 extern __inline __m128i
2046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2047 _mm_cvtsepi64_epi8 (__m128i __A)
2048 {
2049 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
2050 (__v16qi)
2051 _mm_undefined_si128 (),
2052 (__mmask8) -1);
2053 }
2054
2055 extern __inline void
2056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
2058 {
2059 __builtin_ia32_pmovsqb128mem_mask ((unsigned short *) __P, (__v2di) __A, __M);
2060 }
2061
2062 extern __inline __m128i
2063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2064 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
2065 {
2066 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
2067 (__v16qi) __O, __M);
2068 }
2069
2070 extern __inline __m128i
2071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2072 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
2073 {
2074 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
2075 (__v16qi)
2076 _mm_setzero_si128 (),
2077 __M);
2078 }
2079
2080 extern __inline __m128i
2081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2082 _mm256_cvtsepi64_epi8 (__m256i __A)
2083 {
2084 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2085 (__v16qi)
2086 _mm_undefined_si128 (),
2087 (__mmask8) -1);
2088 }
2089
2090 extern __inline void
2091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2092 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2093 {
2094 __builtin_ia32_pmovsqb256mem_mask ((unsigned int *) __P, (__v4di) __A, __M);
2095 }
2096
2097 extern __inline __m128i
2098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2099 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2100 {
2101 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2102 (__v16qi) __O, __M);
2103 }
2104
2105 extern __inline __m128i
2106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2107 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
2108 {
2109 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2110 (__v16qi)
2111 _mm_setzero_si128 (),
2112 __M);
2113 }
2114
2115 extern __inline __m128i
2116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2117 _mm_cvtusepi64_epi8 (__m128i __A)
2118 {
2119 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2120 (__v16qi)
2121 _mm_undefined_si128 (),
2122 (__mmask8) -1);
2123 }
2124
2125 extern __inline void
2126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2127 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
2128 {
2129 __builtin_ia32_pmovusqb128mem_mask ((unsigned short *) __P, (__v2di) __A, __M);
2130 }
2131
2132 extern __inline __m128i
2133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2134 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
2135 {
2136 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2137 (__v16qi) __O,
2138 __M);
2139 }
2140
2141 extern __inline __m128i
2142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2143 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
2144 {
2145 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2146 (__v16qi)
2147 _mm_setzero_si128 (),
2148 __M);
2149 }
2150
2151 extern __inline __m128i
2152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2153 _mm256_cvtusepi64_epi8 (__m256i __A)
2154 {
2155 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2156 (__v16qi)
2157 _mm_undefined_si128 (),
2158 (__mmask8) -1);
2159 }
2160
2161 extern __inline void
2162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2163 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2164 {
2165 __builtin_ia32_pmovusqb256mem_mask ((unsigned int *) __P, (__v4di) __A, __M);
2166 }
2167
2168 extern __inline __m128i
2169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2170 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2171 {
2172 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2173 (__v16qi) __O,
2174 __M);
2175 }
2176
2177 extern __inline __m128i
2178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2179 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
2180 {
2181 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2182 (__v16qi)
2183 _mm_setzero_si128 (),
2184 __M);
2185 }
2186
2187 extern __inline __m128i
2188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2189 _mm_cvtepi64_epi16 (__m128i __A)
2190 {
2191 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2192 (__v8hi)
2193 _mm_undefined_si128 (),
2194 (__mmask8) -1);
2195 }
2196
2197 extern __inline void
2198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2199 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2200 {
2201 __builtin_ia32_pmovqw128mem_mask ((unsigned int *) __P, (__v2di) __A, __M);
2202 }
2203
2204 extern __inline __m128i
2205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2206 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2207 {
2208 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2209 (__v8hi)__O,
2210 __M);
2211 }
2212
2213 extern __inline __m128i
2214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2215 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
2216 {
2217 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2218 (__v8hi)
2219 _mm_setzero_si128 (),
2220 __M);
2221 }
2222
2223 extern __inline __m128i
2224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2225 _mm256_cvtepi64_epi16 (__m256i __A)
2226 {
2227 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2228 (__v8hi)
2229 _mm_undefined_si128 (),
2230 (__mmask8) -1);
2231 }
2232
2233 extern __inline void
2234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2235 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2236 {
2237 __builtin_ia32_pmovqw256mem_mask ((unsigned long long *) __P, (__v4di) __A, __M);
2238 }
2239
2240 extern __inline __m128i
2241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2242 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2243 {
2244 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2245 (__v8hi) __O, __M);
2246 }
2247
2248 extern __inline __m128i
2249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2250 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
2251 {
2252 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2253 (__v8hi)
2254 _mm_setzero_si128 (),
2255 __M);
2256 }
2257
2258 extern __inline __m128i
2259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2260 _mm_cvtsepi64_epi16 (__m128i __A)
2261 {
2262 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2263 (__v8hi)
2264 _mm_undefined_si128 (),
2265 (__mmask8) -1);
2266 }
2267
2268 extern __inline void
2269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2270 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2271 {
2272 __builtin_ia32_pmovsqw128mem_mask ((unsigned int *) __P, (__v2di) __A, __M);
2273 }
2274
2275 extern __inline __m128i
2276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2277 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2278 {
2279 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2280 (__v8hi) __O, __M);
2281 }
2282
2283 extern __inline __m128i
2284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2285 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
2286 {
2287 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2288 (__v8hi)
2289 _mm_setzero_si128 (),
2290 __M);
2291 }
2292
2293 extern __inline __m128i
2294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2295 _mm256_cvtsepi64_epi16 (__m256i __A)
2296 {
2297 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2298 (__v8hi)
2299 _mm_undefined_si128 (),
2300 (__mmask8) -1);
2301 }
2302
2303 extern __inline void
2304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2305 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2306 {
2307 __builtin_ia32_pmovsqw256mem_mask ((unsigned long long *) __P, (__v4di) __A, __M);
2308 }
2309
2310 extern __inline __m128i
2311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2312 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2313 {
2314 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2315 (__v8hi) __O, __M);
2316 }
2317
2318 extern __inline __m128i
2319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2320 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
2321 {
2322 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2323 (__v8hi)
2324 _mm_setzero_si128 (),
2325 __M);
2326 }
2327
2328 extern __inline __m128i
2329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2330 _mm_cvtusepi64_epi16 (__m128i __A)
2331 {
2332 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2333 (__v8hi)
2334 _mm_undefined_si128 (),
2335 (__mmask8) -1);
2336 }
2337
2338 extern __inline void
2339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2340 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2341 {
2342 __builtin_ia32_pmovusqw128mem_mask ((unsigned int *) __P, (__v2di) __A, __M);
2343 }
2344
2345 extern __inline __m128i
2346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2347 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2348 {
2349 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2350 (__v8hi) __O, __M);
2351 }
2352
2353 extern __inline __m128i
2354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2355 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
2356 {
2357 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2358 (__v8hi)
2359 _mm_setzero_si128 (),
2360 __M);
2361 }
2362
2363 extern __inline __m128i
2364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2365 _mm256_cvtusepi64_epi16 (__m256i __A)
2366 {
2367 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2368 (__v8hi)
2369 _mm_undefined_si128 (),
2370 (__mmask8) -1);
2371 }
2372
2373 extern __inline void
2374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2375 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2376 {
2377 __builtin_ia32_pmovusqw256mem_mask ((unsigned long long *) __P, (__v4di) __A, __M);
2378 }
2379
2380 extern __inline __m128i
2381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2382 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2383 {
2384 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2385 (__v8hi) __O, __M);
2386 }
2387
2388 extern __inline __m128i
2389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2390 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
2391 {
2392 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2393 (__v8hi)
2394 _mm_setzero_si128 (),
2395 __M);
2396 }
2397
2398 extern __inline __m128i
2399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2400 _mm_cvtepi64_epi32 (__m128i __A)
2401 {
2402 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2403 (__v4si)
2404 _mm_undefined_si128 (),
2405 (__mmask8) -1);
2406 }
2407
2408 extern __inline void
2409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2410 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2411 {
2412 __builtin_ia32_pmovqd128mem_mask ((unsigned long long *) __P,
2413 (__v2di) __A, __M);
2414 }
2415
2416 extern __inline __m128i
2417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2418 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2419 {
2420 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2421 (__v4si) __O, __M);
2422 }
2423
2424 extern __inline __m128i
2425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2426 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
2427 {
2428 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2429 (__v4si)
2430 _mm_setzero_si128 (),
2431 __M);
2432 }
2433
2434 extern __inline __m128i
2435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2436 _mm256_cvtepi64_epi32 (__m256i __A)
2437 {
2438 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2439 (__v4si)
2440 _mm_undefined_si128 (),
2441 (__mmask8) -1);
2442 }
2443
2444 extern __inline void
2445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2446 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2447 {
2448 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2449 }
2450
2451 extern __inline __m128i
2452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2453 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2454 {
2455 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2456 (__v4si) __O, __M);
2457 }
2458
2459 extern __inline __m128i
2460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2461 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
2462 {
2463 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2464 (__v4si)
2465 _mm_setzero_si128 (),
2466 __M);
2467 }
2468
2469 extern __inline __m128i
2470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2471 _mm_cvtsepi64_epi32 (__m128i __A)
2472 {
2473 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2474 (__v4si)
2475 _mm_undefined_si128 (),
2476 (__mmask8) -1);
2477 }
2478
2479 extern __inline void
2480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2481 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2482 {
2483 __builtin_ia32_pmovsqd128mem_mask ((unsigned long long *) __P, (__v2di) __A, __M);
2484 }
2485
2486 extern __inline __m128i
2487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2488 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2489 {
2490 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2491 (__v4si) __O, __M);
2492 }
2493
2494 extern __inline __m128i
2495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2496 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
2497 {
2498 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2499 (__v4si)
2500 _mm_setzero_si128 (),
2501 __M);
2502 }
2503
2504 extern __inline __m128i
2505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2506 _mm256_cvtsepi64_epi32 (__m256i __A)
2507 {
2508 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2509 (__v4si)
2510 _mm_undefined_si128 (),
2511 (__mmask8) -1);
2512 }
2513
2514 extern __inline void
2515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2516 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2517 {
2518 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2519 }
2520
2521 extern __inline __m128i
2522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2523 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2524 {
2525 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2526 (__v4si)__O,
2527 __M);
2528 }
2529
2530 extern __inline __m128i
2531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2532 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
2533 {
2534 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2535 (__v4si)
2536 _mm_setzero_si128 (),
2537 __M);
2538 }
2539
2540 extern __inline __m128i
2541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2542 _mm_cvtusepi64_epi32 (__m128i __A)
2543 {
2544 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2545 (__v4si)
2546 _mm_undefined_si128 (),
2547 (__mmask8) -1);
2548 }
2549
2550 extern __inline void
2551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2552 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2553 {
2554 __builtin_ia32_pmovusqd128mem_mask ((unsigned long long *) __P, (__v2di) __A, __M);
2555 }
2556
2557 extern __inline __m128i
2558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2559 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2560 {
2561 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2562 (__v4si) __O, __M);
2563 }
2564
2565 extern __inline __m128i
2566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2567 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
2568 {
2569 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2570 (__v4si)
2571 _mm_setzero_si128 (),
2572 __M);
2573 }
2574
2575 extern __inline __m128i
2576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2577 _mm256_cvtusepi64_epi32 (__m256i __A)
2578 {
2579 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2580 (__v4si)
2581 _mm_undefined_si128 (),
2582 (__mmask8) -1);
2583 }
2584
2585 extern __inline void
2586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2587 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2588 {
2589 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2590 }
2591
2592 extern __inline __m128i
2593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2594 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2595 {
2596 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2597 (__v4si) __O, __M);
2598 }
2599
2600 extern __inline __m128i
2601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2602 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
2603 {
2604 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2605 (__v4si)
2606 _mm_setzero_si128 (),
2607 __M);
2608 }
2609
2610 extern __inline __m256
2611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2612 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
2613 {
2614 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2615 (__v8sf) __O,
2616 __M);
2617 }
2618
2619 extern __inline __m256
2620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2621 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2622 {
2623 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2624 (__v8sf)
2625 _mm256_setzero_ps (),
2626 __M);
2627 }
2628
2629 extern __inline __m128
2630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2631 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
2632 {
2633 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2634 (__v4sf) __O,
2635 __M);
2636 }
2637
2638 extern __inline __m128
2639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2640 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2641 {
2642 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2643 (__v4sf)
2644 _mm_setzero_ps (),
2645 __M);
2646 }
2647
2648 extern __inline __m256d
2649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2650 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
2651 {
2652 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2653 (__v4df) __O,
2654 __M);
2655 }
2656
2657 extern __inline __m256d
2658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2659 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
2660 {
2661 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2662 (__v4df)
2663 _mm256_setzero_pd (),
2664 __M);
2665 }
2666
2667 extern __inline __m256i
2668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2669 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
2670 {
2671 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2672 (__v8si) __O,
2673 __M);
2674 }
2675
2676 extern __inline __m256i
2677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2678 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2679 {
2680 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2681 (__v8si)
2682 _mm256_setzero_si256 (),
2683 __M);
2684 }
2685
2686 extern __inline __m256i
2687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2688 _mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
2689 {
2690 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
2691 __M);
2692 }
2693
2694 extern __inline __m256i
2695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2696 _mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
2697 {
2698 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
2699 (__v8si)
2700 _mm256_setzero_si256 (),
2701 __M);
2702 }
2703
2704 extern __inline __m128i
2705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2706 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2707 {
2708 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2709 (__v4si) __O,
2710 __M);
2711 }
2712
2713 extern __inline __m128i
2714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2715 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2716 {
2717 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2718 (__v4si)
2719 _mm_setzero_si128 (),
2720 __M);
2721 }
2722
2723 extern __inline __m128i
2724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2725 _mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
2726 {
2727 return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
2728 __M);
2729 }
2730
2731 extern __inline __m128i
2732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2733 _mm_maskz_set1_epi32 (__mmask8 __M, int __A)
2734 {
2735 return (__m128i)
2736 __builtin_ia32_pbroadcastd128_gpr_mask (__A,
2737 (__v4si) _mm_setzero_si128 (),
2738 __M);
2739 }
2740
2741 extern __inline __m256i
2742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2743 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
2744 {
2745 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2746 (__v4di) __O,
2747 __M);
2748 }
2749
2750 extern __inline __m256i
2751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2752 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2753 {
2754 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2755 (__v4di)
2756 _mm256_setzero_si256 (),
2757 __M);
2758 }
2759
2760 extern __inline __m256i
2761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2762 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
2763 {
2764 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
2765 __M);
2766 }
2767
2768 extern __inline __m256i
2769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2770 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
2771 {
2772 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
2773 (__v4di)
2774 _mm256_setzero_si256 (),
2775 __M);
2776 }
2777
2778 extern __inline __m128i
2779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2780 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
2781 {
2782 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2783 (__v2di) __O,
2784 __M);
2785 }
2786
2787 extern __inline __m128i
2788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2789 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2790 {
2791 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2792 (__v2di)
2793 _mm_setzero_si128 (),
2794 __M);
2795 }
2796
2797 extern __inline __m128i
2798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2799 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
2800 {
2801 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
2802 __M);
2803 }
2804
2805 extern __inline __m128i
2806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2807 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
2808 {
2809 return (__m128i)
2810 __builtin_ia32_pbroadcastq128_gpr_mask (__A,
2811 (__v2di) _mm_setzero_si128 (),
2812 __M);
2813 }
2814
2815 extern __inline __m256
2816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2817 _mm256_broadcast_f32x4 (__m128 __A)
2818 {
2819 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2820 (__v8sf)_mm256_undefined_pd (),
2821 (__mmask8) -1);
2822 }
2823
2824 extern __inline __m256
2825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2826 _mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
2827 {
2828 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2829 (__v8sf) __O,
2830 __M);
2831 }
2832
2833 extern __inline __m256
2834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2835 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
2836 {
2837 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2838 (__v8sf)
2839 _mm256_setzero_ps (),
2840 __M);
2841 }
2842
2843 extern __inline __m256i
2844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2845 _mm256_broadcast_i32x4 (__m128i __A)
2846 {
2847 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2848 __A,
2849 (__v8si)_mm256_undefined_si256 (),
2850 (__mmask8) -1);
2851 }
2852
2853 extern __inline __m256i
2854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2855 _mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
2856 {
2857 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2858 __A,
2859 (__v8si)
2860 __O, __M);
2861 }
2862
2863 extern __inline __m256i
2864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2865 _mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
2866 {
2867 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2868 __A,
2869 (__v8si)
2870 _mm256_setzero_si256 (),
2871 __M);
2872 }
2873
2874 extern __inline __m256i
2875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2876 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2877 {
2878 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2879 (__v8si) __W,
2880 (__mmask8) __U);
2881 }
2882
2883 extern __inline __m256i
2884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2885 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2886 {
2887 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2888 (__v8si)
2889 _mm256_setzero_si256 (),
2890 (__mmask8) __U);
2891 }
2892
2893 extern __inline __m128i
2894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2895 _mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2896 {
2897 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2898 (__v4si) __W,
2899 (__mmask8) __U);
2900 }
2901
2902 extern __inline __m128i
2903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2904 _mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2905 {
2906 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2907 (__v4si)
2908 _mm_setzero_si128 (),
2909 (__mmask8) __U);
2910 }
2911
2912 extern __inline __m256i
2913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2914 _mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2915 {
2916 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2917 (__v4di) __W,
2918 (__mmask8) __U);
2919 }
2920
2921 extern __inline __m256i
2922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2923 _mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2924 {
2925 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2926 (__v4di)
2927 _mm256_setzero_si256 (),
2928 (__mmask8) __U);
2929 }
2930
2931 extern __inline __m128i
2932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2933 _mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2934 {
2935 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2936 (__v2di) __W,
2937 (__mmask8) __U);
2938 }
2939
2940 extern __inline __m128i
2941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2942 _mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2943 {
2944 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2945 (__v2di)
2946 _mm_setzero_si128 (),
2947 (__mmask8) __U);
2948 }
2949
2950 extern __inline __m256i
2951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2952 _mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2953 {
2954 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2955 (__v8si) __W,
2956 (__mmask8) __U);
2957 }
2958
2959 extern __inline __m256i
2960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2961 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2962 {
2963 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2964 (__v8si)
2965 _mm256_setzero_si256 (),
2966 (__mmask8) __U);
2967 }
2968
2969 extern __inline __m128i
2970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2971 _mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2972 {
2973 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2974 (__v4si) __W,
2975 (__mmask8) __U);
2976 }
2977
2978 extern __inline __m128i
2979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2980 _mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2981 {
2982 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2983 (__v4si)
2984 _mm_setzero_si128 (),
2985 (__mmask8) __U);
2986 }
2987
2988 extern __inline __m256i
2989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2990 _mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2991 {
2992 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2993 (__v4di) __W,
2994 (__mmask8) __U);
2995 }
2996
2997 extern __inline __m256i
2998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2999 _mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
3000 {
3001 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
3002 (__v4di)
3003 _mm256_setzero_si256 (),
3004 (__mmask8) __U);
3005 }
3006
3007 extern __inline __m128i
3008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3009 _mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3010 {
3011 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
3012 (__v2di) __W,
3013 (__mmask8) __U);
3014 }
3015
3016 extern __inline __m128i
3017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3018 _mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
3019 {
3020 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
3021 (__v2di)
3022 _mm_setzero_si128 (),
3023 (__mmask8) __U);
3024 }
3025
3026 extern __inline __m256i
3027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3028 _mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
3029 {
3030 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
3031 (__v4di) __W,
3032 (__mmask8) __U);
3033 }
3034
3035 extern __inline __m256i
3036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3037 _mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
3038 {
3039 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
3040 (__v4di)
3041 _mm256_setzero_si256 (),
3042 (__mmask8) __U);
3043 }
3044
3045 extern __inline __m128i
3046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3047 _mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
3048 {
3049 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
3050 (__v2di) __W,
3051 (__mmask8) __U);
3052 }
3053
3054 extern __inline __m128i
3055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3056 _mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
3057 {
3058 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
3059 (__v2di)
3060 _mm_setzero_si128 (),
3061 (__mmask8) __U);
3062 }
3063
3064 extern __inline __m256i
3065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3066 _mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3067 {
3068 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
3069 (__v8si) __W,
3070 (__mmask8) __U);
3071 }
3072
3073 extern __inline __m256i
3074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3075 _mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
3076 {
3077 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
3078 (__v8si)
3079 _mm256_setzero_si256 (),
3080 (__mmask8) __U);
3081 }
3082
3083 extern __inline __m128i
3084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3085 _mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
3086 {
3087 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
3088 (__v4si) __W,
3089 (__mmask8) __U);
3090 }
3091
3092 extern __inline __m128i
3093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3094 _mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
3095 {
3096 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
3097 (__v4si)
3098 _mm_setzero_si128 (),
3099 (__mmask8) __U);
3100 }
3101
3102 extern __inline __m256i
3103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3104 _mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3105 {
3106 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3107 (__v4di) __W,
3108 (__mmask8) __U);
3109 }
3110
3111 extern __inline __m256i
3112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3113 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3114 {
3115 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3116 (__v4di)
3117 _mm256_setzero_si256 (),
3118 (__mmask8) __U);
3119 }
3120
3121 extern __inline __m128i
3122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3123 _mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3124 {
3125 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3126 (__v2di) __W,
3127 (__mmask8) __U);
3128 }
3129
3130 extern __inline __m128i
3131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3132 _mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3133 {
3134 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3135 (__v2di)
3136 _mm_setzero_si128 (),
3137 (__mmask8) __U);
3138 }
3139
3140 extern __inline __m256i
3141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3142 _mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3143 {
3144 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3145 (__v8si) __W,
3146 (__mmask8) __U);
3147 }
3148
3149 extern __inline __m256i
3150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3151 _mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3152 {
3153 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3154 (__v8si)
3155 _mm256_setzero_si256 (),
3156 (__mmask8) __U);
3157 }
3158
3159 extern __inline __m128i
3160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3161 _mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
3162 {
3163 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3164 (__v4si) __W,
3165 (__mmask8) __U);
3166 }
3167
3168 extern __inline __m128i
3169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3170 _mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3171 {
3172 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3173 (__v4si)
3174 _mm_setzero_si128 (),
3175 (__mmask8) __U);
3176 }
3177
3178 extern __inline __m256i
3179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3180 _mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3181 {
3182 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3183 (__v4di) __W,
3184 (__mmask8) __U);
3185 }
3186
3187 extern __inline __m256i
3188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3189 _mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3190 {
3191 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3192 (__v4di)
3193 _mm256_setzero_si256 (),
3194 (__mmask8) __U);
3195 }
3196
3197 extern __inline __m128i
3198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3199 _mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3200 {
3201 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3202 (__v2di) __W,
3203 (__mmask8) __U);
3204 }
3205
3206 extern __inline __m128i
3207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3208 _mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3209 {
3210 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3211 (__v2di)
3212 _mm_setzero_si128 (),
3213 (__mmask8) __U);
3214 }
3215
3216 extern __inline __m256i
3217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3218 _mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
3219 {
3220 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3221 (__v4di) __W,
3222 (__mmask8) __U);
3223 }
3224
3225 extern __inline __m256i
3226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3227 _mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3228 {
3229 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3230 (__v4di)
3231 _mm256_setzero_si256 (),
3232 (__mmask8) __U);
3233 }
3234
3235 extern __inline __m128i
3236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3237 _mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
3238 {
3239 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3240 (__v2di) __W,
3241 (__mmask8) __U);
3242 }
3243
3244 extern __inline __m128i
3245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3246 _mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3247 {
3248 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3249 (__v2di)
3250 _mm_setzero_si128 (),
3251 (__mmask8) __U);
3252 }
3253
3254 extern __inline __m256d
3255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3256 _mm256_rcp14_pd (__m256d __A)
3257 {
3258 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3259 (__v4df)
3260 _mm256_setzero_pd (),
3261 (__mmask8) -1);
3262 }
3263
3264 extern __inline __m256d
3265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3266 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3267 {
3268 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3269 (__v4df) __W,
3270 (__mmask8) __U);
3271 }
3272
3273 extern __inline __m256d
3274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3275 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
3276 {
3277 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3278 (__v4df)
3279 _mm256_setzero_pd (),
3280 (__mmask8) __U);
3281 }
3282
3283 extern __inline __m128d
3284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3285 _mm_rcp14_pd (__m128d __A)
3286 {
3287 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3288 (__v2df)
3289 _mm_setzero_pd (),
3290 (__mmask8) -1);
3291 }
3292
3293 extern __inline __m128d
3294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3295 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3296 {
3297 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3298 (__v2df) __W,
3299 (__mmask8) __U);
3300 }
3301
3302 extern __inline __m128d
3303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3304 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
3305 {
3306 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3307 (__v2df)
3308 _mm_setzero_pd (),
3309 (__mmask8) __U);
3310 }
3311
3312 extern __inline __m256
3313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3314 _mm256_rcp14_ps (__m256 __A)
3315 {
3316 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3317 (__v8sf)
3318 _mm256_setzero_ps (),
3319 (__mmask8) -1);
3320 }
3321
3322 extern __inline __m256
3323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3324 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3325 {
3326 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3327 (__v8sf) __W,
3328 (__mmask8) __U);
3329 }
3330
3331 extern __inline __m256
3332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3333 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
3334 {
3335 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3336 (__v8sf)
3337 _mm256_setzero_ps (),
3338 (__mmask8) __U);
3339 }
3340
3341 extern __inline __m128
3342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3343 _mm_rcp14_ps (__m128 __A)
3344 {
3345 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3346 (__v4sf)
3347 _mm_setzero_ps (),
3348 (__mmask8) -1);
3349 }
3350
3351 extern __inline __m128
3352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3353 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3354 {
3355 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3356 (__v4sf) __W,
3357 (__mmask8) __U);
3358 }
3359
3360 extern __inline __m128
3361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3362 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
3363 {
3364 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3365 (__v4sf)
3366 _mm_setzero_ps (),
3367 (__mmask8) __U);
3368 }
3369
3370 extern __inline __m256d
3371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3372 _mm256_rsqrt14_pd (__m256d __A)
3373 {
3374 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3375 (__v4df)
3376 _mm256_setzero_pd (),
3377 (__mmask8) -1);
3378 }
3379
3380 extern __inline __m256d
3381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3382 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3383 {
3384 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3385 (__v4df) __W,
3386 (__mmask8) __U);
3387 }
3388
3389 extern __inline __m256d
3390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3391 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
3392 {
3393 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3394 (__v4df)
3395 _mm256_setzero_pd (),
3396 (__mmask8) __U);
3397 }
3398
3399 extern __inline __m128d
3400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3401 _mm_rsqrt14_pd (__m128d __A)
3402 {
3403 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3404 (__v2df)
3405 _mm_setzero_pd (),
3406 (__mmask8) -1);
3407 }
3408
3409 extern __inline __m128d
3410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3411 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3412 {
3413 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3414 (__v2df) __W,
3415 (__mmask8) __U);
3416 }
3417
3418 extern __inline __m128d
3419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3420 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
3421 {
3422 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3423 (__v2df)
3424 _mm_setzero_pd (),
3425 (__mmask8) __U);
3426 }
3427
3428 extern __inline __m256
3429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3430 _mm256_rsqrt14_ps (__m256 __A)
3431 {
3432 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3433 (__v8sf)
3434 _mm256_setzero_ps (),
3435 (__mmask8) -1);
3436 }
3437
3438 extern __inline __m256
3439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3440 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3441 {
3442 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3443 (__v8sf) __W,
3444 (__mmask8) __U);
3445 }
3446
3447 extern __inline __m256
3448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3449 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
3450 {
3451 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3452 (__v8sf)
3453 _mm256_setzero_ps (),
3454 (__mmask8) __U);
3455 }
3456
3457 extern __inline __m128
3458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3459 _mm_rsqrt14_ps (__m128 __A)
3460 {
3461 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3462 (__v4sf)
3463 _mm_setzero_ps (),
3464 (__mmask8) -1);
3465 }
3466
3467 extern __inline __m128
3468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3469 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3470 {
3471 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3472 (__v4sf) __W,
3473 (__mmask8) __U);
3474 }
3475
3476 extern __inline __m128
3477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3478 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
3479 {
3480 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3481 (__v4sf)
3482 _mm_setzero_ps (),
3483 (__mmask8) __U);
3484 }
3485
3486 extern __inline __m256d
3487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3488 _mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
3489 {
3490 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3491 (__v4df) __W,
3492 (__mmask8) __U);
3493 }
3494
3495 extern __inline __m256d
3496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3497 _mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
3498 {
3499 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3500 (__v4df)
3501 _mm256_setzero_pd (),
3502 (__mmask8) __U);
3503 }
3504
3505 extern __inline __m128d
3506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3507 _mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
3508 {
3509 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3510 (__v2df) __W,
3511 (__mmask8) __U);
3512 }
3513
3514 extern __inline __m128d
3515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3516 _mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
3517 {
3518 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3519 (__v2df)
3520 _mm_setzero_pd (),
3521 (__mmask8) __U);
3522 }
3523
3524 extern __inline __m256
3525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3526 _mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
3527 {
3528 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3529 (__v8sf) __W,
3530 (__mmask8) __U);
3531 }
3532
3533 extern __inline __m256
3534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3535 _mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
3536 {
3537 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3538 (__v8sf)
3539 _mm256_setzero_ps (),
3540 (__mmask8) __U);
3541 }
3542
3543 extern __inline __m128
3544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3545 _mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
3546 {
3547 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3548 (__v4sf) __W,
3549 (__mmask8) __U);
3550 }
3551
3552 extern __inline __m128
3553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3554 _mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
3555 {
3556 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3557 (__v4sf)
3558 _mm_setzero_ps (),
3559 (__mmask8) __U);
3560 }
3561
3562 extern __inline __m256i
3563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3564 _mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3565 __m256i __B)
3566 {
3567 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3568 (__v8si) __B,
3569 (__v8si) __W,
3570 (__mmask8) __U);
3571 }
3572
3573 extern __inline __m256i
3574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3575 _mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3576 {
3577 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3578 (__v8si) __B,
3579 (__v8si)
3580 _mm256_setzero_si256 (),
3581 (__mmask8) __U);
3582 }
3583
3584 extern __inline __m256i
3585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3586 _mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3587 __m256i __B)
3588 {
3589 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3590 (__v4di) __B,
3591 (__v4di) __W,
3592 (__mmask8) __U);
3593 }
3594
3595 extern __inline __m256i
3596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3597 _mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3598 {
3599 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3600 (__v4di) __B,
3601 (__v4di)
3602 _mm256_setzero_si256 (),
3603 (__mmask8) __U);
3604 }
3605
3606 extern __inline __m256i
3607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3608 _mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3609 __m256i __B)
3610 {
3611 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3612 (__v8si) __B,
3613 (__v8si) __W,
3614 (__mmask8) __U);
3615 }
3616
3617 extern __inline __m256i
3618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3619 _mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3620 {
3621 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3622 (__v8si) __B,
3623 (__v8si)
3624 _mm256_setzero_si256 (),
3625 (__mmask8) __U);
3626 }
3627
3628 extern __inline __m256i
3629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3630 _mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3631 __m256i __B)
3632 {
3633 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3634 (__v4di) __B,
3635 (__v4di) __W,
3636 (__mmask8) __U);
3637 }
3638
3639 extern __inline __m256i
3640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3641 _mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3642 {
3643 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3644 (__v4di) __B,
3645 (__v4di)
3646 _mm256_setzero_si256 (),
3647 (__mmask8) __U);
3648 }
3649
3650 extern __inline __m128i
3651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3652 _mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3653 __m128i __B)
3654 {
3655 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3656 (__v4si) __B,
3657 (__v4si) __W,
3658 (__mmask8) __U);
3659 }
3660
3661 extern __inline __m128i
3662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3663 _mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3664 {
3665 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3666 (__v4si) __B,
3667 (__v4si)
3668 _mm_setzero_si128 (),
3669 (__mmask8) __U);
3670 }
3671
3672 extern __inline __m128i
3673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3674 _mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3675 __m128i __B)
3676 {
3677 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3678 (__v2di) __B,
3679 (__v2di) __W,
3680 (__mmask8) __U);
3681 }
3682
3683 extern __inline __m128i
3684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3685 _mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3686 {
3687 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3688 (__v2di) __B,
3689 (__v2di)
3690 _mm_setzero_si128 (),
3691 (__mmask8) __U);
3692 }
3693
3694 extern __inline __m128i
3695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3696 _mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3697 __m128i __B)
3698 {
3699 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3700 (__v4si) __B,
3701 (__v4si) __W,
3702 (__mmask8) __U);
3703 }
3704
3705 extern __inline __m128i
3706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3707 _mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3708 {
3709 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3710 (__v4si) __B,
3711 (__v4si)
3712 _mm_setzero_si128 (),
3713 (__mmask8) __U);
3714 }
3715
3716 extern __inline __m128i
3717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3718 _mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3719 __m128i __B)
3720 {
3721 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3722 (__v2di) __B,
3723 (__v2di) __W,
3724 (__mmask8) __U);
3725 }
3726
3727 extern __inline __m128i
3728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3729 _mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3730 {
3731 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3732 (__v2di) __B,
3733 (__v2di)
3734 _mm_setzero_si128 (),
3735 (__mmask8) __U);
3736 }
3737
3738 extern __inline __m256
3739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3740 _mm256_getexp_ps (__m256 __A)
3741 {
3742 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3743 (__v8sf)
3744 _mm256_setzero_ps (),
3745 (__mmask8) -1);
3746 }
3747
3748 extern __inline __m256
3749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3750 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
3751 {
3752 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3753 (__v8sf) __W,
3754 (__mmask8) __U);
3755 }
3756
3757 extern __inline __m256
3758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3759 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
3760 {
3761 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3762 (__v8sf)
3763 _mm256_setzero_ps (),
3764 (__mmask8) __U);
3765 }
3766
3767 extern __inline __m256d
3768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3769 _mm256_getexp_pd (__m256d __A)
3770 {
3771 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3772 (__v4df)
3773 _mm256_setzero_pd (),
3774 (__mmask8) -1);
3775 }
3776
3777 extern __inline __m256d
3778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3779 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
3780 {
3781 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3782 (__v4df) __W,
3783 (__mmask8) __U);
3784 }
3785
3786 extern __inline __m256d
3787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3788 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
3789 {
3790 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3791 (__v4df)
3792 _mm256_setzero_pd (),
3793 (__mmask8) __U);
3794 }
3795
3796 extern __inline __m128
3797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3798 _mm_getexp_ps (__m128 __A)
3799 {
3800 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3801 (__v4sf)
3802 _mm_setzero_ps (),
3803 (__mmask8) -1);
3804 }
3805
3806 extern __inline __m128
3807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3808 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
3809 {
3810 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3811 (__v4sf) __W,
3812 (__mmask8) __U);
3813 }
3814
3815 extern __inline __m128
3816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3817 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
3818 {
3819 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3820 (__v4sf)
3821 _mm_setzero_ps (),
3822 (__mmask8) __U);
3823 }
3824
3825 extern __inline __m128d
3826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3827 _mm_getexp_pd (__m128d __A)
3828 {
3829 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3830 (__v2df)
3831 _mm_setzero_pd (),
3832 (__mmask8) -1);
3833 }
3834
3835 extern __inline __m128d
3836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3837 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
3838 {
3839 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3840 (__v2df) __W,
3841 (__mmask8) __U);
3842 }
3843
3844 extern __inline __m128d
3845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3846 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
3847 {
3848 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3849 (__v2df)
3850 _mm_setzero_pd (),
3851 (__mmask8) __U);
3852 }
3853
3854 extern __inline __m256i
3855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3856 _mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3857 __m128i __B)
3858 {
3859 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3860 (__v4si) __B,
3861 (__v8si) __W,
3862 (__mmask8) __U);
3863 }
3864
3865 extern __inline __m256i
3866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3867 _mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
3868 {
3869 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3870 (__v4si) __B,
3871 (__v8si)
3872 _mm256_setzero_si256 (),
3873 (__mmask8) __U);
3874 }
3875
3876 extern __inline __m128i
3877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3878 _mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3879 __m128i __B)
3880 {
3881 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3882 (__v4si) __B,
3883 (__v4si) __W,
3884 (__mmask8) __U);
3885 }
3886
3887 extern __inline __m128i
3888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3889 _mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3890 {
3891 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3892 (__v4si) __B,
3893 (__v4si)
3894 _mm_setzero_si128 (),
3895 (__mmask8) __U);
3896 }
3897
3898 extern __inline __m256i
3899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3900 _mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3901 __m128i __B)
3902 {
3903 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3904 (__v2di) __B,
3905 (__v4di) __W,
3906 (__mmask8) __U);
3907 }
3908
3909 extern __inline __m256i
3910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3911 _mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
3912 {
3913 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3914 (__v2di) __B,
3915 (__v4di)
3916 _mm256_setzero_si256 (),
3917 (__mmask8) __U);
3918 }
3919
3920 extern __inline __m128i
3921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3922 _mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3923 __m128i __B)
3924 {
3925 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3926 (__v2di) __B,
3927 (__v2di) __W,
3928 (__mmask8) __U);
3929 }
3930
3931 extern __inline __m128i
3932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3933 _mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3934 {
3935 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3936 (__v2di) __B,
3937 (__v2di)
3938 _mm_setzero_si128 (),
3939 (__mmask8) __U);
3940 }
3941
3942 extern __inline __m256i
3943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3944 _mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3945 __m256i __B)
3946 {
3947 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3948 (__v8si) __B,
3949 (__v8si) __W,
3950 (__mmask8) __U);
3951 }
3952
3953 extern __inline __m256i
3954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3955 _mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3956 {
3957 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3958 (__v8si) __B,
3959 (__v8si)
3960 _mm256_setzero_si256 (),
3961 (__mmask8) __U);
3962 }
3963
3964 extern __inline __m256d
3965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3966 _mm256_scalef_pd (__m256d __A, __m256d __B)
3967 {
3968 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3969 (__v4df) __B,
3970 (__v4df)
3971 _mm256_setzero_pd (),
3972 (__mmask8) -1);
3973 }
3974
3975 extern __inline __m256d
3976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3977 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3978 __m256d __B)
3979 {
3980 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3981 (__v4df) __B,
3982 (__v4df) __W,
3983 (__mmask8) __U);
3984 }
3985
3986 extern __inline __m256d
3987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3988 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
3989 {
3990 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3991 (__v4df) __B,
3992 (__v4df)
3993 _mm256_setzero_pd (),
3994 (__mmask8) __U);
3995 }
3996
3997 extern __inline __m256
3998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3999 _mm256_scalef_ps (__m256 __A, __m256 __B)
4000 {
4001 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
4002 (__v8sf) __B,
4003 (__v8sf)
4004 _mm256_setzero_ps (),
4005 (__mmask8) -1);
4006 }
4007
4008 extern __inline __m256
4009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4010 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
4011 __m256 __B)
4012 {
4013 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
4014 (__v8sf) __B,
4015 (__v8sf) __W,
4016 (__mmask8) __U);
4017 }
4018
4019 extern __inline __m256
4020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4021 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
4022 {
4023 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
4024 (__v8sf) __B,
4025 (__v8sf)
4026 _mm256_setzero_ps (),
4027 (__mmask8) __U);
4028 }
4029
4030 extern __inline __m128d
4031 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4032 _mm_scalef_pd (__m128d __A, __m128d __B)
4033 {
4034 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
4035 (__v2df) __B,
4036 (__v2df)
4037 _mm_setzero_pd (),
4038 (__mmask8) -1);
4039 }
4040
4041 extern __inline __m128d
4042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4043 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
4044 __m128d __B)
4045 {
4046 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
4047 (__v2df) __B,
4048 (__v2df) __W,
4049 (__mmask8) __U);
4050 }
4051
4052 extern __inline __m128d
4053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4054 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
4055 {
4056 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
4057 (__v2df) __B,
4058 (__v2df)
4059 _mm_setzero_pd (),
4060 (__mmask8) __U);
4061 }
4062
4063 extern __inline __m128
4064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4065 _mm_scalef_ps (__m128 __A, __m128 __B)
4066 {
4067 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
4068 (__v4sf) __B,
4069 (__v4sf)
4070 _mm_setzero_ps (),
4071 (__mmask8) -1);
4072 }
4073
4074 extern __inline __m128
4075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4076 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
4077 {
4078 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
4079 (__v4sf) __B,
4080 (__v4sf) __W,
4081 (__mmask8) __U);
4082 }
4083
4084 extern __inline __m128
4085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4086 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
4087 {
4088 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
4089 (__v4sf) __B,
4090 (__v4sf)
4091 _mm_setzero_ps (),
4092 (__mmask8) __U);
4093 }
4094
4095 extern __inline __m256d
4096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4097 _mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4098 __m256d __C)
4099 {
4100 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
4101 (__v4df) __B,
4102 (__v4df) __C,
4103 (__mmask8) __U);
4104 }
4105
4106 extern __inline __m256d
4107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4108 _mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4109 __mmask8 __U)
4110 {
4111 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
4112 (__v4df) __B,
4113 (__v4df) __C,
4114 (__mmask8) __U);
4115 }
4116
4117 extern __inline __m256d
4118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4119 _mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4120 __m256d __C)
4121 {
4122 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4123 (__v4df) __B,
4124 (__v4df) __C,
4125 (__mmask8) __U);
4126 }
4127
4128 extern __inline __m128d
4129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4130 _mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4131 {
4132 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4133 (__v2df) __B,
4134 (__v2df) __C,
4135 (__mmask8) __U);
4136 }
4137
4138 extern __inline __m128d
4139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4140 _mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4141 __mmask8 __U)
4142 {
4143 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
4144 (__v2df) __B,
4145 (__v2df) __C,
4146 (__mmask8) __U);
4147 }
4148
4149 extern __inline __m128d
4150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4151 _mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4152 __m128d __C)
4153 {
4154 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4155 (__v2df) __B,
4156 (__v2df) __C,
4157 (__mmask8) __U);
4158 }
4159
4160 extern __inline __m256
4161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4162 _mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4163 {
4164 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4165 (__v8sf) __B,
4166 (__v8sf) __C,
4167 (__mmask8) __U);
4168 }
4169
4170 extern __inline __m256
4171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4172 _mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4173 __mmask8 __U)
4174 {
4175 return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
4176 (__v8sf) __B,
4177 (__v8sf) __C,
4178 (__mmask8) __U);
4179 }
4180
4181 extern __inline __m256
4182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4183 _mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4184 __m256 __C)
4185 {
4186 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4187 (__v8sf) __B,
4188 (__v8sf) __C,
4189 (__mmask8) __U);
4190 }
4191
4192 extern __inline __m128
4193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4194 _mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4195 {
4196 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4197 (__v4sf) __B,
4198 (__v4sf) __C,
4199 (__mmask8) __U);
4200 }
4201
4202 extern __inline __m128
4203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4204 _mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4205 {
4206 return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
4207 (__v4sf) __B,
4208 (__v4sf) __C,
4209 (__mmask8) __U);
4210 }
4211
4212 extern __inline __m128
4213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4214 _mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4215 {
4216 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4217 (__v4sf) __B,
4218 (__v4sf) __C,
4219 (__mmask8) __U);
4220 }
4221
4222 extern __inline __m256d
4223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4224 _mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4225 __m256d __C)
4226 {
4227 return (__m256d) __builtin_ia32_vfmsubpd256_mask ((__v4df) __A,
4228 (__v4df) __B,
4229 (__v4df) __C,
4230 (__mmask8) __U);
4231 }
4232
4233 extern __inline __m256d
4234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4235 _mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4236 __mmask8 __U)
4237 {
4238 return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
4239 (__v4df) __B,
4240 (__v4df) __C,
4241 (__mmask8) __U);
4242 }
4243
4244 extern __inline __m256d
4245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4246 _mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4247 __m256d __C)
4248 {
4249 return (__m256d) __builtin_ia32_vfmsubpd256_maskz ((__v4df) __A,
4250 (__v4df) __B,
4251 (__v4df) __C,
4252 (__mmask8) __U);
4253 }
4254
4255 extern __inline __m128d
4256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4257 _mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4258 {
4259 return (__m128d) __builtin_ia32_vfmsubpd128_mask ((__v2df) __A,
4260 (__v2df) __B,
4261 (__v2df) __C,
4262 (__mmask8) __U);
4263 }
4264
4265 extern __inline __m128d
4266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4267 _mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4268 __mmask8 __U)
4269 {
4270 return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
4271 (__v2df) __B,
4272 (__v2df) __C,
4273 (__mmask8) __U);
4274 }
4275
4276 extern __inline __m128d
4277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4278 _mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4279 __m128d __C)
4280 {
4281 return (__m128d) __builtin_ia32_vfmsubpd128_maskz ((__v2df) __A,
4282 (__v2df) __B,
4283 (__v2df) __C,
4284 (__mmask8) __U);
4285 }
4286
4287 extern __inline __m256
4288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4289 _mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4290 {
4291 return (__m256) __builtin_ia32_vfmsubps256_mask ((__v8sf) __A,
4292 (__v8sf) __B,
4293 (__v8sf) __C,
4294 (__mmask8) __U);
4295 }
4296
4297 extern __inline __m256
4298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4299 _mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4300 __mmask8 __U)
4301 {
4302 return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
4303 (__v8sf) __B,
4304 (__v8sf) __C,
4305 (__mmask8) __U);
4306 }
4307
4308 extern __inline __m256
4309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4310 _mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4311 __m256 __C)
4312 {
4313 return (__m256) __builtin_ia32_vfmsubps256_maskz ((__v8sf) __A,
4314 (__v8sf) __B,
4315 (__v8sf) __C,
4316 (__mmask8) __U);
4317 }
4318
4319 extern __inline __m128
4320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4321 _mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4322 {
4323 return (__m128) __builtin_ia32_vfmsubps128_mask ((__v4sf) __A,
4324 (__v4sf) __B,
4325 (__v4sf) __C,
4326 (__mmask8) __U);
4327 }
4328
4329 extern __inline __m128
4330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4331 _mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4332 {
4333 return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
4334 (__v4sf) __B,
4335 (__v4sf) __C,
4336 (__mmask8) __U);
4337 }
4338
4339 extern __inline __m128
4340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4341 _mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4342 {
4343 return (__m128) __builtin_ia32_vfmsubps128_maskz ((__v4sf) __A,
4344 (__v4sf) __B,
4345 (__v4sf) __C,
4346 (__mmask8) __U);
4347 }
4348
4349 extern __inline __m256d
4350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4351 _mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4352 __m256d __C)
4353 {
4354 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4355 (__v4df) __B,
4356 (__v4df) __C,
4357 (__mmask8) __U);
4358 }
4359
4360 extern __inline __m256d
4361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4362 _mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
4363 __mmask8 __U)
4364 {
4365 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
4366 (__v4df) __B,
4367 (__v4df) __C,
4368 (__mmask8)
4369 __U);
4370 }
4371
4372 extern __inline __m256d
4373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4374 _mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4375 __m256d __C)
4376 {
4377 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4378 (__v4df) __B,
4379 (__v4df) __C,
4380 (__mmask8)
4381 __U);
4382 }
4383
4384 extern __inline __m128d
4385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4386 _mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4387 __m128d __C)
4388 {
4389 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4390 (__v2df) __B,
4391 (__v2df) __C,
4392 (__mmask8) __U);
4393 }
4394
4395 extern __inline __m128d
4396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4397 _mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
4398 __mmask8 __U)
4399 {
4400 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
4401 (__v2df) __B,
4402 (__v2df) __C,
4403 (__mmask8)
4404 __U);
4405 }
4406
4407 extern __inline __m128d
4408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4409 _mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4410 __m128d __C)
4411 {
4412 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4413 (__v2df) __B,
4414 (__v2df) __C,
4415 (__mmask8)
4416 __U);
4417 }
4418
4419 extern __inline __m256
4420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4421 _mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4422 __m256 __C)
4423 {
4424 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4425 (__v8sf) __B,
4426 (__v8sf) __C,
4427 (__mmask8) __U);
4428 }
4429
4430 extern __inline __m256
4431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4432 _mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
4433 __mmask8 __U)
4434 {
4435 return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
4436 (__v8sf) __B,
4437 (__v8sf) __C,
4438 (__mmask8) __U);
4439 }
4440
4441 extern __inline __m256
4442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4443 _mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4444 __m256 __C)
4445 {
4446 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4447 (__v8sf) __B,
4448 (__v8sf) __C,
4449 (__mmask8) __U);
4450 }
4451
4452 extern __inline __m128
4453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4454 _mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4455 {
4456 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4457 (__v4sf) __B,
4458 (__v4sf) __C,
4459 (__mmask8) __U);
4460 }
4461
4462 extern __inline __m128
4463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4464 _mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
4465 __mmask8 __U)
4466 {
4467 return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
4468 (__v4sf) __B,
4469 (__v4sf) __C,
4470 (__mmask8) __U);
4471 }
4472
4473 extern __inline __m128
4474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4475 _mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
4476 __m128 __C)
4477 {
4478 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4479 (__v4sf) __B,
4480 (__v4sf) __C,
4481 (__mmask8) __U);
4482 }
4483
4484 extern __inline __m256d
4485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4486 _mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4487 __m256d __C)
4488 {
4489 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4490 (__v4df) __B,
4491 -(__v4df) __C,
4492 (__mmask8) __U);
4493 }
4494
4495 extern __inline __m256d
4496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4497 _mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
4498 __mmask8 __U)
4499 {
4500 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
4501 (__v4df) __B,
4502 (__v4df) __C,
4503 (__mmask8)
4504 __U);
4505 }
4506
4507 extern __inline __m256d
4508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4509 _mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4510 __m256d __C)
4511 {
4512 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4513 (__v4df) __B,
4514 -(__v4df) __C,
4515 (__mmask8)
4516 __U);
4517 }
4518
4519 extern __inline __m128d
4520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4521 _mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4522 __m128d __C)
4523 {
4524 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4525 (__v2df) __B,
4526 -(__v2df) __C,
4527 (__mmask8) __U);
4528 }
4529
4530 extern __inline __m128d
4531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4532 _mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
4533 __mmask8 __U)
4534 {
4535 return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
4536 (__v2df) __B,
4537 (__v2df) __C,
4538 (__mmask8)
4539 __U);
4540 }
4541
4542 extern __inline __m128d
4543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4544 _mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4545 __m128d __C)
4546 {
4547 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4548 (__v2df) __B,
4549 -(__v2df) __C,
4550 (__mmask8)
4551 __U);
4552 }
4553
4554 extern __inline __m256
4555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4556 _mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4557 __m256 __C)
4558 {
4559 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4560 (__v8sf) __B,
4561 -(__v8sf) __C,
4562 (__mmask8) __U);
4563 }
4564
4565 extern __inline __m256
4566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4567 _mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
4568 __mmask8 __U)
4569 {
4570 return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
4571 (__v8sf) __B,
4572 (__v8sf) __C,
4573 (__mmask8) __U);
4574 }
4575
4576 extern __inline __m256
4577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4578 _mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4579 __m256 __C)
4580 {
4581 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4582 (__v8sf) __B,
4583 -(__v8sf) __C,
4584 (__mmask8) __U);
4585 }
4586
4587 extern __inline __m128
4588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4589 _mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4590 {
4591 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4592 (__v4sf) __B,
4593 -(__v4sf) __C,
4594 (__mmask8) __U);
4595 }
4596
4597 extern __inline __m128
4598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4599 _mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
4600 __mmask8 __U)
4601 {
4602 return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
4603 (__v4sf) __B,
4604 (__v4sf) __C,
4605 (__mmask8) __U);
4606 }
4607
4608 extern __inline __m128
4609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4610 _mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
4611 __m128 __C)
4612 {
4613 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4614 (__v4sf) __B,
4615 -(__v4sf) __C,
4616 (__mmask8) __U);
4617 }
4618
4619 extern __inline __m256d
4620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4621 _mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4622 __m256d __C)
4623 {
4624 return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
4625 (__v4df) __B,
4626 (__v4df) __C,
4627 (__mmask8) __U);
4628 }
4629
4630 extern __inline __m256d
4631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4632 _mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4633 __mmask8 __U)
4634 {
4635 return (__m256d) __builtin_ia32_vfnmaddpd256_mask3 ((__v4df) __A,
4636 (__v4df) __B,
4637 (__v4df) __C,
4638 (__mmask8) __U);
4639 }
4640
4641 extern __inline __m256d
4642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4643 _mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4644 __m256d __C)
4645 {
4646 return (__m256d) __builtin_ia32_vfnmaddpd256_maskz ((__v4df) __A,
4647 (__v4df) __B,
4648 (__v4df) __C,
4649 (__mmask8) __U);
4650 }
4651
4652 extern __inline __m128d
4653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4654 _mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4655 __m128d __C)
4656 {
4657 return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
4658 (__v2df) __B,
4659 (__v2df) __C,
4660 (__mmask8) __U);
4661 }
4662
4663 extern __inline __m128d
4664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4665 _mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4666 __mmask8 __U)
4667 {
4668 return (__m128d) __builtin_ia32_vfnmaddpd128_mask3 ((__v2df) __A,
4669 (__v2df) __B,
4670 (__v2df) __C,
4671 (__mmask8) __U);
4672 }
4673
4674 extern __inline __m128d
4675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4676 _mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4677 __m128d __C)
4678 {
4679 return (__m128d) __builtin_ia32_vfnmaddpd128_maskz ((__v2df) __A,
4680 (__v2df) __B,
4681 (__v2df) __C,
4682 (__mmask8) __U);
4683 }
4684
4685 extern __inline __m256
4686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4687 _mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4688 __m256 __C)
4689 {
4690 return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
4691 (__v8sf) __B,
4692 (__v8sf) __C,
4693 (__mmask8) __U);
4694 }
4695
4696 extern __inline __m256
4697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4698 _mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4699 __mmask8 __U)
4700 {
4701 return (__m256) __builtin_ia32_vfnmaddps256_mask3 ((__v8sf) __A,
4702 (__v8sf) __B,
4703 (__v8sf) __C,
4704 (__mmask8) __U);
4705 }
4706
4707 extern __inline __m256
4708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4709 _mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4710 __m256 __C)
4711 {
4712 return (__m256) __builtin_ia32_vfnmaddps256_maskz ((__v8sf) __A,
4713 (__v8sf) __B,
4714 (__v8sf) __C,
4715 (__mmask8) __U);
4716 }
4717
4718 extern __inline __m128
4719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4720 _mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4721 {
4722 return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
4723 (__v4sf) __B,
4724 (__v4sf) __C,
4725 (__mmask8) __U);
4726 }
4727
4728 extern __inline __m128
4729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4730 _mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4731 {
4732 return (__m128) __builtin_ia32_vfnmaddps128_mask3 ((__v4sf) __A,
4733 (__v4sf) __B,
4734 (__v4sf) __C,
4735 (__mmask8) __U);
4736 }
4737
4738 extern __inline __m128
4739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4740 _mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4741 {
4742 return (__m128) __builtin_ia32_vfnmaddps128_maskz ((__v4sf) __A,
4743 (__v4sf) __B,
4744 (__v4sf) __C,
4745 (__mmask8) __U);
4746 }
4747
4748 extern __inline __m256d
4749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4750 _mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4751 __m256d __C)
4752 {
4753 return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
4754 (__v4df) __B,
4755 (__v4df) __C,
4756 (__mmask8) __U);
4757 }
4758
4759 extern __inline __m256d
4760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4761 _mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4762 __mmask8 __U)
4763 {
4764 return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
4765 (__v4df) __B,
4766 (__v4df) __C,
4767 (__mmask8) __U);
4768 }
4769
4770 extern __inline __m256d
4771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4772 _mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4773 __m256d __C)
4774 {
4775 return (__m256d) __builtin_ia32_vfnmsubpd256_maskz ((__v4df) __A,
4776 (__v4df) __B,
4777 (__v4df) __C,
4778 (__mmask8) __U);
4779 }
4780
4781 extern __inline __m128d
4782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4783 _mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4784 __m128d __C)
4785 {
4786 return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
4787 (__v2df) __B,
4788 (__v2df) __C,
4789 (__mmask8) __U);
4790 }
4791
4792 extern __inline __m128d
4793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4794 _mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4795 __mmask8 __U)
4796 {
4797 return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
4798 (__v2df) __B,
4799 (__v2df) __C,
4800 (__mmask8) __U);
4801 }
4802
4803 extern __inline __m128d
4804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4805 _mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4806 __m128d __C)
4807 {
4808 return (__m128d) __builtin_ia32_vfnmsubpd128_maskz ((__v2df) __A,
4809 (__v2df) __B,
4810 (__v2df) __C,
4811 (__mmask8) __U);
4812 }
4813
4814 extern __inline __m256
4815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4816 _mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4817 __m256 __C)
4818 {
4819 return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
4820 (__v8sf) __B,
4821 (__v8sf) __C,
4822 (__mmask8) __U);
4823 }
4824
4825 extern __inline __m256
4826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4827 _mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4828 __mmask8 __U)
4829 {
4830 return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
4831 (__v8sf) __B,
4832 (__v8sf) __C,
4833 (__mmask8) __U);
4834 }
4835
4836 extern __inline __m256
4837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4838 _mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4839 __m256 __C)
4840 {
4841 return (__m256) __builtin_ia32_vfnmsubps256_maskz ((__v8sf) __A,
4842 (__v8sf) __B,
4843 (__v8sf) __C,
4844 (__mmask8) __U);
4845 }
4846
4847 extern __inline __m128
4848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4849 _mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4850 {
4851 return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
4852 (__v4sf) __B,
4853 (__v4sf) __C,
4854 (__mmask8) __U);
4855 }
4856
4857 extern __inline __m128
4858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4859 _mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4860 {
4861 return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
4862 (__v4sf) __B,
4863 (__v4sf) __C,
4864 (__mmask8) __U);
4865 }
4866
4867 extern __inline __m128
4868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4869 _mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4870 {
4871 return (__m128) __builtin_ia32_vfnmsubps128_maskz ((__v4sf) __A,
4872 (__v4sf) __B,
4873 (__v4sf) __C,
4874 (__mmask8) __U);
4875 }
4876
4877 extern __inline __m128i
4878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4879 _mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4880 __m128i __B)
4881 {
4882 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4883 (__v4si) __B,
4884 (__v4si) __W,
4885 (__mmask8) __U);
4886 }
4887
4888 extern __inline __m128i
4889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4890 _mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4891 {
4892 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4893 (__v4si) __B,
4894 (__v4si)
4895 _mm_setzero_si128 (),
4896 (__mmask8) __U);
4897 }
4898
4899 extern __inline __m256i
4900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4901 _mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4902 __m256i __B)
4903 {
4904 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4905 (__v8si) __B,
4906 (__v8si) __W,
4907 (__mmask8) __U);
4908 }
4909
4910 extern __inline __m256i
4911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4912 _mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4913 {
4914 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4915 (__v8si) __B,
4916 (__v8si)
4917 _mm256_setzero_si256 (),
4918 (__mmask8) __U);
4919 }
4920
4921 extern __inline __m128i
4922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4923 _mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4924 __m128i __B)
4925 {
4926 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4927 (__v4si) __B,
4928 (__v4si) __W,
4929 (__mmask8) __U);
4930 }
4931
4932 extern __inline __m128i
4933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4934 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4935 {
4936 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4937 (__v4si) __B,
4938 (__v4si)
4939 _mm_setzero_si128 (),
4940 (__mmask8) __U);
4941 }
4942
4943 extern __inline __m256i
4944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4945 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4946 __m256i __B)
4947 {
4948 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4949 (__v8si) __B,
4950 (__v8si) __W,
4951 (__mmask8) __U);
4952 }
4953
4954 extern __inline __m256i
4955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4956 _mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4957 {
4958 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4959 (__v8si) __B,
4960 (__v8si)
4961 _mm256_setzero_si256 (),
4962 (__mmask8) __U);
4963 }
4964
4965 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4966 _mm256_or_epi32 (__m256i __A, __m256i __B)
4967 {
4968 return (__m256i) ((__v8su)__A | (__v8su)__B);
4969 }
4970
4971 extern __inline __m128i
4972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4973 _mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4974 {
4975 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4976 (__v4si) __B,
4977 (__v4si) __W,
4978 (__mmask8) __U);
4979 }
4980
4981 extern __inline __m128i
4982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4983 _mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4984 {
4985 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4986 (__v4si) __B,
4987 (__v4si)
4988 _mm_setzero_si128 (),
4989 (__mmask8) __U);
4990 }
4991
4992 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4993 _mm_or_epi32 (__m128i __A, __m128i __B)
4994 {
4995 return (__m128i) ((__v4su)__A | (__v4su)__B);
4996 }
4997
4998 extern __inline __m256i
4999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5000 _mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5001 __m256i __B)
5002 {
5003 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
5004 (__v8si) __B,
5005 (__v8si) __W,
5006 (__mmask8) __U);
5007 }
5008
5009 extern __inline __m256i
5010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5011 _mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5012 {
5013 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
5014 (__v8si) __B,
5015 (__v8si)
5016 _mm256_setzero_si256 (),
5017 (__mmask8) __U);
5018 }
5019
5020 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
5021 _mm256_xor_epi32 (__m256i __A, __m256i __B)
5022 {
5023 return (__m256i) ((__v8su)__A ^ (__v8su)__B);
5024 }
5025
5026 extern __inline __m128i
5027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5028 _mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5029 __m128i __B)
5030 {
5031 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
5032 (__v4si) __B,
5033 (__v4si) __W,
5034 (__mmask8) __U);
5035 }
5036
5037 extern __inline __m128i
5038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5039 _mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5040 {
5041 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
5042 (__v4si) __B,
5043 (__v4si)
5044 _mm_setzero_si128 (),
5045 (__mmask8) __U);
5046 }
5047
5048 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
5049 _mm_xor_epi32 (__m128i __A, __m128i __B)
5050 {
5051 return (__m128i) ((__v4su)__A ^ (__v4su)__B);
5052 }
5053
5054 extern __inline __m128
5055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5056 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
5057 {
5058 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
5059 (__v4sf) __W,
5060 (__mmask8) __U);
5061 }
5062
5063 extern __inline __m128
5064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5065 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
5066 {
5067 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
5068 (__v4sf)
5069 _mm_setzero_ps (),
5070 (__mmask8) __U);
5071 }
5072
5073 extern __inline __m128
5074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5075 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
5076 {
5077 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
5078 (__v4sf) __W,
5079 (__mmask8) __U);
5080 }
5081
5082 extern __inline __m128
5083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5084 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
5085 {
5086 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
5087 (__v4sf)
5088 _mm_setzero_ps (),
5089 (__mmask8) __U);
5090 }
5091
5092 extern __inline __m256i
5093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5094 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
5095 {
5096 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
5097 (__v8si) __W,
5098 (__mmask8) __U);
5099 }
5100
5101 extern __inline __m256i
5102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5103 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
5104 {
5105 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
5106 (__v8si)
5107 _mm256_setzero_si256 (),
5108 (__mmask8) __U);
5109 }
5110
5111 extern __inline __m128i
5112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5113 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
5114 {
5115 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
5116 (__v4si) __W,
5117 (__mmask8) __U);
5118 }
5119
5120 extern __inline __m128i
5121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5122 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
5123 {
5124 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
5125 (__v4si)
5126 _mm_setzero_si128 (),
5127 (__mmask8) __U);
5128 }
5129
5130 extern __inline __m256i
5131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5132 _mm256_cvtps_epu32 (__m256 __A)
5133 {
5134 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5135 (__v8si)
5136 _mm256_setzero_si256 (),
5137 (__mmask8) -1);
5138 }
5139
5140 extern __inline __m256i
5141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5142 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
5143 {
5144 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5145 (__v8si) __W,
5146 (__mmask8) __U);
5147 }
5148
5149 extern __inline __m256i
5150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5151 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
5152 {
5153 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5154 (__v8si)
5155 _mm256_setzero_si256 (),
5156 (__mmask8) __U);
5157 }
5158
5159 extern __inline __m128i
5160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5161 _mm_cvtps_epu32 (__m128 __A)
5162 {
5163 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5164 (__v4si)
5165 _mm_setzero_si128 (),
5166 (__mmask8) -1);
5167 }
5168
5169 extern __inline __m128i
5170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5171 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
5172 {
5173 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5174 (__v4si) __W,
5175 (__mmask8) __U);
5176 }
5177
5178 extern __inline __m128i
5179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5180 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
5181 {
5182 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5183 (__v4si)
5184 _mm_setzero_si128 (),
5185 (__mmask8) __U);
5186 }
5187
5188 extern __inline __m256d
5189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5190 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5191 {
5192 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5193 (__v4df) __W,
5194 (__mmask8) __U);
5195 }
5196
5197 extern __inline __m256d
5198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5199 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5200 {
5201 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5202 (__v4df)
5203 _mm256_setzero_pd (),
5204 (__mmask8) __U);
5205 }
5206
5207 extern __inline __m128d
5208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5209 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5210 {
5211 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5212 (__v2df) __W,
5213 (__mmask8) __U);
5214 }
5215
5216 extern __inline __m128d
5217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5218 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5219 {
5220 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5221 (__v2df)
5222 _mm_setzero_pd (),
5223 (__mmask8) __U);
5224 }
5225
5226 extern __inline __m256
5227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5228 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5229 {
5230 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5231 (__v8sf) __W,
5232 (__mmask8) __U);
5233 }
5234
5235 extern __inline __m256
5236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5237 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
5238 {
5239 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5240 (__v8sf)
5241 _mm256_setzero_ps (),
5242 (__mmask8) __U);
5243 }
5244
5245 extern __inline __m128
5246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5247 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5248 {
5249 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5250 (__v4sf) __W,
5251 (__mmask8) __U);
5252 }
5253
5254 extern __inline __m128
5255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5256 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
5257 {
5258 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5259 (__v4sf)
5260 _mm_setzero_ps (),
5261 (__mmask8) __U);
5262 }
5263
5264 extern __inline __m256
5265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5266 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5267 {
5268 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5269 (__v8sf) __W,
5270 (__mmask8) __U);
5271 }
5272
5273 extern __inline __m256
5274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5275 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
5276 {
5277 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5278 (__v8sf)
5279 _mm256_setzero_ps (),
5280 (__mmask8) __U);
5281 }
5282
5283 extern __inline __m128
5284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5285 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5286 {
5287 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5288 (__v4sf) __W,
5289 (__mmask8) __U);
5290 }
5291
5292 extern __inline __m128
5293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5294 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
5295 {
5296 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5297 (__v4sf)
5298 _mm_setzero_ps (),
5299 (__mmask8) __U);
5300 }
5301
5302 extern __inline __m128i
5303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5304 _mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5305 __m128i __B)
5306 {
5307 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5308 (__v4si) __B,
5309 (__v4si) __W,
5310 (__mmask8) __U);
5311 }
5312
5313 extern __inline __m128i
5314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5315 _mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5316 {
5317 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5318 (__v4si) __B,
5319 (__v4si)
5320 _mm_setzero_si128 (),
5321 (__mmask8) __U);
5322 }
5323
5324 extern __inline __m256i
5325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5326 _mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5327 __m256i __B)
5328 {
5329 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5330 (__v8si) __B,
5331 (__v8si) __W,
5332 (__mmask8) __U);
5333 }
5334
5335 extern __inline __m256i
5336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5337 _mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5338 {
5339 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5340 (__v8si) __B,
5341 (__v8si)
5342 _mm256_setzero_si256 (),
5343 (__mmask8) __U);
5344 }
5345
5346 extern __inline __m128i
5347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5348 _mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5349 __m128i __B)
5350 {
5351 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5352 (__v2di) __B,
5353 (__v2di) __W,
5354 (__mmask8) __U);
5355 }
5356
5357 extern __inline __m128i
5358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5359 _mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5360 {
5361 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5362 (__v2di) __B,
5363 (__v2di)
5364 _mm_setzero_si128 (),
5365 (__mmask8) __U);
5366 }
5367
5368 extern __inline __m256i
5369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5370 _mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5371 __m256i __B)
5372 {
5373 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5374 (__v4di) __B,
5375 (__v4di) __W,
5376 (__mmask8) __U);
5377 }
5378
5379 extern __inline __m256i
5380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5381 _mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5382 {
5383 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5384 (__v4di) __B,
5385 (__v4di)
5386 _mm256_setzero_si256 (),
5387 (__mmask8) __U);
5388 }
5389
5390 extern __inline __m128i
5391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5392 _mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5393 __m128i __B)
5394 {
5395 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5396 (__v4si) __B,
5397 (__v4si) __W,
5398 (__mmask8) __U);
5399 }
5400
5401 extern __inline __m128i
5402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5403 _mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5404 {
5405 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5406 (__v4si) __B,
5407 (__v4si)
5408 _mm_setzero_si128 (),
5409 (__mmask8) __U);
5410 }
5411
5412 extern __inline __m256i
5413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5414 _mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5415 __m256i __B)
5416 {
5417 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5418 (__v8si) __B,
5419 (__v8si) __W,
5420 (__mmask8) __U);
5421 }
5422
5423 extern __inline __m256i
5424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5425 _mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5426 {
5427 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5428 (__v8si) __B,
5429 (__v8si)
5430 _mm256_setzero_si256 (),
5431 (__mmask8) __U);
5432 }
5433
5434 extern __inline __m128i
5435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5436 _mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5437 __m128i __B)
5438 {
5439 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5440 (__v2di) __B,
5441 (__v2di) __W,
5442 (__mmask8) __U);
5443 }
5444
5445 extern __inline __m128i
5446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5447 _mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5448 {
5449 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5450 (__v2di) __B,
5451 (__v2di)
5452 _mm_setzero_si128 (),
5453 (__mmask8) __U);
5454 }
5455
5456 extern __inline __m256i
5457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5458 _mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5459 __m256i __B)
5460 {
5461 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5462 (__v4di) __B,
5463 (__v4di) __W,
5464 (__mmask8) __U);
5465 }
5466
5467 extern __inline __m256i
5468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5469 _mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5470 {
5471 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5472 (__v4di) __B,
5473 (__v4di)
5474 _mm256_setzero_si256 (),
5475 (__mmask8) __U);
5476 }
5477
5478 extern __inline __mmask8
5479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5480 _mm_cmpeq_epu32_mask (__m128i __A, __m128i __B)
5481 {
5482 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5483 (__v4si) __B, 0,
5484 (__mmask8) -1);
5485 }
5486
5487 extern __inline __mmask8
5488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5489 _mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
5490 {
5491 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5492 (__v4si) __B,
5493 (__mmask8) -1);
5494 }
5495
5496 extern __inline __mmask8
5497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5498 _mm_mask_cmpeq_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5499 {
5500 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5501 (__v4si) __B, 0, __U);
5502 }
5503
5504 extern __inline __mmask8
5505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5506 _mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5507 {
5508 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5509 (__v4si) __B, __U);
5510 }
5511
5512 extern __inline __mmask8
5513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5514 _mm256_cmpeq_epu32_mask (__m256i __A, __m256i __B)
5515 {
5516 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5517 (__v8si) __B, 0,
5518 (__mmask8) -1);
5519 }
5520
5521 extern __inline __mmask8
5522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5523 _mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
5524 {
5525 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5526 (__v8si) __B,
5527 (__mmask8) -1);
5528 }
5529
5530 extern __inline __mmask8
5531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5532 _mm256_mask_cmpeq_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5533 {
5534 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5535 (__v8si) __B, 0, __U);
5536 }
5537
5538 extern __inline __mmask8
5539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5540 _mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5541 {
5542 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5543 (__v8si) __B, __U);
5544 }
5545
5546 extern __inline __mmask8
5547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5548 _mm_cmpeq_epu64_mask (__m128i __A, __m128i __B)
5549 {
5550 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5551 (__v2di) __B, 0,
5552 (__mmask8) -1);
5553 }
5554
5555 extern __inline __mmask8
5556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5557 _mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
5558 {
5559 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5560 (__v2di) __B,
5561 (__mmask8) -1);
5562 }
5563
5564 extern __inline __mmask8
5565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5566 _mm_mask_cmpeq_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5567 {
5568 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5569 (__v2di) __B, 0, __U);
5570 }
5571
5572 extern __inline __mmask8
5573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5574 _mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5575 {
5576 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5577 (__v2di) __B, __U);
5578 }
5579
5580 extern __inline __mmask8
5581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5582 _mm256_cmpeq_epu64_mask (__m256i __A, __m256i __B)
5583 {
5584 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5585 (__v4di) __B, 0,
5586 (__mmask8) -1);
5587 }
5588
5589 extern __inline __mmask8
5590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5591 _mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
5592 {
5593 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5594 (__v4di) __B,
5595 (__mmask8) -1);
5596 }
5597
5598 extern __inline __mmask8
5599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5600 _mm256_mask_cmpeq_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5601 {
5602 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5603 (__v4di) __B, 0, __U);
5604 }
5605
5606 extern __inline __mmask8
5607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5608 _mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5609 {
5610 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5611 (__v4di) __B, __U);
5612 }
5613
5614 extern __inline __mmask8
5615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5616 _mm_cmpgt_epu32_mask (__m128i __A, __m128i __B)
5617 {
5618 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5619 (__v4si) __B, 6,
5620 (__mmask8) -1);
5621 }
5622
5623 extern __inline __mmask8
5624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5625 _mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
5626 {
5627 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5628 (__v4si) __B,
5629 (__mmask8) -1);
5630 }
5631
5632 extern __inline __mmask8
5633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5634 _mm_mask_cmpgt_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5635 {
5636 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5637 (__v4si) __B, 6, __U);
5638 }
5639
5640 extern __inline __mmask8
5641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5642 _mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5643 {
5644 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5645 (__v4si) __B, __U);
5646 }
5647
5648 extern __inline __mmask8
5649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5650 _mm256_cmpgt_epu32_mask (__m256i __A, __m256i __B)
5651 {
5652 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5653 (__v8si) __B, 6,
5654 (__mmask8) -1);
5655 }
5656
5657 extern __inline __mmask8
5658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5659 _mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
5660 {
5661 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5662 (__v8si) __B,
5663 (__mmask8) -1);
5664 }
5665
5666 extern __inline __mmask8
5667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5668 _mm256_mask_cmpgt_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5669 {
5670 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5671 (__v8si) __B, 6, __U);
5672 }
5673
5674 extern __inline __mmask8
5675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5676 _mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5677 {
5678 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5679 (__v8si) __B, __U);
5680 }
5681
5682 extern __inline __mmask8
5683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5684 _mm_cmpgt_epu64_mask (__m128i __A, __m128i __B)
5685 {
5686 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5687 (__v2di) __B, 6,
5688 (__mmask8) -1);
5689 }
5690
5691 extern __inline __mmask8
5692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5693 _mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
5694 {
5695 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5696 (__v2di) __B,
5697 (__mmask8) -1);
5698 }
5699
5700 extern __inline __mmask8
5701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5702 _mm_mask_cmpgt_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5703 {
5704 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5705 (__v2di) __B, 6, __U);
5706 }
5707
5708 extern __inline __mmask8
5709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5710 _mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5711 {
5712 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5713 (__v2di) __B, __U);
5714 }
5715
5716 extern __inline __mmask8
5717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5718 _mm256_cmpgt_epu64_mask (__m256i __A, __m256i __B)
5719 {
5720 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5721 (__v4di) __B, 6,
5722 (__mmask8) -1);
5723 }
5724
5725 extern __inline __mmask8
5726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5727 _mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
5728 {
5729 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5730 (__v4di) __B,
5731 (__mmask8) -1);
5732 }
5733
5734 extern __inline __mmask8
5735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5736 _mm256_mask_cmpgt_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5737 {
5738 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5739 (__v4di) __B, 6, __U);
5740 }
5741
5742 extern __inline __mmask8
5743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5744 _mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5745 {
5746 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5747 (__v4di) __B, __U);
5748 }
5749
5750 extern __inline __mmask8
5751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5752 _mm_test_epi32_mask (__m128i __A, __m128i __B)
5753 {
5754 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5755 (__v4si) __B,
5756 (__mmask8) -1);
5757 }
5758
5759 extern __inline __mmask8
5760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5761 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5762 {
5763 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5764 (__v4si) __B, __U);
5765 }
5766
5767 extern __inline __mmask8
5768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5769 _mm256_test_epi32_mask (__m256i __A, __m256i __B)
5770 {
5771 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5772 (__v8si) __B,
5773 (__mmask8) -1);
5774 }
5775
5776 extern __inline __mmask8
5777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5778 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5779 {
5780 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5781 (__v8si) __B, __U);
5782 }
5783
5784 extern __inline __mmask8
5785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5786 _mm_test_epi64_mask (__m128i __A, __m128i __B)
5787 {
5788 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5789 (__v2di) __B,
5790 (__mmask8) -1);
5791 }
5792
5793 extern __inline __mmask8
5794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5795 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5796 {
5797 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5798 (__v2di) __B, __U);
5799 }
5800
5801 extern __inline __mmask8
5802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5803 _mm256_test_epi64_mask (__m256i __A, __m256i __B)
5804 {
5805 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5806 (__v4di) __B,
5807 (__mmask8) -1);
5808 }
5809
5810 extern __inline __mmask8
5811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5812 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5813 {
5814 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5815 (__v4di) __B, __U);
5816 }
5817
5818 extern __inline __mmask8
5819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5820 _mm_testn_epi32_mask (__m128i __A, __m128i __B)
5821 {
5822 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5823 (__v4si) __B,
5824 (__mmask8) -1);
5825 }
5826
5827 extern __inline __mmask8
5828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5829 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5830 {
5831 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5832 (__v4si) __B, __U);
5833 }
5834
5835 extern __inline __mmask8
5836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5837 _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
5838 {
5839 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5840 (__v8si) __B,
5841 (__mmask8) -1);
5842 }
5843
5844 extern __inline __mmask8
5845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5846 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5847 {
5848 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5849 (__v8si) __B, __U);
5850 }
5851
5852 extern __inline __mmask8
5853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5854 _mm_testn_epi64_mask (__m128i __A, __m128i __B)
5855 {
5856 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5857 (__v2di) __B,
5858 (__mmask8) -1);
5859 }
5860
5861 extern __inline __mmask8
5862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5863 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5864 {
5865 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5866 (__v2di) __B, __U);
5867 }
5868
5869 extern __inline __mmask8
5870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5871 _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
5872 {
5873 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5874 (__v4di) __B,
5875 (__mmask8) -1);
5876 }
5877
5878 extern __inline __mmask8
5879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5880 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5881 {
5882 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5883 (__v4di) __B, __U);
5884 }
5885
5886 extern __inline __m256d
5887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5888 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
5889 {
5890 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5891 (__v4df) __W,
5892 (__mmask8) __U);
5893 }
5894
5895 extern __inline __m256d
5896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5897 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
5898 {
5899 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5900 (__v4df)
5901 _mm256_setzero_pd (),
5902 (__mmask8) __U);
5903 }
5904
5905 extern __inline void
5906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5907 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
5908 {
5909 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
5910 (__v4df) __A,
5911 (__mmask8) __U);
5912 }
5913
5914 extern __inline __m128d
5915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5916 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
5917 {
5918 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5919 (__v2df) __W,
5920 (__mmask8) __U);
5921 }
5922
5923 extern __inline __m128d
5924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5925 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
5926 {
5927 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5928 (__v2df)
5929 _mm_setzero_pd (),
5930 (__mmask8) __U);
5931 }
5932
5933 extern __inline void
5934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5935 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
5936 {
5937 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
5938 (__v2df) __A,
5939 (__mmask8) __U);
5940 }
5941
5942 extern __inline __m256
5943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5944 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
5945 {
5946 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5947 (__v8sf) __W,
5948 (__mmask8) __U);
5949 }
5950
5951 extern __inline __m256
5952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5953 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
5954 {
5955 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5956 (__v8sf)
5957 _mm256_setzero_ps (),
5958 (__mmask8) __U);
5959 }
5960
5961 extern __inline void
5962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5963 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
5964 {
5965 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
5966 (__v8sf) __A,
5967 (__mmask8) __U);
5968 }
5969
5970 extern __inline __m128
5971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5972 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
5973 {
5974 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5975 (__v4sf) __W,
5976 (__mmask8) __U);
5977 }
5978
5979 extern __inline __m128
5980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5981 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
5982 {
5983 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5984 (__v4sf)
5985 _mm_setzero_ps (),
5986 (__mmask8) __U);
5987 }
5988
5989 extern __inline void
5990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5991 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
5992 {
5993 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
5994 (__v4sf) __A,
5995 (__mmask8) __U);
5996 }
5997
5998 extern __inline __m256i
5999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6000 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
6001 {
6002 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
6003 (__v4di) __W,
6004 (__mmask8) __U);
6005 }
6006
6007 extern __inline __m256i
6008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6009 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
6010 {
6011 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
6012 (__v4di)
6013 _mm256_setzero_si256 (),
6014 (__mmask8) __U);
6015 }
6016
6017 extern __inline void
6018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6019 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
6020 {
6021 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
6022 (__v4di) __A,
6023 (__mmask8) __U);
6024 }
6025
6026 extern __inline __m128i
6027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6028 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
6029 {
6030 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
6031 (__v2di) __W,
6032 (__mmask8) __U);
6033 }
6034
6035 extern __inline __m128i
6036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6037 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
6038 {
6039 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
6040 (__v2di)
6041 _mm_setzero_si128 (),
6042 (__mmask8) __U);
6043 }
6044
6045 extern __inline void
6046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6047 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
6048 {
6049 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
6050 (__v2di) __A,
6051 (__mmask8) __U);
6052 }
6053
6054 extern __inline __m256i
6055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6056 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
6057 {
6058 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
6059 (__v8si) __W,
6060 (__mmask8) __U);
6061 }
6062
6063 extern __inline __m256i
6064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6065 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
6066 {
6067 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
6068 (__v8si)
6069 _mm256_setzero_si256 (),
6070 (__mmask8) __U);
6071 }
6072
6073 extern __inline void
6074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6075 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
6076 {
6077 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
6078 (__v8si) __A,
6079 (__mmask8) __U);
6080 }
6081
6082 extern __inline __m128i
6083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6084 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
6085 {
6086 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
6087 (__v4si) __W,
6088 (__mmask8) __U);
6089 }
6090
6091 extern __inline __m128i
6092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6093 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
6094 {
6095 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
6096 (__v4si)
6097 _mm_setzero_si128 (),
6098 (__mmask8) __U);
6099 }
6100
6101 extern __inline void
6102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6103 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
6104 {
6105 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
6106 (__v4si) __A,
6107 (__mmask8) __U);
6108 }
6109
6110 extern __inline __m256d
6111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6112 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
6113 {
6114 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
6115 (__v4df) __W,
6116 (__mmask8) __U);
6117 }
6118
6119 extern __inline __m256d
6120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6121 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
6122 {
6123 return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
6124 (__v4df)
6125 _mm256_setzero_pd (),
6126 (__mmask8) __U);
6127 }
6128
6129 extern __inline __m256d
6130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6131 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
6132 {
6133 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
6134 (__v4df) __W,
6135 (__mmask8)
6136 __U);
6137 }
6138
6139 extern __inline __m256d
6140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6141 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6142 {
6143 return (__m256d) __builtin_ia32_expandloaddf256_maskz ((const __v4df *) __P,
6144 (__v4df)
6145 _mm256_setzero_pd (),
6146 (__mmask8)
6147 __U);
6148 }
6149
6150 extern __inline __m128d
6151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6152 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
6153 {
6154 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
6155 (__v2df) __W,
6156 (__mmask8) __U);
6157 }
6158
6159 extern __inline __m128d
6160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6161 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
6162 {
6163 return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
6164 (__v2df)
6165 _mm_setzero_pd (),
6166 (__mmask8) __U);
6167 }
6168
6169 extern __inline __m128d
6170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6171 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
6172 {
6173 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
6174 (__v2df) __W,
6175 (__mmask8)
6176 __U);
6177 }
6178
6179 extern __inline __m128d
6180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6181 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6182 {
6183 return (__m128d) __builtin_ia32_expandloaddf128_maskz ((const __v2df *) __P,
6184 (__v2df)
6185 _mm_setzero_pd (),
6186 (__mmask8)
6187 __U);
6188 }
6189
6190 extern __inline __m256
6191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6192 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
6193 {
6194 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
6195 (__v8sf) __W,
6196 (__mmask8) __U);
6197 }
6198
6199 extern __inline __m256
6200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6201 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
6202 {
6203 return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
6204 (__v8sf)
6205 _mm256_setzero_ps (),
6206 (__mmask8) __U);
6207 }
6208
6209 extern __inline __m256
6210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6211 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
6212 {
6213 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
6214 (__v8sf) __W,
6215 (__mmask8) __U);
6216 }
6217
6218 extern __inline __m256
6219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6220 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6221 {
6222 return (__m256) __builtin_ia32_expandloadsf256_maskz ((const __v8sf *) __P,
6223 (__v8sf)
6224 _mm256_setzero_ps (),
6225 (__mmask8)
6226 __U);
6227 }
6228
6229 extern __inline __m128
6230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6231 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
6232 {
6233 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
6234 (__v4sf) __W,
6235 (__mmask8) __U);
6236 }
6237
6238 extern __inline __m128
6239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6240 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
6241 {
6242 return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
6243 (__v4sf)
6244 _mm_setzero_ps (),
6245 (__mmask8) __U);
6246 }
6247
6248 extern __inline __m128
6249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6250 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
6251 {
6252 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
6253 (__v4sf) __W,
6254 (__mmask8) __U);
6255 }
6256
6257 extern __inline __m128
6258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6259 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6260 {
6261 return (__m128) __builtin_ia32_expandloadsf128_maskz ((const __v4sf *) __P,
6262 (__v4sf)
6263 _mm_setzero_ps (),
6264 (__mmask8)
6265 __U);
6266 }
6267
6268 extern __inline __m256i
6269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6270 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
6271 {
6272 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
6273 (__v4di) __W,
6274 (__mmask8) __U);
6275 }
6276
6277 extern __inline __m256i
6278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6279 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
6280 {
6281 return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
6282 (__v4di)
6283 _mm256_setzero_si256 (),
6284 (__mmask8) __U);
6285 }
6286
6287 extern __inline __m256i
6288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6289 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
6290 void const *__P)
6291 {
6292 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
6293 (__v4di) __W,
6294 (__mmask8)
6295 __U);
6296 }
6297
6298 extern __inline __m256i
6299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6300 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6301 {
6302 return (__m256i) __builtin_ia32_expandloaddi256_maskz ((const __v4di *) __P,
6303 (__v4di)
6304 _mm256_setzero_si256 (),
6305 (__mmask8)
6306 __U);
6307 }
6308
6309 extern __inline __m128i
6310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6311 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
6312 {
6313 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
6314 (__v2di) __W,
6315 (__mmask8) __U);
6316 }
6317
6318 extern __inline __m128i
6319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6320 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
6321 {
6322 return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
6323 (__v2di)
6324 _mm_setzero_si128 (),
6325 (__mmask8) __U);
6326 }
6327
6328 extern __inline __m128i
6329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6330 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
6331 {
6332 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
6333 (__v2di) __W,
6334 (__mmask8)
6335 __U);
6336 }
6337
6338 extern __inline __m128i
6339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6340 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6341 {
6342 return (__m128i) __builtin_ia32_expandloaddi128_maskz ((const __v2di *) __P,
6343 (__v2di)
6344 _mm_setzero_si128 (),
6345 (__mmask8)
6346 __U);
6347 }
6348
6349 extern __inline __m256i
6350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6351 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
6352 {
6353 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
6354 (__v8si) __W,
6355 (__mmask8) __U);
6356 }
6357
6358 extern __inline __m256i
6359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6360 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
6361 {
6362 return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
6363 (__v8si)
6364 _mm256_setzero_si256 (),
6365 (__mmask8) __U);
6366 }
6367
6368 extern __inline __m256i
6369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6370 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
6371 void const *__P)
6372 {
6373 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
6374 (__v8si) __W,
6375 (__mmask8)
6376 __U);
6377 }
6378
6379 extern __inline __m256i
6380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6381 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6382 {
6383 return (__m256i) __builtin_ia32_expandloadsi256_maskz ((const __v8si *) __P,
6384 (__v8si)
6385 _mm256_setzero_si256 (),
6386 (__mmask8)
6387 __U);
6388 }
6389
6390 extern __inline __m128i
6391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6392 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
6393 {
6394 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
6395 (__v4si) __W,
6396 (__mmask8) __U);
6397 }
6398
6399 extern __inline __m128i
6400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6401 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
6402 {
6403 return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
6404 (__v4si)
6405 _mm_setzero_si128 (),
6406 (__mmask8) __U);
6407 }
6408
6409 extern __inline __m128i
6410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6411 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
6412 {
6413 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
6414 (__v4si) __W,
6415 (__mmask8)
6416 __U);
6417 }
6418
6419 extern __inline __m128i
6420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6421 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6422 {
6423 return (__m128i) __builtin_ia32_expandloadsi128_maskz ((const __v4si *) __P,
6424 (__v4si)
6425 _mm_setzero_si128 (),
6426 (__mmask8)
6427 __U);
6428 }
6429
6430 extern __inline __m256d
6431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6432 _mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
6433 {
6434 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6435 /* idx */ ,
6436 (__v4df) __A,
6437 (__v4df) __B,
6438 (__mmask8) -1);
6439 }
6440
6441 extern __inline __m256d
6442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6443 _mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
6444 __m256d __B)
6445 {
6446 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6447 /* idx */ ,
6448 (__v4df) __A,
6449 (__v4df) __B,
6450 (__mmask8)
6451 __U);
6452 }
6453
6454 extern __inline __m256d
6455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6456 _mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
6457 __m256d __B)
6458 {
6459 return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
6460 (__v4di) __I
6461 /* idx */ ,
6462 (__v4df) __B,
6463 (__mmask8)
6464 __U);
6465 }
6466
6467 extern __inline __m256d
6468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6469 _mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
6470 __m256d __B)
6471 {
6472 return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
6473 /* idx */ ,
6474 (__v4df) __A,
6475 (__v4df) __B,
6476 (__mmask8)
6477 __U);
6478 }
6479
6480 extern __inline __m256
6481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6482 _mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
6483 {
6484 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6485 /* idx */ ,
6486 (__v8sf) __A,
6487 (__v8sf) __B,
6488 (__mmask8) -1);
6489 }
6490
6491 extern __inline __m256
6492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6493 _mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
6494 __m256 __B)
6495 {
6496 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6497 /* idx */ ,
6498 (__v8sf) __A,
6499 (__v8sf) __B,
6500 (__mmask8) __U);
6501 }
6502
6503 extern __inline __m256
6504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6505 _mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
6506 __m256 __B)
6507 {
6508 return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
6509 (__v8si) __I
6510 /* idx */ ,
6511 (__v8sf) __B,
6512 (__mmask8) __U);
6513 }
6514
6515 extern __inline __m256
6516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6517 _mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
6518 __m256 __B)
6519 {
6520 return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
6521 /* idx */ ,
6522 (__v8sf) __A,
6523 (__v8sf) __B,
6524 (__mmask8)
6525 __U);
6526 }
6527
6528 extern __inline __m128i
6529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6530 _mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
6531 {
6532 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6533 /* idx */ ,
6534 (__v2di) __A,
6535 (__v2di) __B,
6536 (__mmask8) -1);
6537 }
6538
6539 extern __inline __m128i
6540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6541 _mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
6542 __m128i __B)
6543 {
6544 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6545 /* idx */ ,
6546 (__v2di) __A,
6547 (__v2di) __B,
6548 (__mmask8) __U);
6549 }
6550
6551 extern __inline __m128i
6552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6553 _mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
6554 __m128i __B)
6555 {
6556 return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
6557 (__v2di) __I
6558 /* idx */ ,
6559 (__v2di) __B,
6560 (__mmask8) __U);
6561 }
6562
6563 extern __inline __m128i
6564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6565 _mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
6566 __m128i __B)
6567 {
6568 return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
6569 /* idx */ ,
6570 (__v2di) __A,
6571 (__v2di) __B,
6572 (__mmask8)
6573 __U);
6574 }
6575
6576 extern __inline __m128i
6577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6578 _mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
6579 {
6580 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6581 /* idx */ ,
6582 (__v4si) __A,
6583 (__v4si) __B,
6584 (__mmask8) -1);
6585 }
6586
6587 extern __inline __m128i
6588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6589 _mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
6590 __m128i __B)
6591 {
6592 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6593 /* idx */ ,
6594 (__v4si) __A,
6595 (__v4si) __B,
6596 (__mmask8) __U);
6597 }
6598
6599 extern __inline __m128i
6600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6601 _mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
6602 __m128i __B)
6603 {
6604 return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
6605 (__v4si) __I
6606 /* idx */ ,
6607 (__v4si) __B,
6608 (__mmask8) __U);
6609 }
6610
6611 extern __inline __m128i
6612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6613 _mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
6614 __m128i __B)
6615 {
6616 return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
6617 /* idx */ ,
6618 (__v4si) __A,
6619 (__v4si) __B,
6620 (__mmask8)
6621 __U);
6622 }
6623
6624 extern __inline __m256i
6625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6626 _mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
6627 {
6628 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6629 /* idx */ ,
6630 (__v4di) __A,
6631 (__v4di) __B,
6632 (__mmask8) -1);
6633 }
6634
6635 extern __inline __m256i
6636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6637 _mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
6638 __m256i __B)
6639 {
6640 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6641 /* idx */ ,
6642 (__v4di) __A,
6643 (__v4di) __B,
6644 (__mmask8) __U);
6645 }
6646
6647 extern __inline __m256i
6648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6649 _mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
6650 __mmask8 __U, __m256i __B)
6651 {
6652 return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
6653 (__v4di) __I
6654 /* idx */ ,
6655 (__v4di) __B,
6656 (__mmask8) __U);
6657 }
6658
6659 extern __inline __m256i
6660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6661 _mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
6662 __m256i __I, __m256i __B)
6663 {
6664 return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
6665 /* idx */ ,
6666 (__v4di) __A,
6667 (__v4di) __B,
6668 (__mmask8)
6669 __U);
6670 }
6671
6672 extern __inline __m256i
6673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6674 _mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
6675 {
6676 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6677 /* idx */ ,
6678 (__v8si) __A,
6679 (__v8si) __B,
6680 (__mmask8) -1);
6681 }
6682
6683 extern __inline __m256i
6684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6685 _mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
6686 __m256i __B)
6687 {
6688 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6689 /* idx */ ,
6690 (__v8si) __A,
6691 (__v8si) __B,
6692 (__mmask8) __U);
6693 }
6694
6695 extern __inline __m256i
6696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6697 _mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
6698 __mmask8 __U, __m256i __B)
6699 {
6700 return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
6701 (__v8si) __I
6702 /* idx */ ,
6703 (__v8si) __B,
6704 (__mmask8) __U);
6705 }
6706
6707 extern __inline __m256i
6708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6709 _mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
6710 __m256i __I, __m256i __B)
6711 {
6712 return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
6713 /* idx */ ,
6714 (__v8si) __A,
6715 (__v8si) __B,
6716 (__mmask8)
6717 __U);
6718 }
6719
6720 extern __inline __m128d
6721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6722 _mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
6723 {
6724 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6725 /* idx */ ,
6726 (__v2df) __A,
6727 (__v2df) __B,
6728 (__mmask8) -1);
6729 }
6730
6731 extern __inline __m128d
6732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6733 _mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
6734 __m128d __B)
6735 {
6736 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6737 /* idx */ ,
6738 (__v2df) __A,
6739 (__v2df) __B,
6740 (__mmask8)
6741 __U);
6742 }
6743
6744 extern __inline __m128d
6745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6746 _mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
6747 __m128d __B)
6748 {
6749 return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
6750 (__v2di) __I
6751 /* idx */ ,
6752 (__v2df) __B,
6753 (__mmask8)
6754 __U);
6755 }
6756
6757 extern __inline __m128d
6758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6759 _mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
6760 __m128d __B)
6761 {
6762 return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
6763 /* idx */ ,
6764 (__v2df) __A,
6765 (__v2df) __B,
6766 (__mmask8)
6767 __U);
6768 }
6769
6770 extern __inline __m128
6771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6772 _mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
6773 {
6774 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6775 /* idx */ ,
6776 (__v4sf) __A,
6777 (__v4sf) __B,
6778 (__mmask8) -1);
6779 }
6780
6781 extern __inline __m128
6782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6783 _mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
6784 __m128 __B)
6785 {
6786 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6787 /* idx */ ,
6788 (__v4sf) __A,
6789 (__v4sf) __B,
6790 (__mmask8) __U);
6791 }
6792
6793 extern __inline __m128
6794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6795 _mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
6796 __m128 __B)
6797 {
6798 return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
6799 (__v4si) __I
6800 /* idx */ ,
6801 (__v4sf) __B,
6802 (__mmask8) __U);
6803 }
6804
6805 extern __inline __m128
6806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6807 _mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
6808 __m128 __B)
6809 {
6810 return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
6811 /* idx */ ,
6812 (__v4sf) __A,
6813 (__v4sf) __B,
6814 (__mmask8)
6815 __U);
6816 }
6817
6818 extern __inline __m128i
6819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6820 _mm_srav_epi64 (__m128i __X, __m128i __Y)
6821 {
6822 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6823 (__v2di) __Y,
6824 (__v2di)
6825 _mm_setzero_si128 (),
6826 (__mmask8) -1);
6827 }
6828
6829 extern __inline __m128i
6830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6831 _mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6832 __m128i __Y)
6833 {
6834 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6835 (__v2di) __Y,
6836 (__v2di) __W,
6837 (__mmask8) __U);
6838 }
6839
6840 extern __inline __m128i
6841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6842 _mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6843 {
6844 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6845 (__v2di) __Y,
6846 (__v2di)
6847 _mm_setzero_si128 (),
6848 (__mmask8) __U);
6849 }
6850
6851 extern __inline __m256i
6852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6853 _mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6854 __m256i __Y)
6855 {
6856 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6857 (__v8si) __Y,
6858 (__v8si) __W,
6859 (__mmask8) __U);
6860 }
6861
6862 extern __inline __m256i
6863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6864 _mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6865 {
6866 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6867 (__v8si) __Y,
6868 (__v8si)
6869 _mm256_setzero_si256 (),
6870 (__mmask8) __U);
6871 }
6872
6873 extern __inline __m128i
6874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6875 _mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6876 __m128i __Y)
6877 {
6878 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6879 (__v4si) __Y,
6880 (__v4si) __W,
6881 (__mmask8) __U);
6882 }
6883
6884 extern __inline __m128i
6885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6886 _mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6887 {
6888 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6889 (__v4si) __Y,
6890 (__v4si)
6891 _mm_setzero_si128 (),
6892 (__mmask8) __U);
6893 }
6894
6895 extern __inline __m256i
6896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6897 _mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6898 __m256i __Y)
6899 {
6900 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6901 (__v4di) __Y,
6902 (__v4di) __W,
6903 (__mmask8) __U);
6904 }
6905
6906 extern __inline __m256i
6907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6908 _mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6909 {
6910 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6911 (__v4di) __Y,
6912 (__v4di)
6913 _mm256_setzero_si256 (),
6914 (__mmask8) __U);
6915 }
6916
6917 extern __inline __m128i
6918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6919 _mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6920 __m128i __Y)
6921 {
6922 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6923 (__v2di) __Y,
6924 (__v2di) __W,
6925 (__mmask8) __U);
6926 }
6927
6928 extern __inline __m128i
6929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6930 _mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6931 {
6932 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6933 (__v2di) __Y,
6934 (__v2di)
6935 _mm_setzero_si128 (),
6936 (__mmask8) __U);
6937 }
6938
6939 extern __inline __m256i
6940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6941 _mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6942 __m256i __Y)
6943 {
6944 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6945 (__v8si) __Y,
6946 (__v8si) __W,
6947 (__mmask8) __U);
6948 }
6949
6950 extern __inline __m256i
6951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6952 _mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6953 {
6954 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6955 (__v8si) __Y,
6956 (__v8si)
6957 _mm256_setzero_si256 (),
6958 (__mmask8) __U);
6959 }
6960
6961 extern __inline __m128i
6962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6963 _mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6964 __m128i __Y)
6965 {
6966 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6967 (__v4si) __Y,
6968 (__v4si) __W,
6969 (__mmask8) __U);
6970 }
6971
6972 extern __inline __m128i
6973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6974 _mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6975 {
6976 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6977 (__v4si) __Y,
6978 (__v4si)
6979 _mm_setzero_si128 (),
6980 (__mmask8) __U);
6981 }
6982
6983 extern __inline __m256i
6984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6985 _mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6986 __m256i __Y)
6987 {
6988 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6989 (__v8si) __Y,
6990 (__v8si) __W,
6991 (__mmask8) __U);
6992 }
6993
6994 extern __inline __m256i
6995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6996 _mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6997 {
6998 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6999 (__v8si) __Y,
7000 (__v8si)
7001 _mm256_setzero_si256 (),
7002 (__mmask8) __U);
7003 }
7004
7005 extern __inline __m128i
7006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7007 _mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
7008 __m128i __Y)
7009 {
7010 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
7011 (__v4si) __Y,
7012 (__v4si) __W,
7013 (__mmask8) __U);
7014 }
7015
7016 extern __inline __m128i
7017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7018 _mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
7019 {
7020 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
7021 (__v4si) __Y,
7022 (__v4si)
7023 _mm_setzero_si128 (),
7024 (__mmask8) __U);
7025 }
7026
7027 extern __inline __m256i
7028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7029 _mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
7030 __m256i __Y)
7031 {
7032 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
7033 (__v4di) __Y,
7034 (__v4di) __W,
7035 (__mmask8) __U);
7036 }
7037
7038 extern __inline __m256i
7039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7040 _mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
7041 {
7042 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
7043 (__v4di) __Y,
7044 (__v4di)
7045 _mm256_setzero_si256 (),
7046 (__mmask8) __U);
7047 }
7048
7049 extern __inline __m128i
7050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7051 _mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
7052 __m128i __Y)
7053 {
7054 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
7055 (__v2di) __Y,
7056 (__v2di) __W,
7057 (__mmask8) __U);
7058 }
7059
7060 extern __inline __m128i
7061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7062 _mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
7063 {
7064 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
7065 (__v2di) __Y,
7066 (__v2di)
7067 _mm_setzero_si128 (),
7068 (__mmask8) __U);
7069 }
7070
7071 extern __inline __m256i
7072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7073 _mm256_rolv_epi32 (__m256i __A, __m256i __B)
7074 {
7075 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
7076 (__v8si) __B,
7077 (__v8si)
7078 _mm256_setzero_si256 (),
7079 (__mmask8) -1);
7080 }
7081
7082 extern __inline __m256i
7083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7084 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
7085 __m256i __B)
7086 {
7087 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
7088 (__v8si) __B,
7089 (__v8si) __W,
7090 (__mmask8) __U);
7091 }
7092
7093 extern __inline __m256i
7094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7095 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
7096 {
7097 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
7098 (__v8si) __B,
7099 (__v8si)
7100 _mm256_setzero_si256 (),
7101 (__mmask8) __U);
7102 }
7103
7104 extern __inline __m128i
7105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7106 _mm_rolv_epi32 (__m128i __A, __m128i __B)
7107 {
7108 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7109 (__v4si) __B,
7110 (__v4si)
7111 _mm_setzero_si128 (),
7112 (__mmask8) -1);
7113 }
7114
7115 extern __inline __m128i
7116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7117 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7118 __m128i __B)
7119 {
7120 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7121 (__v4si) __B,
7122 (__v4si) __W,
7123 (__mmask8) __U);
7124 }
7125
7126 extern __inline __m128i
7127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7128 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7129 {
7130 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7131 (__v4si) __B,
7132 (__v4si)
7133 _mm_setzero_si128 (),
7134 (__mmask8) __U);
7135 }
7136
7137 extern __inline __m256i
7138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7139 _mm256_rorv_epi32 (__m256i __A, __m256i __B)
7140 {
7141 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7142 (__v8si) __B,
7143 (__v8si)
7144 _mm256_setzero_si256 (),
7145 (__mmask8) -1);
7146 }
7147
7148 extern __inline __m256i
7149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7150 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
7151 __m256i __B)
7152 {
7153 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7154 (__v8si) __B,
7155 (__v8si) __W,
7156 (__mmask8) __U);
7157 }
7158
7159 extern __inline __m256i
7160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7161 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
7162 {
7163 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7164 (__v8si) __B,
7165 (__v8si)
7166 _mm256_setzero_si256 (),
7167 (__mmask8) __U);
7168 }
7169
7170 extern __inline __m128i
7171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7172 _mm_rorv_epi32 (__m128i __A, __m128i __B)
7173 {
7174 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7175 (__v4si) __B,
7176 (__v4si)
7177 _mm_setzero_si128 (),
7178 (__mmask8) -1);
7179 }
7180
7181 extern __inline __m128i
7182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7183 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7184 __m128i __B)
7185 {
7186 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7187 (__v4si) __B,
7188 (__v4si) __W,
7189 (__mmask8) __U);
7190 }
7191
7192 extern __inline __m128i
7193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7194 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7195 {
7196 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7197 (__v4si) __B,
7198 (__v4si)
7199 _mm_setzero_si128 (),
7200 (__mmask8) __U);
7201 }
7202
7203 extern __inline __m256i
7204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7205 _mm256_rolv_epi64 (__m256i __A, __m256i __B)
7206 {
7207 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7208 (__v4di) __B,
7209 (__v4di)
7210 _mm256_setzero_si256 (),
7211 (__mmask8) -1);
7212 }
7213
7214 extern __inline __m256i
7215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7216 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7217 __m256i __B)
7218 {
7219 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7220 (__v4di) __B,
7221 (__v4di) __W,
7222 (__mmask8) __U);
7223 }
7224
7225 extern __inline __m256i
7226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7227 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7228 {
7229 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7230 (__v4di) __B,
7231 (__v4di)
7232 _mm256_setzero_si256 (),
7233 (__mmask8) __U);
7234 }
7235
7236 extern __inline __m128i
7237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7238 _mm_rolv_epi64 (__m128i __A, __m128i __B)
7239 {
7240 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7241 (__v2di) __B,
7242 (__v2di)
7243 _mm_setzero_si128 (),
7244 (__mmask8) -1);
7245 }
7246
7247 extern __inline __m128i
7248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7249 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7250 __m128i __B)
7251 {
7252 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7253 (__v2di) __B,
7254 (__v2di) __W,
7255 (__mmask8) __U);
7256 }
7257
7258 extern __inline __m128i
7259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7260 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7261 {
7262 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7263 (__v2di) __B,
7264 (__v2di)
7265 _mm_setzero_si128 (),
7266 (__mmask8) __U);
7267 }
7268
7269 extern __inline __m256i
7270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7271 _mm256_rorv_epi64 (__m256i __A, __m256i __B)
7272 {
7273 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7274 (__v4di) __B,
7275 (__v4di)
7276 _mm256_setzero_si256 (),
7277 (__mmask8) -1);
7278 }
7279
7280 extern __inline __m256i
7281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7282 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7283 __m256i __B)
7284 {
7285 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7286 (__v4di) __B,
7287 (__v4di) __W,
7288 (__mmask8) __U);
7289 }
7290
7291 extern __inline __m256i
7292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7293 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7294 {
7295 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7296 (__v4di) __B,
7297 (__v4di)
7298 _mm256_setzero_si256 (),
7299 (__mmask8) __U);
7300 }
7301
7302 extern __inline __m128i
7303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7304 _mm_rorv_epi64 (__m128i __A, __m128i __B)
7305 {
7306 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7307 (__v2di) __B,
7308 (__v2di)
7309 _mm_setzero_si128 (),
7310 (__mmask8) -1);
7311 }
7312
7313 extern __inline __m128i
7314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7315 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7316 __m128i __B)
7317 {
7318 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7319 (__v2di) __B,
7320 (__v2di) __W,
7321 (__mmask8) __U);
7322 }
7323
7324 extern __inline __m128i
7325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7326 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7327 {
7328 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7329 (__v2di) __B,
7330 (__v2di)
7331 _mm_setzero_si128 (),
7332 (__mmask8) __U);
7333 }
7334
7335 extern __inline __m256i
7336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7337 _mm256_srav_epi64 (__m256i __X, __m256i __Y)
7338 {
7339 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7340 (__v4di) __Y,
7341 (__v4di)
7342 _mm256_setzero_si256 (),
7343 (__mmask8) -1);
7344 }
7345
7346 extern __inline __m256i
7347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7348 _mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
7349 __m256i __Y)
7350 {
7351 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7352 (__v4di) __Y,
7353 (__v4di) __W,
7354 (__mmask8) __U);
7355 }
7356
7357 extern __inline __m256i
7358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7359 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
7360 {
7361 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7362 (__v4di) __Y,
7363 (__v4di)
7364 _mm256_setzero_si256 (),
7365 (__mmask8) __U);
7366 }
7367
7368 extern __inline __m256i
7369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7370 _mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7371 __m256i __B)
7372 {
7373 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7374 (__v4di) __B,
7375 (__v4di) __W, __U);
7376 }
7377
7378 extern __inline __m256i
7379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7380 _mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7381 {
7382 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7383 (__v4di) __B,
7384 (__v4di)
7385 _mm256_setzero_pd (),
7386 __U);
7387 }
7388
7389 extern __inline __m128i
7390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7391 _mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7392 __m128i __B)
7393 {
7394 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7395 (__v2di) __B,
7396 (__v2di) __W, __U);
7397 }
7398
7399 extern __inline __m128i
7400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7401 _mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7402 {
7403 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7404 (__v2di) __B,
7405 (__v2di)
7406 _mm_setzero_pd (),
7407 __U);
7408 }
7409
7410 extern __inline __m256i
7411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7412 _mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7413 __m256i __B)
7414 {
7415 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7416 (__v4di) __B,
7417 (__v4di) __W, __U);
7418 }
7419
7420 extern __inline __m256i
7421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7422 _mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7423 {
7424 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7425 (__v4di) __B,
7426 (__v4di)
7427 _mm256_setzero_pd (),
7428 __U);
7429 }
7430
7431 extern __inline __m128i
7432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7433 _mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7434 __m128i __B)
7435 {
7436 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7437 (__v2di) __B,
7438 (__v2di) __W, __U);
7439 }
7440
7441 extern __inline __m128i
7442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7443 _mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7444 {
7445 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7446 (__v2di) __B,
7447 (__v2di)
7448 _mm_setzero_pd (),
7449 __U);
7450 }
7451
7452 extern __inline __m256i
7453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7454 _mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7455 __m256i __B)
7456 {
7457 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7458 (__v4di) __B,
7459 (__v4di) __W,
7460 (__mmask8) __U);
7461 }
7462
7463 extern __inline __m256i
7464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7465 _mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7466 {
7467 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7468 (__v4di) __B,
7469 (__v4di)
7470 _mm256_setzero_si256 (),
7471 (__mmask8) __U);
7472 }
7473
7474 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7475 _mm256_or_epi64 (__m256i __A, __m256i __B)
7476 {
7477 return (__m256i) ((__v4du)__A | (__v4du)__B);
7478 }
7479
7480 extern __inline __m128i
7481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7482 _mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
7483 {
7484 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7485 (__v2di) __B,
7486 (__v2di) __W,
7487 (__mmask8) __U);
7488 }
7489
7490 extern __inline __m128i
7491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7492 _mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7493 {
7494 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7495 (__v2di) __B,
7496 (__v2di)
7497 _mm_setzero_si128 (),
7498 (__mmask8) __U);
7499 }
7500
7501 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7502 _mm_or_epi64 (__m128i __A, __m128i __B)
7503 {
7504 return (__m128i) ((__v2du)__A | (__v2du)__B);
7505 }
7506
7507 extern __inline __m256i
7508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7509 _mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7510 __m256i __B)
7511 {
7512 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7513 (__v4di) __B,
7514 (__v4di) __W,
7515 (__mmask8) __U);
7516 }
7517
7518 extern __inline __m256i
7519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7520 _mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7521 {
7522 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7523 (__v4di) __B,
7524 (__v4di)
7525 _mm256_setzero_si256 (),
7526 (__mmask8) __U);
7527 }
7528
7529 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7530 _mm256_xor_epi64 (__m256i __A, __m256i __B)
7531 {
7532 return (__m256i) ((__v4du)__A ^ (__v4du)__B);
7533 }
7534
7535 extern __inline __m128i
7536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7537 _mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7538 __m128i __B)
7539 {
7540 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7541 (__v2di) __B,
7542 (__v2di) __W,
7543 (__mmask8) __U);
7544 }
7545
7546 extern __inline __m128i
7547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7548 _mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7549 {
7550 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7551 (__v2di) __B,
7552 (__v2di)
7553 _mm_setzero_si128 (),
7554 (__mmask8) __U);
7555 }
7556
7557 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7558 _mm_xor_epi64 (__m128i __A, __m128i __B)
7559 {
7560 return (__m128i) ((__v2du)__A ^ (__v2du)__B);
7561 }
7562
7563 extern __inline __m256d
7564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7565 _mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
7566 __m256d __B)
7567 {
7568 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7569 (__v4df) __B,
7570 (__v4df) __W,
7571 (__mmask8) __U);
7572 }
7573
7574 extern __inline __m256d
7575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7576 _mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
7577 {
7578 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7579 (__v4df) __B,
7580 (__v4df)
7581 _mm256_setzero_pd (),
7582 (__mmask8) __U);
7583 }
7584
7585 extern __inline __m256
7586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7587 _mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7588 {
7589 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7590 (__v8sf) __B,
7591 (__v8sf) __W,
7592 (__mmask8) __U);
7593 }
7594
7595 extern __inline __m256
7596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7597 _mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
7598 {
7599 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7600 (__v8sf) __B,
7601 (__v8sf)
7602 _mm256_setzero_ps (),
7603 (__mmask8) __U);
7604 }
7605
7606 extern __inline __m128
7607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7608 _mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7609 {
7610 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7611 (__v4sf) __B,
7612 (__v4sf) __W,
7613 (__mmask8) __U);
7614 }
7615
7616 extern __inline __m128
7617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7618 _mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
7619 {
7620 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7621 (__v4sf) __B,
7622 (__v4sf)
7623 _mm_setzero_ps (),
7624 (__mmask8) __U);
7625 }
7626
7627 extern __inline __m128d
7628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7629 _mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7630 {
7631 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7632 (__v2df) __B,
7633 (__v2df) __W,
7634 (__mmask8) __U);
7635 }
7636
7637 extern __inline __m128d
7638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7639 _mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
7640 {
7641 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7642 (__v2df) __B,
7643 (__v2df)
7644 _mm_setzero_pd (),
7645 (__mmask8) __U);
7646 }
7647
7648 extern __inline __m256d
7649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7650 _mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
7651 __m256d __B)
7652 {
7653 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7654 (__v4df) __B,
7655 (__v4df) __W,
7656 (__mmask8) __U);
7657 }
7658
7659 extern __inline __m256d
7660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7661 _mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
7662 __m256d __B)
7663 {
7664 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7665 (__v4df) __B,
7666 (__v4df) __W,
7667 (__mmask8) __U);
7668 }
7669
7670 extern __inline __m256d
7671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7672 _mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
7673 {
7674 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7675 (__v4df) __B,
7676 (__v4df)
7677 _mm256_setzero_pd (),
7678 (__mmask8) __U);
7679 }
7680
7681 extern __inline __m256
7682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7683 _mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7684 {
7685 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7686 (__v8sf) __B,
7687 (__v8sf) __W,
7688 (__mmask8) __U);
7689 }
7690
7691 extern __inline __m256d
7692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7693 _mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
7694 {
7695 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7696 (__v4df) __B,
7697 (__v4df)
7698 _mm256_setzero_pd (),
7699 (__mmask8) __U);
7700 }
7701
7702 extern __inline __m256
7703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7704 _mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7705 {
7706 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7707 (__v8sf) __B,
7708 (__v8sf) __W,
7709 (__mmask8) __U);
7710 }
7711
7712 extern __inline __m256
7713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7714 _mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
7715 {
7716 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7717 (__v8sf) __B,
7718 (__v8sf)
7719 _mm256_setzero_ps (),
7720 (__mmask8) __U);
7721 }
7722
7723 extern __inline __m256
7724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7725 _mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
7726 {
7727 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7728 (__v8sf) __B,
7729 (__v8sf)
7730 _mm256_setzero_ps (),
7731 (__mmask8) __U);
7732 }
7733
7734 extern __inline __m128
7735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7736 _mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7737 {
7738 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7739 (__v4sf) __B,
7740 (__v4sf) __W,
7741 (__mmask8) __U);
7742 }
7743
7744 extern __inline __m128
7745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7746 _mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7747 {
7748 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7749 (__v4sf) __B,
7750 (__v4sf) __W,
7751 (__mmask8) __U);
7752 }
7753
7754 extern __inline __m128
7755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7756 _mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
7757 {
7758 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7759 (__v4sf) __B,
7760 (__v4sf)
7761 _mm_setzero_ps (),
7762 (__mmask8) __U);
7763 }
7764
7765 extern __inline __m128
7766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7767 _mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
7768 {
7769 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7770 (__v4sf) __B,
7771 (__v4sf)
7772 _mm_setzero_ps (),
7773 (__mmask8) __U);
7774 }
7775
7776 extern __inline __m128
7777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7778 _mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7779 {
7780 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7781 (__v4sf) __B,
7782 (__v4sf) __W,
7783 (__mmask8) __U);
7784 }
7785
7786 extern __inline __m128
7787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7788 _mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
7789 {
7790 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7791 (__v4sf) __B,
7792 (__v4sf)
7793 _mm_setzero_ps (),
7794 (__mmask8) __U);
7795 }
7796
7797 extern __inline __m128d
7798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7799 _mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7800 {
7801 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7802 (__v2df) __B,
7803 (__v2df) __W,
7804 (__mmask8) __U);
7805 }
7806
7807 extern __inline __m128d
7808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7809 _mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
7810 {
7811 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7812 (__v2df) __B,
7813 (__v2df)
7814 _mm_setzero_pd (),
7815 (__mmask8) __U);
7816 }
7817
7818 extern __inline __m128d
7819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7820 _mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7821 {
7822 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7823 (__v2df) __B,
7824 (__v2df) __W,
7825 (__mmask8) __U);
7826 }
7827
7828 extern __inline __m128d
7829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7830 _mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
7831 {
7832 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7833 (__v2df) __B,
7834 (__v2df)
7835 _mm_setzero_pd (),
7836 (__mmask8) __U);
7837 }
7838
7839 extern __inline __m128d
7840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7841 _mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7842 {
7843 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7844 (__v2df) __B,
7845 (__v2df) __W,
7846 (__mmask8) __U);
7847 }
7848
7849 extern __inline __m128d
7850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7851 _mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
7852 {
7853 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7854 (__v2df) __B,
7855 (__v2df)
7856 _mm_setzero_pd (),
7857 (__mmask8) __U);
7858 }
7859
7860 extern __inline __m256
7861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7862 _mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7863 {
7864 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7865 (__v8sf) __B,
7866 (__v8sf) __W,
7867 (__mmask8) __U);
7868 }
7869
7870 extern __inline __m256
7871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7872 _mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
7873 {
7874 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7875 (__v8sf) __B,
7876 (__v8sf)
7877 _mm256_setzero_ps (),
7878 (__mmask8) __U);
7879 }
7880
7881 extern __inline __m256d
7882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7883 _mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
7884 __m256d __B)
7885 {
7886 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7887 (__v4df) __B,
7888 (__v4df) __W,
7889 (__mmask8) __U);
7890 }
7891
7892 extern __inline __m256d
7893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7894 _mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
7895 {
7896 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7897 (__v4df) __B,
7898 (__v4df)
7899 _mm256_setzero_pd (),
7900 (__mmask8) __U);
7901 }
7902
7903 extern __inline __m256i
7904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7905 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7906 {
7907 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7908 (__v4di) __B,
7909 (__v4di)
7910 _mm256_setzero_si256 (),
7911 __M);
7912 }
7913
7914 extern __inline __m256i
7915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7916 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7917 __m256i __B)
7918 {
7919 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7920 (__v4di) __B,
7921 (__v4di) __W, __M);
7922 }
7923
7924 extern __inline __m256i
7925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7926 _mm256_min_epi64 (__m256i __A, __m256i __B)
7927 {
7928 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7929 (__v4di) __B,
7930 (__v4di)
7931 _mm256_setzero_si256 (),
7932 (__mmask8) -1);
7933 }
7934
7935 extern __inline __m256i
7936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7937 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7938 __m256i __B)
7939 {
7940 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7941 (__v4di) __B,
7942 (__v4di) __W, __M);
7943 }
7944
7945 extern __inline __m256i
7946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7947 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7948 {
7949 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7950 (__v4di) __B,
7951 (__v4di)
7952 _mm256_setzero_si256 (),
7953 __M);
7954 }
7955
7956 extern __inline __m256i
7957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7958 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7959 {
7960 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7961 (__v4di) __B,
7962 (__v4di)
7963 _mm256_setzero_si256 (),
7964 __M);
7965 }
7966
7967 extern __inline __m256i
7968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7969 _mm256_max_epi64 (__m256i __A, __m256i __B)
7970 {
7971 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7972 (__v4di) __B,
7973 (__v4di)
7974 _mm256_setzero_si256 (),
7975 (__mmask8) -1);
7976 }
7977
7978 extern __inline __m256i
7979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7980 _mm256_max_epu64 (__m256i __A, __m256i __B)
7981 {
7982 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7983 (__v4di) __B,
7984 (__v4di)
7985 _mm256_setzero_si256 (),
7986 (__mmask8) -1);
7987 }
7988
7989 extern __inline __m256i
7990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7991 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7992 __m256i __B)
7993 {
7994 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7995 (__v4di) __B,
7996 (__v4di) __W, __M);
7997 }
7998
7999 extern __inline __m256i
8000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8001 _mm256_min_epu64 (__m256i __A, __m256i __B)
8002 {
8003 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
8004 (__v4di) __B,
8005 (__v4di)
8006 _mm256_setzero_si256 (),
8007 (__mmask8) -1);
8008 }
8009
8010 extern __inline __m256i
8011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8012 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
8013 __m256i __B)
8014 {
8015 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
8016 (__v4di) __B,
8017 (__v4di) __W, __M);
8018 }
8019
8020 extern __inline __m256i
8021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8022 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
8023 {
8024 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
8025 (__v4di) __B,
8026 (__v4di)
8027 _mm256_setzero_si256 (),
8028 __M);
8029 }
8030
8031 extern __inline __m256i
8032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8033 _mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
8034 {
8035 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
8036 (__v8si) __B,
8037 (__v8si)
8038 _mm256_setzero_si256 (),
8039 __M);
8040 }
8041
8042 extern __inline __m256i
8043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8044 _mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
8045 __m256i __B)
8046 {
8047 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
8048 (__v8si) __B,
8049 (__v8si) __W, __M);
8050 }
8051
8052 extern __inline __m256i
8053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8054 _mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
8055 {
8056 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
8057 (__v8si) __B,
8058 (__v8si)
8059 _mm256_setzero_si256 (),
8060 __M);
8061 }
8062
8063 extern __inline __m256i
8064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8065 _mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
8066 __m256i __B)
8067 {
8068 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
8069 (__v8si) __B,
8070 (__v8si) __W, __M);
8071 }
8072
8073 extern __inline __m256i
8074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8075 _mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
8076 {
8077 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
8078 (__v8si) __B,
8079 (__v8si)
8080 _mm256_setzero_si256 (),
8081 __M);
8082 }
8083
8084 extern __inline __m256i
8085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8086 _mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
8087 __m256i __B)
8088 {
8089 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
8090 (__v8si) __B,
8091 (__v8si) __W, __M);
8092 }
8093
8094 extern __inline __m256i
8095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8096 _mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
8097 {
8098 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
8099 (__v8si) __B,
8100 (__v8si)
8101 _mm256_setzero_si256 (),
8102 __M);
8103 }
8104
8105 extern __inline __m256i
8106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8107 _mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
8108 __m256i __B)
8109 {
8110 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
8111 (__v8si) __B,
8112 (__v8si) __W, __M);
8113 }
8114
8115 extern __inline __m128i
8116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8117 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
8118 {
8119 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8120 (__v2di) __B,
8121 (__v2di)
8122 _mm_setzero_si128 (),
8123 __M);
8124 }
8125
8126 extern __inline __m128i
8127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8128 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
8129 __m128i __B)
8130 {
8131 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8132 (__v2di) __B,
8133 (__v2di) __W, __M);
8134 }
8135
8136 extern __inline __m128i
8137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8138 _mm_min_epi64 (__m128i __A, __m128i __B)
8139 {
8140 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8141 (__v2di) __B,
8142 (__v2di)
8143 _mm_setzero_si128 (),
8144 (__mmask8) -1);
8145 }
8146
8147 extern __inline __m128i
8148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8149 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
8150 __m128i __B)
8151 {
8152 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8153 (__v2di) __B,
8154 (__v2di) __W, __M);
8155 }
8156
8157 extern __inline __m128i
8158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8159 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
8160 {
8161 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8162 (__v2di) __B,
8163 (__v2di)
8164 _mm_setzero_si128 (),
8165 __M);
8166 }
8167
8168 extern __inline __m128i
8169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8170 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8171 {
8172 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8173 (__v2di) __B,
8174 (__v2di)
8175 _mm_setzero_si128 (),
8176 __M);
8177 }
8178
8179 extern __inline __m128i
8180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8181 _mm_max_epi64 (__m128i __A, __m128i __B)
8182 {
8183 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8184 (__v2di) __B,
8185 (__v2di)
8186 _mm_setzero_si128 (),
8187 (__mmask8) -1);
8188 }
8189
8190 extern __inline __m128i
8191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8192 _mm_max_epu64 (__m128i __A, __m128i __B)
8193 {
8194 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8195 (__v2di) __B,
8196 (__v2di)
8197 _mm_setzero_si128 (),
8198 (__mmask8) -1);
8199 }
8200
8201 extern __inline __m128i
8202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8203 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8204 __m128i __B)
8205 {
8206 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8207 (__v2di) __B,
8208 (__v2di) __W, __M);
8209 }
8210
8211 extern __inline __m128i
8212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8213 _mm_min_epu64 (__m128i __A, __m128i __B)
8214 {
8215 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8216 (__v2di) __B,
8217 (__v2di)
8218 _mm_setzero_si128 (),
8219 (__mmask8) -1);
8220 }
8221
8222 extern __inline __m128i
8223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8224 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8225 __m128i __B)
8226 {
8227 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8228 (__v2di) __B,
8229 (__v2di) __W, __M);
8230 }
8231
8232 extern __inline __m128i
8233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8234 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8235 {
8236 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8237 (__v2di) __B,
8238 (__v2di)
8239 _mm_setzero_si128 (),
8240 __M);
8241 }
8242
8243 extern __inline __m128i
8244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8245 _mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8246 {
8247 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8248 (__v4si) __B,
8249 (__v4si)
8250 _mm_setzero_si128 (),
8251 __M);
8252 }
8253
8254 extern __inline __m128i
8255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8256 _mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8257 __m128i __B)
8258 {
8259 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8260 (__v4si) __B,
8261 (__v4si) __W, __M);
8262 }
8263
8264 extern __inline __m128i
8265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8266 _mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8267 {
8268 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8269 (__v4si) __B,
8270 (__v4si)
8271 _mm_setzero_si128 (),
8272 __M);
8273 }
8274
8275 extern __inline __m128i
8276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8277 _mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8278 __m128i __B)
8279 {
8280 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8281 (__v4si) __B,
8282 (__v4si) __W, __M);
8283 }
8284
8285 extern __inline __m128i
8286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8287 _mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8288 {
8289 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8290 (__v4si) __B,
8291 (__v4si)
8292 _mm_setzero_si128 (),
8293 __M);
8294 }
8295
8296 extern __inline __m128i
8297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8298 _mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8299 __m128i __B)
8300 {
8301 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8302 (__v4si) __B,
8303 (__v4si) __W, __M);
8304 }
8305
8306 extern __inline __m128i
8307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8308 _mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8309 {
8310 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8311 (__v4si) __B,
8312 (__v4si)
8313 _mm_setzero_si128 (),
8314 __M);
8315 }
8316
8317 extern __inline __m128i
8318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8319 _mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8320 __m128i __B)
8321 {
8322 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8323 (__v4si) __B,
8324 (__v4si) __W, __M);
8325 }
8326
/* When AVX512CD is not already enabled, save the current target options
   and switch to avx512vl+avx512cd for the definitions that follow.
   __DISABLE_AVX512VLCD__ records that the options were pushed here.  */
#if !defined (__AVX512CD__)
#pragma GCC push_options
#pragma GCC target("avx512vl,avx512cd")
#define __DISABLE_AVX512VLCD__
#endif /* !__AVX512CD__ */
8332
8333 extern __inline __m128i
8334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8335 _mm_broadcastmb_epi64 (__mmask8 __A)
8336 {
8337 return (__m128i) __builtin_ia32_broadcastmb128 (__A);
8338 }
8339
8340 extern __inline __m256i
8341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8342 _mm256_broadcastmb_epi64 (__mmask8 __A)
8343 {
8344 return (__m256i) __builtin_ia32_broadcastmb256 (__A);
8345 }
8346
8347 extern __inline __m128i
8348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8349 _mm_broadcastmw_epi32 (__mmask16 __A)
8350 {
8351 return (__m128i) __builtin_ia32_broadcastmw128 (__A);
8352 }
8353
8354 extern __inline __m256i
8355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8356 _mm256_broadcastmw_epi32 (__mmask16 __A)
8357 {
8358 return (__m256i) __builtin_ia32_broadcastmw256 (__A);
8359 }
8360
8361 extern __inline __m256i
8362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8363 _mm256_lzcnt_epi32 (__m256i __A)
8364 {
8365 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8366 (__v8si)
8367 _mm256_setzero_si256 (),
8368 (__mmask8) -1);
8369 }
8370
8371 extern __inline __m256i
8372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8373 _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8374 {
8375 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8376 (__v8si) __W,
8377 (__mmask8) __U);
8378 }
8379
8380 extern __inline __m256i
8381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8382 _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
8383 {
8384 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8385 (__v8si)
8386 _mm256_setzero_si256 (),
8387 (__mmask8) __U);
8388 }
8389
8390 extern __inline __m256i
8391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8392 _mm256_lzcnt_epi64 (__m256i __A)
8393 {
8394 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8395 (__v4di)
8396 _mm256_setzero_si256 (),
8397 (__mmask8) -1);
8398 }
8399
8400 extern __inline __m256i
8401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8402 _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8403 {
8404 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8405 (__v4di) __W,
8406 (__mmask8) __U);
8407 }
8408
8409 extern __inline __m256i
8410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8411 _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
8412 {
8413 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8414 (__v4di)
8415 _mm256_setzero_si256 (),
8416 (__mmask8) __U);
8417 }
8418
8419 extern __inline __m256i
8420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8421 _mm256_conflict_epi64 (__m256i __A)
8422 {
8423 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8424 (__v4di)
8425 _mm256_setzero_si256 (),
8426 (__mmask8) -1);
8427 }
8428
8429 extern __inline __m256i
8430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8431 _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8432 {
8433 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8434 (__v4di) __W,
8435 (__mmask8)
8436 __U);
8437 }
8438
8439 extern __inline __m256i
8440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8441 _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
8442 {
8443 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8444 (__v4di)
8445 _mm256_setzero_si256 (),
8446 (__mmask8)
8447 __U);
8448 }
8449
8450 extern __inline __m256i
8451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8452 _mm256_conflict_epi32 (__m256i __A)
8453 {
8454 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8455 (__v8si)
8456 _mm256_setzero_si256 (),
8457 (__mmask8) -1);
8458 }
8459
8460 extern __inline __m256i
8461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8462 _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8463 {
8464 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8465 (__v8si) __W,
8466 (__mmask8)
8467 __U);
8468 }
8469
8470 extern __inline __m256i
8471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8472 _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
8473 {
8474 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8475 (__v8si)
8476 _mm256_setzero_si256 (),
8477 (__mmask8)
8478 __U);
8479 }
8480
8481 extern __inline __m128i
8482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8483 _mm_lzcnt_epi32 (__m128i __A)
8484 {
8485 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8486 (__v4si)
8487 _mm_setzero_si128 (),
8488 (__mmask8) -1);
8489 }
8490
8491 extern __inline __m128i
8492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8493 _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8494 {
8495 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8496 (__v4si) __W,
8497 (__mmask8) __U);
8498 }
8499
8500 extern __inline __m128i
8501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8502 _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
8503 {
8504 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8505 (__v4si)
8506 _mm_setzero_si128 (),
8507 (__mmask8) __U);
8508 }
8509
8510 extern __inline __m128i
8511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8512 _mm_lzcnt_epi64 (__m128i __A)
8513 {
8514 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8515 (__v2di)
8516 _mm_setzero_si128 (),
8517 (__mmask8) -1);
8518 }
8519
8520 extern __inline __m128i
8521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8522 _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8523 {
8524 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8525 (__v2di) __W,
8526 (__mmask8) __U);
8527 }
8528
8529 extern __inline __m128i
8530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8531 _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
8532 {
8533 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8534 (__v2di)
8535 _mm_setzero_si128 (),
8536 (__mmask8) __U);
8537 }
8538
8539 extern __inline __m128i
8540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8541 _mm_conflict_epi64 (__m128i __A)
8542 {
8543 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8544 (__v2di)
8545 _mm_setzero_si128 (),
8546 (__mmask8) -1);
8547 }
8548
8549 extern __inline __m128i
8550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8551 _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8552 {
8553 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8554 (__v2di) __W,
8555 (__mmask8)
8556 __U);
8557 }
8558
8559 extern __inline __m128i
8560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8561 _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
8562 {
8563 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8564 (__v2di)
8565 _mm_setzero_si128 (),
8566 (__mmask8)
8567 __U);
8568 }
8569
8570 extern __inline __m128i
8571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8572 _mm_conflict_epi32 (__m128i __A)
8573 {
8574 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8575 (__v4si)
8576 _mm_setzero_si128 (),
8577 (__mmask8) -1);
8578 }
8579
8580 extern __inline __m128i
8581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8582 _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8583 {
8584 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8585 (__v4si) __W,
8586 (__mmask8)
8587 __U);
8588 }
8589
8590 extern __inline __m128i
8591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8592 _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
8593 {
8594 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8595 (__v4si)
8596 _mm_setzero_si128 (),
8597 (__mmask8)
8598 __U);
8599 }
8600
/* Restore the saved target options, but only when
   __DISABLE_AVX512VLCD__ shows that they were pushed earlier.  */
#if defined (__DISABLE_AVX512VLCD__)
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VLCD__ */
8604
8605 extern __inline __m256d
8606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8607 _mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
8608 __m256d __B)
8609 {
8610 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8611 (__v4df) __B,
8612 (__v4df) __W,
8613 (__mmask8) __U);
8614 }
8615
8616 extern __inline __m256d
8617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8618 _mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
8619 {
8620 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8621 (__v4df) __B,
8622 (__v4df)
8623 _mm256_setzero_pd (),
8624 (__mmask8) __U);
8625 }
8626
8627 extern __inline __m128d
8628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8629 _mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
8630 __m128d __B)
8631 {
8632 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8633 (__v2df) __B,
8634 (__v2df) __W,
8635 (__mmask8) __U);
8636 }
8637
8638 extern __inline __m128d
8639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8640 _mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
8641 {
8642 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8643 (__v2df) __B,
8644 (__v2df)
8645 _mm_setzero_pd (),
8646 (__mmask8) __U);
8647 }
8648
8649 extern __inline __m256
8650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8651 _mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
8652 __m256 __B)
8653 {
8654 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8655 (__v8sf) __B,
8656 (__v8sf) __W,
8657 (__mmask8) __U);
8658 }
8659
8660 extern __inline __m256d
8661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8662 _mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
8663 __m256d __B)
8664 {
8665 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8666 (__v4df) __B,
8667 (__v4df) __W,
8668 (__mmask8) __U);
8669 }
8670
8671 extern __inline __m256d
8672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8673 _mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
8674 {
8675 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8676 (__v4df) __B,
8677 (__v4df)
8678 _mm256_setzero_pd (),
8679 (__mmask8) __U);
8680 }
8681
8682 extern __inline __m128d
8683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8684 _mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
8685 __m128d __B)
8686 {
8687 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8688 (__v2df) __B,
8689 (__v2df) __W,
8690 (__mmask8) __U);
8691 }
8692
8693 extern __inline __m128d
8694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8695 _mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
8696 {
8697 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8698 (__v2df) __B,
8699 (__v2df)
8700 _mm_setzero_pd (),
8701 (__mmask8) __U);
8702 }
8703
8704 extern __inline __m256
8705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8706 _mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
8707 __m256 __B)
8708 {
8709 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8710 (__v8sf) __B,
8711 (__v8sf) __W,
8712 (__mmask8) __U);
8713 }
8714
8715 extern __inline __m256
8716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8717 _mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
8718 {
8719 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8720 (__v8sf) __B,
8721 (__v8sf)
8722 _mm256_setzero_ps (),
8723 (__mmask8) __U);
8724 }
8725
8726 extern __inline __m128
8727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8728 _mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8729 {
8730 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8731 (__v4sf) __B,
8732 (__v4sf) __W,
8733 (__mmask8) __U);
8734 }
8735
8736 extern __inline __m128
8737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8738 _mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
8739 {
8740 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8741 (__v4sf) __B,
8742 (__v4sf)
8743 _mm_setzero_ps (),
8744 (__mmask8) __U);
8745 }
8746
8747 extern __inline __m128
8748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8749 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8750 {
8751 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8752 (__v4sf) __W,
8753 (__mmask8) __U);
8754 }
8755
8756 extern __inline __m128
8757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8758 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8759 {
8760 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8761 (__v4sf)
8762 _mm_setzero_ps (),
8763 (__mmask8) __U);
8764 }
8765
8766 extern __inline __m256
8767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8768 _mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
8769 {
8770 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8771 (__v8sf) __B,
8772 (__v8sf)
8773 _mm256_setzero_ps (),
8774 (__mmask8) __U);
8775 }
8776
8777 extern __inline __m256
8778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8779 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8780 {
8781 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8782 (__v8sf) __W,
8783 (__mmask8) __U);
8784 }
8785
8786 extern __inline __m256
8787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8788 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8789 {
8790 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8791 (__v8sf)
8792 _mm256_setzero_ps (),
8793 (__mmask8) __U);
8794 }
8795
8796 extern __inline __m128
8797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8798 _mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8799 {
8800 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8801 (__v4sf) __B,
8802 (__v4sf) __W,
8803 (__mmask8) __U);
8804 }
8805
8806 extern __inline __m128
8807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8808 _mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
8809 {
8810 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8811 (__v4sf) __B,
8812 (__v4sf)
8813 _mm_setzero_ps (),
8814 (__mmask8) __U);
8815 }
8816
8817 extern __inline __m256i
8818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8819 _mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8820 __m128i __B)
8821 {
8822 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8823 (__v4si) __B,
8824 (__v8si) __W,
8825 (__mmask8) __U);
8826 }
8827
8828 extern __inline __m256i
8829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8830 _mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8831 {
8832 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8833 (__v4si) __B,
8834 (__v8si)
8835 _mm256_setzero_si256 (),
8836 (__mmask8) __U);
8837 }
8838
8839 extern __inline __m128i
8840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8841 _mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8842 __m128i __B)
8843 {
8844 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8845 (__v4si) __B,
8846 (__v4si) __W,
8847 (__mmask8) __U);
8848 }
8849
8850 extern __inline __m128i
8851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8852 _mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8853 {
8854 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8855 (__v4si) __B,
8856 (__v4si)
8857 _mm_setzero_si128 (),
8858 (__mmask8) __U);
8859 }
8860
8861 extern __inline __m256i
8862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8863 _mm256_sra_epi64 (__m256i __A, __m128i __B)
8864 {
8865 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8866 (__v2di) __B,
8867 (__v4di)
8868 _mm256_setzero_si256 (),
8869 (__mmask8) -1);
8870 }
8871
8872 extern __inline __m256i
8873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8874 _mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8875 __m128i __B)
8876 {
8877 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8878 (__v2di) __B,
8879 (__v4di) __W,
8880 (__mmask8) __U);
8881 }
8882
8883 extern __inline __m256i
8884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8885 _mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8886 {
8887 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8888 (__v2di) __B,
8889 (__v4di)
8890 _mm256_setzero_si256 (),
8891 (__mmask8) __U);
8892 }
8893
8894 extern __inline __m128i
8895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8896 _mm_sra_epi64 (__m128i __A, __m128i __B)
8897 {
8898 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8899 (__v2di) __B,
8900 (__v2di)
8901 _mm_setzero_si128 (),
8902 (__mmask8) -1);
8903 }
8904
8905 extern __inline __m128i
8906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8907 _mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8908 __m128i __B)
8909 {
8910 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8911 (__v2di) __B,
8912 (__v2di) __W,
8913 (__mmask8) __U);
8914 }
8915
8916 extern __inline __m128i
8917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8918 _mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8919 {
8920 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8921 (__v2di) __B,
8922 (__v2di)
8923 _mm_setzero_si128 (),
8924 (__mmask8) __U);
8925 }
8926
8927 extern __inline __m128i
8928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8929 _mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8930 __m128i __B)
8931 {
8932 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8933 (__v4si) __B,
8934 (__v4si) __W,
8935 (__mmask8) __U);
8936 }
8937
8938 extern __inline __m128i
8939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8940 _mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8941 {
8942 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8943 (__v4si) __B,
8944 (__v4si)
8945 _mm_setzero_si128 (),
8946 (__mmask8) __U);
8947 }
8948
8949 extern __inline __m128i
8950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8951 _mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8952 __m128i __B)
8953 {
8954 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8955 (__v2di) __B,
8956 (__v2di) __W,
8957 (__mmask8) __U);
8958 }
8959
8960 extern __inline __m128i
8961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8962 _mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8963 {
8964 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8965 (__v2di) __B,
8966 (__v2di)
8967 _mm_setzero_si128 (),
8968 (__mmask8) __U);
8969 }
8970
8971 extern __inline __m256i
8972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8973 _mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8974 __m128i __B)
8975 {
8976 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8977 (__v4si) __B,
8978 (__v8si) __W,
8979 (__mmask8) __U);
8980 }
8981
8982 extern __inline __m256i
8983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8984 _mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8985 {
8986 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8987 (__v4si) __B,
8988 (__v8si)
8989 _mm256_setzero_si256 (),
8990 (__mmask8) __U);
8991 }
8992
8993 extern __inline __m256i
8994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8995 _mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8996 __m128i __B)
8997 {
8998 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8999 (__v2di) __B,
9000 (__v4di) __W,
9001 (__mmask8) __U);
9002 }
9003
9004 extern __inline __m256i
9005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9006 _mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
9007 {
9008 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
9009 (__v2di) __B,
9010 (__v4di)
9011 _mm256_setzero_si256 (),
9012 (__mmask8) __U);
9013 }
9014
9015 extern __inline __m256
9016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9017 _mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
9018 __m256 __Y)
9019 {
9020 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
9021 (__v8si) __X,
9022 (__v8sf) __W,
9023 (__mmask8) __U);
9024 }
9025
9026 extern __inline __m256
9027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9028 _mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
9029 {
9030 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
9031 (__v8si) __X,
9032 (__v8sf)
9033 _mm256_setzero_ps (),
9034 (__mmask8) __U);
9035 }
9036
9037 extern __inline __m256d
9038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9039 _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
9040 {
9041 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
9042 (__v4di) __X,
9043 (__v4df)
9044 _mm256_setzero_pd (),
9045 (__mmask8) -1);
9046 }
9047
9048 extern __inline __m256d
9049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9050 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
9051 __m256d __Y)
9052 {
9053 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
9054 (__v4di) __X,
9055 (__v4df) __W,
9056 (__mmask8) __U);
9057 }
9058
9059 extern __inline __m256d
9060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9061 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
9062 {
9063 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
9064 (__v4di) __X,
9065 (__v4df)
9066 _mm256_setzero_pd (),
9067 (__mmask8) __U);
9068 }
9069
9070 extern __inline __m256d
9071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9072 _mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
9073 __m256i __C)
9074 {
9075 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
9076 (__v4di) __C,
9077 (__v4df) __W,
9078 (__mmask8)
9079 __U);
9080 }
9081
9082 extern __inline __m256d
9083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9084 _mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
9085 {
9086 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
9087 (__v4di) __C,
9088 (__v4df)
9089 _mm256_setzero_pd (),
9090 (__mmask8)
9091 __U);
9092 }
9093
9094 extern __inline __m256
9095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9096 _mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
9097 __m256i __C)
9098 {
9099 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
9100 (__v8si) __C,
9101 (__v8sf) __W,
9102 (__mmask8) __U);
9103 }
9104
9105 extern __inline __m256
9106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9107 _mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
9108 {
9109 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
9110 (__v8si) __C,
9111 (__v8sf)
9112 _mm256_setzero_ps (),
9113 (__mmask8) __U);
9114 }
9115
9116 extern __inline __m128d
9117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9118 _mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
9119 __m128i __C)
9120 {
9121 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
9122 (__v2di) __C,
9123 (__v2df) __W,
9124 (__mmask8) __U);
9125 }
9126
9127 extern __inline __m128d
9128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9129 _mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
9130 {
9131 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
9132 (__v2di) __C,
9133 (__v2df)
9134 _mm_setzero_pd (),
9135 (__mmask8) __U);
9136 }
9137
9138 extern __inline __m128
9139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9140 _mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
9141 __m128i __C)
9142 {
9143 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
9144 (__v4si) __C,
9145 (__v4sf) __W,
9146 (__mmask8) __U);
9147 }
9148
9149 extern __inline __m128
9150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9151 _mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
9152 {
9153 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
9154 (__v4si) __C,
9155 (__v4sf)
9156 _mm_setzero_ps (),
9157 (__mmask8) __U);
9158 }
9159
9160 extern __inline __m256i
9161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9162 _mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
9163 {
9164 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9165 (__v8si) __B,
9166 (__v8si)
9167 _mm256_setzero_si256 (),
9168 __M);
9169 }
9170
9171 extern __inline __m256i
9172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9173 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
9174 {
9175 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9176 (__v4di) __X,
9177 (__v4di)
9178 _mm256_setzero_si256 (),
9179 __M);
9180 }
9181
9182 extern __inline __m256i
9183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9184 _mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
9185 __m256i __B)
9186 {
9187 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9188 (__v8si) __B,
9189 (__v8si) __W, __M);
9190 }
9191
9192 extern __inline __m128i
9193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9194 _mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
9195 {
9196 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9197 (__v4si) __B,
9198 (__v4si)
9199 _mm_setzero_si128 (),
9200 __M);
9201 }
9202
9203 extern __inline __m128i
9204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9205 _mm_mask_mullo_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
9206 __m128i __B)
9207 {
9208 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9209 (__v4si) __B,
9210 (__v4si) __W, __M);
9211 }
9212
9213 extern __inline __m256i
9214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9215 _mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9216 __m256i __Y)
9217 {
9218 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9219 (__v8si) __Y,
9220 (__v4di) __W, __M);
9221 }
9222
9223 extern __inline __m256i
9224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9225 _mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9226 {
9227 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9228 (__v8si) __Y,
9229 (__v4di)
9230 _mm256_setzero_si256 (),
9231 __M);
9232 }
9233
9234 extern __inline __m128i
9235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9236 _mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
9237 __m128i __Y)
9238 {
9239 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9240 (__v4si) __Y,
9241 (__v2di) __W, __M);
9242 }
9243
9244 extern __inline __m128i
9245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9246 _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
9247 {
9248 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9249 (__v4si) __Y,
9250 (__v2di)
9251 _mm_setzero_si128 (),
9252 __M);
9253 }
9254
9255 extern __inline __m256i
9256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9257 _mm256_permutexvar_epi64 (__m256i __X, __m256i __Y)
9258 {
9259 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9260 (__v4di) __X,
9261 (__v4di)
9262 _mm256_setzero_si256 (),
9263 (__mmask8) -1);
9264 }
9265
9266 extern __inline __m256i
9267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9268 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
9269 __m256i __Y)
9270 {
9271 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9272 (__v4di) __X,
9273 (__v4di) __W,
9274 __M);
9275 }
9276
9277 extern __inline __m256i
9278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9279 _mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
9280 __m256i __Y)
9281 {
9282 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9283 (__v8si) __Y,
9284 (__v4di) __W, __M);
9285 }
9286
9287 extern __inline __m256i
9288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9289 _mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9290 {
9291 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9292 (__v8si) __X,
9293 (__v8si)
9294 _mm256_setzero_si256 (),
9295 __M);
9296 }
9297
9298 extern __inline __m256i
9299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9300 _mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
9301 {
9302 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9303 (__v8si) __Y,
9304 (__v4di)
9305 _mm256_setzero_si256 (),
9306 __M);
9307 }
9308
9309 extern __inline __m128i
9310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9311 _mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
9312 __m128i __Y)
9313 {
9314 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9315 (__v4si) __Y,
9316 (__v2di) __W, __M);
9317 }
9318
9319 extern __inline __m128i
9320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9321 _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
9322 {
9323 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9324 (__v4si) __Y,
9325 (__v2di)
9326 _mm_setzero_si128 (),
9327 __M);
9328 }
9329
9330 extern __inline __m256i
9331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9332 _mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
9333 {
9334 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9335 (__v8si) __X,
9336 (__v8si)
9337 _mm256_setzero_si256 (),
9338 (__mmask8) -1);
9339 }
9340
9341 extern __inline __m256i
9342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9343 _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9344 __m256i __Y)
9345 {
9346 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9347 (__v8si) __X,
9348 (__v8si) __W,
9349 __M);
9350 }
9351
9352 extern __inline __mmask8
9353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9354 _mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9355 {
9356 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9357 (__v8si) __Y, 4,
9358 (__mmask8) __M);
9359 }
9360
9361 extern __inline __mmask8
9362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9363 _mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
9364 {
9365 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9366 (__v8si) __Y, 4,
9367 (__mmask8) -1);
9368 }
9369
9370 extern __inline __mmask8
9371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9372 _mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9373 {
9374 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9375 (__v8si) __Y, 1,
9376 (__mmask8) __M);
9377 }
9378
9379 extern __inline __mmask8
9380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9381 _mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
9382 {
9383 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9384 (__v8si) __Y, 1,
9385 (__mmask8) -1);
9386 }
9387
9388 extern __inline __mmask8
9389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9390 _mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9391 {
9392 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9393 (__v8si) __Y, 5,
9394 (__mmask8) __M);
9395 }
9396
9397 extern __inline __mmask8
9398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9399 _mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
9400 {
9401 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9402 (__v8si) __Y, 5,
9403 (__mmask8) -1);
9404 }
9405
9406 extern __inline __mmask8
9407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9408 _mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9409 {
9410 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9411 (__v8si) __Y, 2,
9412 (__mmask8) __M);
9413 }
9414
9415 extern __inline __mmask8
9416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9417 _mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
9418 {
9419 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9420 (__v8si) __Y, 2,
9421 (__mmask8) -1);
9422 }
9423
9424 extern __inline __mmask8
9425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9426 _mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9427 {
9428 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9429 (__v4di) __Y, 4,
9430 (__mmask8) __M);
9431 }
9432
9433 extern __inline __mmask8
9434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9435 _mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
9436 {
9437 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9438 (__v4di) __Y, 4,
9439 (__mmask8) -1);
9440 }
9441
9442 extern __inline __mmask8
9443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9444 _mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9445 {
9446 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9447 (__v4di) __Y, 1,
9448 (__mmask8) __M);
9449 }
9450
9451 extern __inline __mmask8
9452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9453 _mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
9454 {
9455 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9456 (__v4di) __Y, 1,
9457 (__mmask8) -1);
9458 }
9459
9460 extern __inline __mmask8
9461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9462 _mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9463 {
9464 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9465 (__v4di) __Y, 5,
9466 (__mmask8) __M);
9467 }
9468
9469 extern __inline __mmask8
9470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9471 _mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
9472 {
9473 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9474 (__v4di) __Y, 5,
9475 (__mmask8) -1);
9476 }
9477
9478 extern __inline __mmask8
9479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9480 _mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9481 {
9482 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9483 (__v4di) __Y, 2,
9484 (__mmask8) __M);
9485 }
9486
9487 extern __inline __mmask8
9488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9489 _mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
9490 {
9491 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9492 (__v4di) __Y, 2,
9493 (__mmask8) -1);
9494 }
9495
9496 extern __inline __mmask8
9497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9498 _mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9499 {
9500 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9501 (__v8si) __Y, 4,
9502 (__mmask8) __M);
9503 }
9504
9505 extern __inline __mmask8
9506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9507 _mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
9508 {
9509 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9510 (__v8si) __Y, 4,
9511 (__mmask8) -1);
9512 }
9513
9514 extern __inline __mmask8
9515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9516 _mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9517 {
9518 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9519 (__v8si) __Y, 1,
9520 (__mmask8) __M);
9521 }
9522
9523 extern __inline __mmask8
9524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9525 _mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
9526 {
9527 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9528 (__v8si) __Y, 1,
9529 (__mmask8) -1);
9530 }
9531
9532 extern __inline __mmask8
9533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9534 _mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9535 {
9536 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9537 (__v8si) __Y, 5,
9538 (__mmask8) __M);
9539 }
9540
9541 extern __inline __mmask8
9542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9543 _mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
9544 {
9545 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9546 (__v8si) __Y, 5,
9547 (__mmask8) -1);
9548 }
9549
9550 extern __inline __mmask8
9551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9552 _mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9553 {
9554 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9555 (__v8si) __Y, 2,
9556 (__mmask8) __M);
9557 }
9558
9559 extern __inline __mmask8
9560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9561 _mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
9562 {
9563 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9564 (__v8si) __Y, 2,
9565 (__mmask8) -1);
9566 }
9567
9568 extern __inline __mmask8
9569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9570 _mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9571 {
9572 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9573 (__v4di) __Y, 4,
9574 (__mmask8) __M);
9575 }
9576
9577 extern __inline __mmask8
9578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9579 _mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
9580 {
9581 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9582 (__v4di) __Y, 4,
9583 (__mmask8) -1);
9584 }
9585
9586 extern __inline __mmask8
9587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9588 _mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9589 {
9590 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9591 (__v4di) __Y, 1,
9592 (__mmask8) __M);
9593 }
9594
9595 extern __inline __mmask8
9596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9597 _mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
9598 {
9599 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9600 (__v4di) __Y, 1,
9601 (__mmask8) -1);
9602 }
9603
9604 extern __inline __mmask8
9605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9606 _mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9607 {
9608 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9609 (__v4di) __Y, 5,
9610 (__mmask8) __M);
9611 }
9612
9613 extern __inline __mmask8
9614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9615 _mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
9616 {
9617 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9618 (__v4di) __Y, 5,
9619 (__mmask8) -1);
9620 }
9621
9622 extern __inline __mmask8
9623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9624 _mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9625 {
9626 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9627 (__v4di) __Y, 2,
9628 (__mmask8) __M);
9629 }
9630
9631 extern __inline __mmask8
9632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9633 _mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
9634 {
9635 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9636 (__v4di) __Y, 2,
9637 (__mmask8) -1);
9638 }
9639
9640 extern __inline __mmask8
9641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9642 _mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9643 {
9644 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9645 (__v4si) __Y, 4,
9646 (__mmask8) __M);
9647 }
9648
9649 extern __inline __mmask8
9650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9651 _mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
9652 {
9653 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9654 (__v4si) __Y, 4,
9655 (__mmask8) -1);
9656 }
9657
9658 extern __inline __mmask8
9659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9660 _mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9661 {
9662 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9663 (__v4si) __Y, 1,
9664 (__mmask8) __M);
9665 }
9666
9667 extern __inline __mmask8
9668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9669 _mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
9670 {
9671 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9672 (__v4si) __Y, 1,
9673 (__mmask8) -1);
9674 }
9675
9676 extern __inline __mmask8
9677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9678 _mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9679 {
9680 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9681 (__v4si) __Y, 5,
9682 (__mmask8) __M);
9683 }
9684
9685 extern __inline __mmask8
9686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9687 _mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
9688 {
9689 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9690 (__v4si) __Y, 5,
9691 (__mmask8) -1);
9692 }
9693
9694 extern __inline __mmask8
9695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9696 _mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9697 {
9698 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9699 (__v4si) __Y, 2,
9700 (__mmask8) __M);
9701 }
9702
9703 extern __inline __mmask8
9704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9705 _mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
9706 {
9707 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9708 (__v4si) __Y, 2,
9709 (__mmask8) -1);
9710 }
9711
9712 extern __inline __mmask8
9713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9714 _mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9715 {
9716 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9717 (__v2di) __Y, 4,
9718 (__mmask8) __M);
9719 }
9720
9721 extern __inline __mmask8
9722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9723 _mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
9724 {
9725 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9726 (__v2di) __Y, 4,
9727 (__mmask8) -1);
9728 }
9729
9730 extern __inline __mmask8
9731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9732 _mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9733 {
9734 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9735 (__v2di) __Y, 1,
9736 (__mmask8) __M);
9737 }
9738
9739 extern __inline __mmask8
9740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9741 _mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
9742 {
9743 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9744 (__v2di) __Y, 1,
9745 (__mmask8) -1);
9746 }
9747
9748 extern __inline __mmask8
9749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9750 _mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9751 {
9752 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9753 (__v2di) __Y, 5,
9754 (__mmask8) __M);
9755 }
9756
9757 extern __inline __mmask8
9758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9759 _mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
9760 {
9761 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9762 (__v2di) __Y, 5,
9763 (__mmask8) -1);
9764 }
9765
9766 extern __inline __mmask8
9767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9768 _mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9769 {
9770 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9771 (__v2di) __Y, 2,
9772 (__mmask8) __M);
9773 }
9774
9775 extern __inline __mmask8
9776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9777 _mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
9778 {
9779 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9780 (__v2di) __Y, 2,
9781 (__mmask8) -1);
9782 }
9783
9784 extern __inline __mmask8
9785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9786 _mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9787 {
9788 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9789 (__v4si) __Y, 4,
9790 (__mmask8) __M);
9791 }
9792
9793 extern __inline __mmask8
9794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9795 _mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
9796 {
9797 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9798 (__v4si) __Y, 4,
9799 (__mmask8) -1);
9800 }
9801
9802 extern __inline __mmask8
9803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9804 _mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9805 {
9806 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9807 (__v4si) __Y, 1,
9808 (__mmask8) __M);
9809 }
9810
9811 extern __inline __mmask8
9812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9813 _mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
9814 {
9815 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9816 (__v4si) __Y, 1,
9817 (__mmask8) -1);
9818 }
9819
9820 extern __inline __mmask8
9821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9822 _mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9823 {
9824 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9825 (__v4si) __Y, 5,
9826 (__mmask8) __M);
9827 }
9828
9829 extern __inline __mmask8
9830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9831 _mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
9832 {
9833 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9834 (__v4si) __Y, 5,
9835 (__mmask8) -1);
9836 }
9837
9838 extern __inline __mmask8
9839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9840 _mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9841 {
9842 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9843 (__v4si) __Y, 2,
9844 (__mmask8) __M);
9845 }
9846
9847 extern __inline __mmask8
9848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9849 _mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
9850 {
9851 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9852 (__v4si) __Y, 2,
9853 (__mmask8) -1);
9854 }
9855
9856 extern __inline __mmask8
9857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9858 _mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9859 {
9860 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9861 (__v2di) __Y, 4,
9862 (__mmask8) __M);
9863 }
9864
9865 extern __inline __mmask8
9866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9867 _mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
9868 {
9869 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9870 (__v2di) __Y, 4,
9871 (__mmask8) -1);
9872 }
9873
9874 extern __inline __mmask8
9875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9876 _mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9877 {
9878 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9879 (__v2di) __Y, 1,
9880 (__mmask8) __M);
9881 }
9882
9883 extern __inline __mmask8
9884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9885 _mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
9886 {
9887 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9888 (__v2di) __Y, 1,
9889 (__mmask8) -1);
9890 }
9891
9892 extern __inline __mmask8
9893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9894 _mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9895 {
9896 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9897 (__v2di) __Y, 5,
9898 (__mmask8) __M);
9899 }
9900
9901 extern __inline __mmask8
9902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9903 _mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
9904 {
9905 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9906 (__v2di) __Y, 5,
9907 (__mmask8) -1);
9908 }
9909
9910 extern __inline __mmask8
9911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9912 _mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9913 {
9914 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9915 (__v2di) __Y, 2,
9916 (__mmask8) __M);
9917 }
9918
9919 extern __inline __mmask8
9920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9921 _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
9922 {
9923 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9924 (__v2di) __Y, 2,
9925 (__mmask8) -1);
9926 }
9927
9928 #ifdef __OPTIMIZE__
9929 extern __inline __m256i
9930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9931 _mm256_permutex_epi64 (__m256i __X, const int __I)
9932 {
9933 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9934 __I,
9935 (__v4di)
9936 _mm256_setzero_si256(),
9937 (__mmask8) -1);
9938 }
9939
9940 extern __inline __m256i
9941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9942 _mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
9943 __m256i __X, const int __I)
9944 {
9945 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9946 __I,
9947 (__v4di) __W,
9948 (__mmask8) __M);
9949 }
9950
9951 extern __inline __m256i
9952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9953 _mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
9954 {
9955 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9956 __I,
9957 (__v4di)
9958 _mm256_setzero_si256 (),
9959 (__mmask8) __M);
9960 }
9961
9962 extern __inline __m256d
9963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9964 _mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
9965 __m256d __B, const int __imm)
9966 {
9967 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9968 (__v4df) __B, __imm,
9969 (__v4df) __W,
9970 (__mmask8) __U);
9971 }
9972
9973 extern __inline __m256d
9974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9975 _mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
9976 const int __imm)
9977 {
9978 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9979 (__v4df) __B, __imm,
9980 (__v4df)
9981 _mm256_setzero_pd (),
9982 (__mmask8) __U);
9983 }
9984
9985 extern __inline __m128d
9986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9987 _mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
9988 __m128d __B, const int __imm)
9989 {
9990 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9991 (__v2df) __B, __imm,
9992 (__v2df) __W,
9993 (__mmask8) __U);
9994 }
9995
9996 extern __inline __m128d
9997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9998 _mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
9999 const int __imm)
10000 {
10001 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
10002 (__v2df) __B, __imm,
10003 (__v2df)
10004 _mm_setzero_pd (),
10005 (__mmask8) __U);
10006 }
10007
10008 extern __inline __m256
10009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10010 _mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
10011 __m256 __B, const int __imm)
10012 {
10013 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
10014 (__v8sf) __B, __imm,
10015 (__v8sf) __W,
10016 (__mmask8) __U);
10017 }
10018
10019 extern __inline __m256
10020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10021 _mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
10022 const int __imm)
10023 {
10024 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
10025 (__v8sf) __B, __imm,
10026 (__v8sf)
10027 _mm256_setzero_ps (),
10028 (__mmask8) __U);
10029 }
10030
10031 extern __inline __m128
10032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10033 _mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
10034 const int __imm)
10035 {
10036 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
10037 (__v4sf) __B, __imm,
10038 (__v4sf) __W,
10039 (__mmask8) __U);
10040 }
10041
10042 extern __inline __m128
10043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10044 _mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
10045 const int __imm)
10046 {
10047 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
10048 (__v4sf) __B, __imm,
10049 (__v4sf)
10050 _mm_setzero_ps (),
10051 (__mmask8) __U);
10052 }
10053
10054 extern __inline __m256i
10055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10056 _mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
10057 {
10058 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
10059 (__v4si) __B,
10060 __imm,
10061 (__v8si)
10062 _mm256_setzero_si256 (),
10063 (__mmask8) -1);
10064 }
10065
10066 extern __inline __m256i
10067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10068 _mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
10069 __m128i __B, const int __imm)
10070 {
10071 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
10072 (__v4si) __B,
10073 __imm,
10074 (__v8si) __W,
10075 (__mmask8)
10076 __U);
10077 }
10078
10079 extern __inline __m256i
10080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10081 _mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
10082 const int __imm)
10083 {
10084 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
10085 (__v4si) __B,
10086 __imm,
10087 (__v8si)
10088 _mm256_setzero_si256 (),
10089 (__mmask8)
10090 __U);
10091 }
10092
10093 extern __inline __m256
10094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10095 _mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
10096 {
10097 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
10098 (__v4sf) __B,
10099 __imm,
10100 (__v8sf)
10101 _mm256_setzero_ps (),
10102 (__mmask8) -1);
10103 }
10104
10105 extern __inline __m256
10106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10107 _mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
10108 __m128 __B, const int __imm)
10109 {
10110 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
10111 (__v4sf) __B,
10112 __imm,
10113 (__v8sf) __W,
10114 (__mmask8) __U);
10115 }
10116
10117 extern __inline __m256
10118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10119 _mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
10120 const int __imm)
10121 {
10122 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
10123 (__v4sf) __B,
10124 __imm,
10125 (__v8sf)
10126 _mm256_setzero_ps (),
10127 (__mmask8) __U);
10128 }
10129
10130 extern __inline __m128i
10131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10132 _mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
10133 {
10134 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10135 __imm,
10136 (__v4si)
10137 _mm_setzero_si128 (),
10138 (__mmask8) -1);
10139 }
10140
10141 extern __inline __m128i
10142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10143 _mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
10144 const int __imm)
10145 {
10146 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10147 __imm,
10148 (__v4si) __W,
10149 (__mmask8)
10150 __U);
10151 }
10152
10153 extern __inline __m128i
10154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10155 _mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
10156 const int __imm)
10157 {
10158 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10159 __imm,
10160 (__v4si)
10161 _mm_setzero_si128 (),
10162 (__mmask8)
10163 __U);
10164 }
10165
10166 extern __inline __m128
10167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10168 _mm256_extractf32x4_ps (__m256 __A, const int __imm)
10169 {
10170 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10171 __imm,
10172 (__v4sf)
10173 _mm_setzero_ps (),
10174 (__mmask8) -1);
10175 }
10176
10177 extern __inline __m128
10178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10179 _mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
10180 const int __imm)
10181 {
10182 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10183 __imm,
10184 (__v4sf) __W,
10185 (__mmask8)
10186 __U);
10187 }
10188
10189 extern __inline __m128
10190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10191 _mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
10192 const int __imm)
10193 {
10194 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10195 __imm,
10196 (__v4sf)
10197 _mm_setzero_ps (),
10198 (__mmask8)
10199 __U);
10200 }
10201
10202 extern __inline __m256i
10203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10204 _mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
10205 {
10206 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10207 (__v4di) __B,
10208 __imm,
10209 (__v4di)
10210 _mm256_setzero_si256 (),
10211 (__mmask8) -1);
10212 }
10213
10214 extern __inline __m256i
10215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10216 _mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
10217 __m256i __B, const int __imm)
10218 {
10219 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10220 (__v4di) __B,
10221 __imm,
10222 (__v4di) __W,
10223 (__mmask8) __U);
10224 }
10225
10226 extern __inline __m256i
10227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10228 _mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
10229 const int __imm)
10230 {
10231 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10232 (__v4di) __B,
10233 __imm,
10234 (__v4di)
10235 _mm256_setzero_si256 (),
10236 (__mmask8) __U);
10237 }
10238
10239 extern __inline __m256i
10240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10241 _mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
10242 {
10243 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10244 (__v8si) __B,
10245 __imm,
10246 (__v8si)
10247 _mm256_setzero_si256 (),
10248 (__mmask8) -1);
10249 }
10250
10251 extern __inline __m256i
10252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10253 _mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
10254 __m256i __B, const int __imm)
10255 {
10256 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10257 (__v8si) __B,
10258 __imm,
10259 (__v8si) __W,
10260 (__mmask8) __U);
10261 }
10262
10263 extern __inline __m256i
10264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10265 _mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
10266 const int __imm)
10267 {
10268 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10269 (__v8si) __B,
10270 __imm,
10271 (__v8si)
10272 _mm256_setzero_si256 (),
10273 (__mmask8) __U);
10274 }
10275
10276 extern __inline __m256d
10277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10278 _mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
10279 {
10280 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10281 (__v4df) __B,
10282 __imm,
10283 (__v4df)
10284 _mm256_setzero_pd (),
10285 (__mmask8) -1);
10286 }
10287
10288 extern __inline __m256d
10289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10290 _mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
10291 __m256d __B, const int __imm)
10292 {
10293 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10294 (__v4df) __B,
10295 __imm,
10296 (__v4df) __W,
10297 (__mmask8) __U);
10298 }
10299
10300 extern __inline __m256d
10301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10302 _mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
10303 const int __imm)
10304 {
10305 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10306 (__v4df) __B,
10307 __imm,
10308 (__v4df)
10309 _mm256_setzero_pd (),
10310 (__mmask8) __U);
10311 }
10312
10313 extern __inline __m256
10314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10315 _mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
10316 {
10317 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10318 (__v8sf) __B,
10319 __imm,
10320 (__v8sf)
10321 _mm256_setzero_ps (),
10322 (__mmask8) -1);
10323 }
10324
10325 extern __inline __m256
10326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10327 _mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
10328 __m256 __B, const int __imm)
10329 {
10330 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10331 (__v8sf) __B,
10332 __imm,
10333 (__v8sf) __W,
10334 (__mmask8) __U);
10335 }
10336
10337 extern __inline __m256
10338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10339 _mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
10340 const int __imm)
10341 {
10342 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10343 (__v8sf) __B,
10344 __imm,
10345 (__v8sf)
10346 _mm256_setzero_ps (),
10347 (__mmask8) __U);
10348 }
10349
10350 extern __inline __m256d
10351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10352 _mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
10353 const int __imm)
10354 {
10355 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
10356 (__v4df) __B,
10357 (__v4di) __C,
10358 __imm,
10359 (__mmask8) -1);
10360 }
10361
10362 extern __inline __m256d
10363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10364 _mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
10365 __m256i __C, const int __imm)
10366 {
10367 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
10368 (__v4df) __B,
10369 (__v4di) __C,
10370 __imm,
10371 (__mmask8) __U);
10372 }
10373
10374 extern __inline __m256d
10375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10376 _mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
10377 __m256i __C, const int __imm)
10378 {
10379 return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
10380 (__v4df) __B,
10381 (__v4di) __C,
10382 __imm,
10383 (__mmask8) __U);
10384 }
10385
10386 extern __inline __m256
10387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10388 _mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
10389 const int __imm)
10390 {
10391 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
10392 (__v8sf) __B,
10393 (__v8si) __C,
10394 __imm,
10395 (__mmask8) -1);
10396 }
10397
10398 extern __inline __m256
10399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10400 _mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
10401 __m256i __C, const int __imm)
10402 {
10403 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
10404 (__v8sf) __B,
10405 (__v8si) __C,
10406 __imm,
10407 (__mmask8) __U);
10408 }
10409
10410 extern __inline __m256
10411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10412 _mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
10413 __m256i __C, const int __imm)
10414 {
10415 return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
10416 (__v8sf) __B,
10417 (__v8si) __C,
10418 __imm,
10419 (__mmask8) __U);
10420 }
10421
10422 extern __inline __m128d
10423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10424 _mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
10425 const int __imm)
10426 {
10427 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
10428 (__v2df) __B,
10429 (__v2di) __C,
10430 __imm,
10431 (__mmask8) -1);
10432 }
10433
10434 extern __inline __m128d
10435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10436 _mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
10437 __m128i __C, const int __imm)
10438 {
10439 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
10440 (__v2df) __B,
10441 (__v2di) __C,
10442 __imm,
10443 (__mmask8) __U);
10444 }
10445
10446 extern __inline __m128d
10447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10448 _mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
10449 __m128i __C, const int __imm)
10450 {
10451 return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
10452 (__v2df) __B,
10453 (__v2di) __C,
10454 __imm,
10455 (__mmask8) __U);
10456 }
10457
10458 extern __inline __m128
10459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10460 _mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
10461 {
10462 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
10463 (__v4sf) __B,
10464 (__v4si) __C,
10465 __imm,
10466 (__mmask8) -1);
10467 }
10468
10469 extern __inline __m128
10470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10471 _mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
10472 __m128i __C, const int __imm)
10473 {
10474 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
10475 (__v4sf) __B,
10476 (__v4si) __C,
10477 __imm,
10478 (__mmask8) __U);
10479 }
10480
10481 extern __inline __m128
10482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10483 _mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
10484 __m128i __C, const int __imm)
10485 {
10486 return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
10487 (__v4sf) __B,
10488 (__v4si) __C,
10489 __imm,
10490 (__mmask8) __U);
10491 }
10492
10493 extern __inline __m256i
10494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10495 _mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10496 const int __imm)
10497 {
10498 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
10499 (__v8si) __W,
10500 (__mmask8) __U);
10501 }
10502
10503 extern __inline __m256i
10504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10505 _mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
10506 {
10507 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
10508 (__v8si)
10509 _mm256_setzero_si256 (),
10510 (__mmask8) __U);
10511 }
10512
10513 extern __inline __m128i
10514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10515 _mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10516 const int __imm)
10517 {
10518 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
10519 (__v4si) __W,
10520 (__mmask8) __U);
10521 }
10522
10523 extern __inline __m128i
10524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10525 _mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
10526 {
10527 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
10528 (__v4si)
10529 _mm_setzero_si128 (),
10530 (__mmask8) __U);
10531 }
10532
10533 extern __inline __m256i
10534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10535 _mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10536 const int __imm)
10537 {
10538 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
10539 (__v4di) __W,
10540 (__mmask8) __U);
10541 }
10542
10543 extern __inline __m256i
10544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10545 _mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
10546 {
10547 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
10548 (__v4di)
10549 _mm256_setzero_si256 (),
10550 (__mmask8) __U);
10551 }
10552
10553 extern __inline __m128i
10554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10555 _mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
10556 const int __imm)
10557 {
10558 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
10559 (__v2di) __W,
10560 (__mmask8) __U);
10561 }
10562
10563 extern __inline __m128i
10564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10565 _mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
10566 {
10567 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
10568 (__v2di)
10569 _mm_setzero_si128 (),
10570 (__mmask8) __U);
10571 }
10572
10573 extern __inline __m256i
10574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10575 _mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
10576 const int __imm)
10577 {
10578 return (__m256i)
10579 __builtin_ia32_pternlogq256_mask ((__v4di) __A,
10580 (__v4di) __B,
10581 (__v4di) __C,
10582 (unsigned char) __imm,
10583 (__mmask8) -1);
10584 }
10585
10586 extern __inline __m256i
10587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10588 _mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
10589 __m256i __B, __m256i __C,
10590 const int __imm)
10591 {
10592 return (__m256i)
10593 __builtin_ia32_pternlogq256_mask ((__v4di) __A,
10594 (__v4di) __B,
10595 (__v4di) __C,
10596 (unsigned char) __imm,
10597 (__mmask8) __U);
10598 }
10599
10600 extern __inline __m256i
10601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10602 _mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
10603 __m256i __B, __m256i __C,
10604 const int __imm)
10605 {
10606 return (__m256i)
10607 __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
10608 (__v4di) __B,
10609 (__v4di) __C,
10610 (unsigned char) __imm,
10611 (__mmask8) __U);
10612 }
10613
10614 extern __inline __m256i
10615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10616 _mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
10617 const int __imm)
10618 {
10619 return (__m256i)
10620 __builtin_ia32_pternlogd256_mask ((__v8si) __A,
10621 (__v8si) __B,
10622 (__v8si) __C,
10623 (unsigned char) __imm,
10624 (__mmask8) -1);
10625 }
10626
10627 extern __inline __m256i
10628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10629 _mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
10630 __m256i __B, __m256i __C,
10631 const int __imm)
10632 {
10633 return (__m256i)
10634 __builtin_ia32_pternlogd256_mask ((__v8si) __A,
10635 (__v8si) __B,
10636 (__v8si) __C,
10637 (unsigned char) __imm,
10638 (__mmask8) __U);
10639 }
10640
10641 extern __inline __m256i
10642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10643 _mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
10644 __m256i __B, __m256i __C,
10645 const int __imm)
10646 {
10647 return (__m256i)
10648 __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
10649 (__v8si) __B,
10650 (__v8si) __C,
10651 (unsigned char) __imm,
10652 (__mmask8) __U);
10653 }
10654
10655 extern __inline __m128i
10656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10657 _mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
10658 const int __imm)
10659 {
10660 return (__m128i)
10661 __builtin_ia32_pternlogq128_mask ((__v2di) __A,
10662 (__v2di) __B,
10663 (__v2di) __C,
10664 (unsigned char) __imm,
10665 (__mmask8) -1);
10666 }
10667
10668 extern __inline __m128i
10669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10670 _mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
10671 __m128i __B, __m128i __C,
10672 const int __imm)
10673 {
10674 return (__m128i)
10675 __builtin_ia32_pternlogq128_mask ((__v2di) __A,
10676 (__v2di) __B,
10677 (__v2di) __C,
10678 (unsigned char) __imm,
10679 (__mmask8) __U);
10680 }
10681
10682 extern __inline __m128i
10683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10684 _mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
10685 __m128i __B, __m128i __C,
10686 const int __imm)
10687 {
10688 return (__m128i)
10689 __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
10690 (__v2di) __B,
10691 (__v2di) __C,
10692 (unsigned char) __imm,
10693 (__mmask8) __U);
10694 }
10695
10696 extern __inline __m128i
10697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10698 _mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
10699 const int __imm)
10700 {
10701 return (__m128i)
10702 __builtin_ia32_pternlogd128_mask ((__v4si) __A,
10703 (__v4si) __B,
10704 (__v4si) __C,
10705 (unsigned char) __imm,
10706 (__mmask8) -1);
10707 }
10708
10709 extern __inline __m128i
10710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10711 _mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
10712 __m128i __B, __m128i __C,
10713 const int __imm)
10714 {
10715 return (__m128i)
10716 __builtin_ia32_pternlogd128_mask ((__v4si) __A,
10717 (__v4si) __B,
10718 (__v4si) __C,
10719 (unsigned char) __imm,
10720 (__mmask8) __U);
10721 }
10722
10723 extern __inline __m128i
10724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10725 _mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
10726 __m128i __B, __m128i __C,
10727 const int __imm)
10728 {
10729 return (__m128i)
10730 __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
10731 (__v4si) __B,
10732 (__v4si) __C,
10733 (unsigned char) __imm,
10734 (__mmask8) __U);
10735 }
10736
10737 extern __inline __m256
10738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10739 _mm256_roundscale_ps (__m256 __A, const int __imm)
10740 {
10741 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10742 __imm,
10743 (__v8sf)
10744 _mm256_setzero_ps (),
10745 (__mmask8) -1);
10746 }
10747
10748 extern __inline __m256
10749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10750 _mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
10751 const int __imm)
10752 {
10753 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10754 __imm,
10755 (__v8sf) __W,
10756 (__mmask8) __U);
10757 }
10758
10759 extern __inline __m256
10760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10761 _mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
10762 {
10763 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10764 __imm,
10765 (__v8sf)
10766 _mm256_setzero_ps (),
10767 (__mmask8) __U);
10768 }
10769
10770 extern __inline __m256d
10771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10772 _mm256_roundscale_pd (__m256d __A, const int __imm)
10773 {
10774 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10775 __imm,
10776 (__v4df)
10777 _mm256_setzero_pd (),
10778 (__mmask8) -1);
10779 }
10780
10781 extern __inline __m256d
10782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10783 _mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
10784 const int __imm)
10785 {
10786 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10787 __imm,
10788 (__v4df) __W,
10789 (__mmask8) __U);
10790 }
10791
10792 extern __inline __m256d
10793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10794 _mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
10795 {
10796 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10797 __imm,
10798 (__v4df)
10799 _mm256_setzero_pd (),
10800 (__mmask8) __U);
10801 }
10802
10803 extern __inline __m128
10804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10805 _mm_roundscale_ps (__m128 __A, const int __imm)
10806 {
10807 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10808 __imm,
10809 (__v4sf)
10810 _mm_setzero_ps (),
10811 (__mmask8) -1);
10812 }
10813
10814 extern __inline __m128
10815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10816 _mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
10817 const int __imm)
10818 {
10819 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10820 __imm,
10821 (__v4sf) __W,
10822 (__mmask8) __U);
10823 }
10824
10825 extern __inline __m128
10826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10827 _mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
10828 {
10829 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10830 __imm,
10831 (__v4sf)
10832 _mm_setzero_ps (),
10833 (__mmask8) __U);
10834 }
10835
10836 extern __inline __m128d
10837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10838 _mm_roundscale_pd (__m128d __A, const int __imm)
10839 {
10840 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10841 __imm,
10842 (__v2df)
10843 _mm_setzero_pd (),
10844 (__mmask8) -1);
10845 }
10846
10847 extern __inline __m128d
10848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10849 _mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
10850 const int __imm)
10851 {
10852 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10853 __imm,
10854 (__v2df) __W,
10855 (__mmask8) __U);
10856 }
10857
10858 extern __inline __m128d
10859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10860 _mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
10861 {
10862 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10863 __imm,
10864 (__v2df)
10865 _mm_setzero_pd (),
10866 (__mmask8) __U);
10867 }
10868
10869 extern __inline __m256
10870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10871 _mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
10872 _MM_MANTISSA_SIGN_ENUM __C)
10873 {
10874 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10875 (__C << 2) | __B,
10876 (__v8sf)
10877 _mm256_setzero_ps (),
10878 (__mmask8) -1);
10879 }
10880
10881 extern __inline __m256
10882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10883 _mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
10884 _MM_MANTISSA_NORM_ENUM __B,
10885 _MM_MANTISSA_SIGN_ENUM __C)
10886 {
10887 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10888 (__C << 2) | __B,
10889 (__v8sf) __W,
10890 (__mmask8) __U);
10891 }
10892
10893 extern __inline __m256
10894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10895 _mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
10896 _MM_MANTISSA_NORM_ENUM __B,
10897 _MM_MANTISSA_SIGN_ENUM __C)
10898 {
10899 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10900 (__C << 2) | __B,
10901 (__v8sf)
10902 _mm256_setzero_ps (),
10903 (__mmask8) __U);
10904 }
10905
10906 extern __inline __m128
10907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10908 _mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
10909 _MM_MANTISSA_SIGN_ENUM __C)
10910 {
10911 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10912 (__C << 2) | __B,
10913 (__v4sf)
10914 _mm_setzero_ps (),
10915 (__mmask8) -1);
10916 }
10917
10918 extern __inline __m128
10919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10920 _mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
10921 _MM_MANTISSA_NORM_ENUM __B,
10922 _MM_MANTISSA_SIGN_ENUM __C)
10923 {
10924 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10925 (__C << 2) | __B,
10926 (__v4sf) __W,
10927 (__mmask8) __U);
10928 }
10929
10930 extern __inline __m128
10931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10932 _mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
10933 _MM_MANTISSA_NORM_ENUM __B,
10934 _MM_MANTISSA_SIGN_ENUM __C)
10935 {
10936 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10937 (__C << 2) | __B,
10938 (__v4sf)
10939 _mm_setzero_ps (),
10940 (__mmask8) __U);
10941 }
10942
10943 extern __inline __m256d
10944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10945 _mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
10946 _MM_MANTISSA_SIGN_ENUM __C)
10947 {
10948 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10949 (__C << 2) | __B,
10950 (__v4df)
10951 _mm256_setzero_pd (),
10952 (__mmask8) -1);
10953 }
10954
10955 extern __inline __m256d
10956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10957 _mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
10958 _MM_MANTISSA_NORM_ENUM __B,
10959 _MM_MANTISSA_SIGN_ENUM __C)
10960 {
10961 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10962 (__C << 2) | __B,
10963 (__v4df) __W,
10964 (__mmask8) __U);
10965 }
10966
10967 extern __inline __m256d
10968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10969 _mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
10970 _MM_MANTISSA_NORM_ENUM __B,
10971 _MM_MANTISSA_SIGN_ENUM __C)
10972 {
10973 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10974 (__C << 2) | __B,
10975 (__v4df)
10976 _mm256_setzero_pd (),
10977 (__mmask8) __U);
10978 }
10979
10980 extern __inline __m128d
10981 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10982 _mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
10983 _MM_MANTISSA_SIGN_ENUM __C)
10984 {
10985 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10986 (__C << 2) | __B,
10987 (__v2df)
10988 _mm_setzero_pd (),
10989 (__mmask8) -1);
10990 }
10991
10992 extern __inline __m128d
10993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10994 _mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
10995 _MM_MANTISSA_NORM_ENUM __B,
10996 _MM_MANTISSA_SIGN_ENUM __C)
10997 {
10998 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10999 (__C << 2) | __B,
11000 (__v2df) __W,
11001 (__mmask8) __U);
11002 }
11003
11004 extern __inline __m128d
11005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11006 _mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
11007 _MM_MANTISSA_NORM_ENUM __B,
11008 _MM_MANTISSA_SIGN_ENUM __C)
11009 {
11010 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
11011 (__C << 2) | __B,
11012 (__v2df)
11013 _mm_setzero_pd (),
11014 (__mmask8) __U);
11015 }
11016
11017 extern __inline __m256
11018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11019 _mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
11020 __m256i __index, void const *__addr,
11021 int __scale)
11022 {
11023 return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
11024 __addr,
11025 (__v8si) __index,
11026 __mask, __scale);
11027 }
11028
11029 extern __inline __m128
11030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11031 _mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
11032 __m128i __index, void const *__addr,
11033 int __scale)
11034 {
11035 return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
11036 __addr,
11037 (__v4si) __index,
11038 __mask, __scale);
11039 }
11040
11041 extern __inline __m256d
11042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11043 _mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
11044 __m128i __index, void const *__addr,
11045 int __scale)
11046 {
11047 return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
11048 __addr,
11049 (__v4si) __index,
11050 __mask, __scale);
11051 }
11052
11053 extern __inline __m128d
11054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11055 _mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
11056 __m128i __index, void const *__addr,
11057 int __scale)
11058 {
11059 return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
11060 __addr,
11061 (__v4si) __index,
11062 __mask, __scale);
11063 }
11064
11065 extern __inline __m128
11066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11067 _mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
11068 __m256i __index, void const *__addr,
11069 int __scale)
11070 {
11071 return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
11072 __addr,
11073 (__v4di) __index,
11074 __mask, __scale);
11075 }
11076
11077 extern __inline __m128
11078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11079 _mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
11080 __m128i __index, void const *__addr,
11081 int __scale)
11082 {
11083 return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
11084 __addr,
11085 (__v2di) __index,
11086 __mask, __scale);
11087 }
11088
11089 extern __inline __m256d
11090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11091 _mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
11092 __m256i __index, void const *__addr,
11093 int __scale)
11094 {
11095 return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
11096 __addr,
11097 (__v4di) __index,
11098 __mask, __scale);
11099 }
11100
11101 extern __inline __m128d
11102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11103 _mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
11104 __m128i __index, void const *__addr,
11105 int __scale)
11106 {
11107 return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
11108 __addr,
11109 (__v2di) __index,
11110 __mask, __scale);
11111 }
11112
11113 extern __inline __m256i
11114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11115 _mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
11116 __m256i __index, void const *__addr,
11117 int __scale)
11118 {
11119 return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
11120 __addr,
11121 (__v8si) __index,
11122 __mask, __scale);
11123 }
11124
11125 extern __inline __m128i
11126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11127 _mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
11128 __m128i __index, void const *__addr,
11129 int __scale)
11130 {
11131 return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
11132 __addr,
11133 (__v4si) __index,
11134 __mask, __scale);
11135 }
11136
11137 extern __inline __m256i
11138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11139 _mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
11140 __m128i __index, void const *__addr,
11141 int __scale)
11142 {
11143 return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
11144 __addr,
11145 (__v4si) __index,
11146 __mask, __scale);
11147 }
11148
11149 extern __inline __m128i
11150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11151 _mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
11152 __m128i __index, void const *__addr,
11153 int __scale)
11154 {
11155 return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
11156 __addr,
11157 (__v4si) __index,
11158 __mask, __scale);
11159 }
11160
11161 extern __inline __m128i
11162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11163 _mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
11164 __m256i __index, void const *__addr,
11165 int __scale)
11166 {
11167 return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
11168 __addr,
11169 (__v4di) __index,
11170 __mask, __scale);
11171 }
11172
11173 extern __inline __m128i
11174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11175 _mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
11176 __m128i __index, void const *__addr,
11177 int __scale)
11178 {
11179 return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
11180 __addr,
11181 (__v2di) __index,
11182 __mask, __scale);
11183 }
11184
11185 extern __inline __m256i
11186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11187 _mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
11188 __m256i __index, void const *__addr,
11189 int __scale)
11190 {
11191 return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
11192 __addr,
11193 (__v4di) __index,
11194 __mask, __scale);
11195 }
11196
11197 extern __inline __m128i
11198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11199 _mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
11200 __m128i __index, void const *__addr,
11201 int __scale)
11202 {
11203 return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
11204 __addr,
11205 (__v2di) __index,
11206 __mask, __scale);
11207 }
11208
11209 extern __inline void
11210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11211 _mm256_i32scatter_ps (void *__addr, __m256i __index,
11212 __m256 __v1, const int __scale)
11213 {
11214 __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
11215 (__v8si) __index, (__v8sf) __v1,
11216 __scale);
11217 }
11218
11219 extern __inline void
11220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11221 _mm256_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
11222 __m256i __index, __m256 __v1,
11223 const int __scale)
11224 {
11225 __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
11226 (__v8sf) __v1, __scale);
11227 }
11228
11229 extern __inline void
11230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11231 _mm_i32scatter_ps (void *__addr, __m128i __index, __m128 __v1,
11232 const int __scale)
11233 {
11234 __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
11235 (__v4si) __index, (__v4sf) __v1,
11236 __scale);
11237 }
11238
11239 extern __inline void
11240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11241 _mm_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
11242 __m128i __index, __m128 __v1,
11243 const int __scale)
11244 {
11245 __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
11246 (__v4sf) __v1, __scale);
11247 }
11248
11249 extern __inline void
11250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11251 _mm256_i32scatter_pd (void *__addr, __m128i __index,
11252 __m256d __v1, const int __scale)
11253 {
11254 __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
11255 (__v4si) __index, (__v4df) __v1,
11256 __scale);
11257 }
11258
11259 extern __inline void
11260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11261 _mm256_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
11262 __m128i __index, __m256d __v1,
11263 const int __scale)
11264 {
11265 __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
11266 (__v4df) __v1, __scale);
11267 }
11268
11269 extern __inline void
11270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11271 _mm_i32scatter_pd (void *__addr, __m128i __index,
11272 __m128d __v1, const int __scale)
11273 {
11274 __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
11275 (__v4si) __index, (__v2df) __v1,
11276 __scale);
11277 }
11278
11279 extern __inline void
11280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11281 _mm_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
11282 __m128i __index, __m128d __v1,
11283 const int __scale)
11284 {
11285 __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
11286 (__v2df) __v1, __scale);
11287 }
11288
11289 extern __inline void
11290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11291 _mm256_i64scatter_ps (void *__addr, __m256i __index,
11292 __m128 __v1, const int __scale)
11293 {
11294 __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
11295 (__v4di) __index, (__v4sf) __v1,
11296 __scale);
11297 }
11298
11299 extern __inline void
11300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11301 _mm256_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
11302 __m256i __index, __m128 __v1,
11303 const int __scale)
11304 {
11305 __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
11306 (__v4sf) __v1, __scale);
11307 }
11308
11309 extern __inline void
11310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11311 _mm_i64scatter_ps (void *__addr, __m128i __index, __m128 __v1,
11312 const int __scale)
11313 {
11314 __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
11315 (__v2di) __index, (__v4sf) __v1,
11316 __scale);
11317 }
11318
11319 extern __inline void
11320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11321 _mm_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
11322 __m128i __index, __m128 __v1,
11323 const int __scale)
11324 {
11325 __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
11326 (__v4sf) __v1, __scale);
11327 }
11328
11329 extern __inline void
11330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11331 _mm256_i64scatter_pd (void *__addr, __m256i __index,
11332 __m256d __v1, const int __scale)
11333 {
11334 __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
11335 (__v4di) __index, (__v4df) __v1,
11336 __scale);
11337 }
11338
11339 extern __inline void
11340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11341 _mm256_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
11342 __m256i __index, __m256d __v1,
11343 const int __scale)
11344 {
11345 __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
11346 (__v4df) __v1, __scale);
11347 }
11348
11349 extern __inline void
11350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11351 _mm_i64scatter_pd (void *__addr, __m128i __index,
11352 __m128d __v1, const int __scale)
11353 {
11354 __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
11355 (__v2di) __index, (__v2df) __v1,
11356 __scale);
11357 }
11358
11359 extern __inline void
11360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11361 _mm_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
11362 __m128i __index, __m128d __v1,
11363 const int __scale)
11364 {
11365 __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
11366 (__v2df) __v1, __scale);
11367 }
11368
11369 extern __inline void
11370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11371 _mm256_i32scatter_epi32 (void *__addr, __m256i __index,
11372 __m256i __v1, const int __scale)
11373 {
11374 __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
11375 (__v8si) __index, (__v8si) __v1,
11376 __scale);
11377 }
11378
11379 extern __inline void
11380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11381 _mm256_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
11382 __m256i __index, __m256i __v1,
11383 const int __scale)
11384 {
11385 __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
11386 (__v8si) __v1, __scale);
11387 }
11388
11389 extern __inline void
11390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11391 _mm_i32scatter_epi32 (void *__addr, __m128i __index,
11392 __m128i __v1, const int __scale)
11393 {
11394 __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
11395 (__v4si) __index, (__v4si) __v1,
11396 __scale);
11397 }
11398
11399 extern __inline void
11400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11401 _mm_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
11402 __m128i __index, __m128i __v1,
11403 const int __scale)
11404 {
11405 __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
11406 (__v4si) __v1, __scale);
11407 }
11408
11409 extern __inline void
11410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11411 _mm256_i32scatter_epi64 (void *__addr, __m128i __index,
11412 __m256i __v1, const int __scale)
11413 {
11414 __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
11415 (__v4si) __index, (__v4di) __v1,
11416 __scale);
11417 }
11418
11419 extern __inline void
11420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11421 _mm256_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
11422 __m128i __index, __m256i __v1,
11423 const int __scale)
11424 {
11425 __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
11426 (__v4di) __v1, __scale);
11427 }
11428
11429 extern __inline void
11430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11431 _mm_i32scatter_epi64 (void *__addr, __m128i __index,
11432 __m128i __v1, const int __scale)
11433 {
11434 __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
11435 (__v4si) __index, (__v2di) __v1,
11436 __scale);
11437 }
11438
11439 extern __inline void
11440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11441 _mm_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
11442 __m128i __index, __m128i __v1,
11443 const int __scale)
11444 {
11445 __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
11446 (__v2di) __v1, __scale);
11447 }
11448
11449 extern __inline void
11450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11451 _mm256_i64scatter_epi32 (void *__addr, __m256i __index,
11452 __m128i __v1, const int __scale)
11453 {
11454 __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
11455 (__v4di) __index, (__v4si) __v1,
11456 __scale);
11457 }
11458
11459 extern __inline void
11460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11461 _mm256_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
11462 __m256i __index, __m128i __v1,
11463 const int __scale)
11464 {
11465 __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
11466 (__v4si) __v1, __scale);
11467 }
11468
11469 extern __inline void
11470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11471 _mm_i64scatter_epi32 (void *__addr, __m128i __index,
11472 __m128i __v1, const int __scale)
11473 {
11474 __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
11475 (__v2di) __index, (__v4si) __v1,
11476 __scale);
11477 }
11478
11479 extern __inline void
11480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11481 _mm_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
11482 __m128i __index, __m128i __v1,
11483 const int __scale)
11484 {
11485 __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
11486 (__v4si) __v1, __scale);
11487 }
11488
11489 extern __inline void
11490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11491 _mm256_i64scatter_epi64 (void *__addr, __m256i __index,
11492 __m256i __v1, const int __scale)
11493 {
11494 __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
11495 (__v4di) __index, (__v4di) __v1,
11496 __scale);
11497 }
11498
11499 extern __inline void
11500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11501 _mm256_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
11502 __m256i __index, __m256i __v1,
11503 const int __scale)
11504 {
11505 __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
11506 (__v4di) __v1, __scale);
11507 }
11508
11509 extern __inline void
11510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11511 _mm_i64scatter_epi64 (void *__addr, __m128i __index,
11512 __m128i __v1, const int __scale)
11513 {
11514 __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
11515 (__v2di) __index, (__v2di) __v1,
11516 __scale);
11517 }
11518
11519 extern __inline void
11520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11521 _mm_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
11522 __m128i __index, __m128i __v1,
11523 const int __scale)
11524 {
11525 __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
11526 (__v2di) __v1, __scale);
11527 }
11528
11529 extern __inline __m256i
11530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11531 _mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11532 _MM_PERM_ENUM __mask)
11533 {
11534 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
11535 (__v8si) __W,
11536 (__mmask8) __U);
11537 }
11538
11539 extern __inline __m256i
11540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11541 _mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
11542 _MM_PERM_ENUM __mask)
11543 {
11544 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
11545 (__v8si)
11546 _mm256_setzero_si256 (),
11547 (__mmask8) __U);
11548 }
11549
11550 extern __inline __m128i
11551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11552 _mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11553 _MM_PERM_ENUM __mask)
11554 {
11555 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
11556 (__v4si) __W,
11557 (__mmask8) __U);
11558 }
11559
11560 extern __inline __m128i
11561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11562 _mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
11563 _MM_PERM_ENUM __mask)
11564 {
11565 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
11566 (__v4si)
11567 _mm_setzero_si128 (),
11568 (__mmask8) __U);
11569 }
11570
11571 extern __inline __m256i
11572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11573 _mm256_rol_epi32 (__m256i __A, const int __B)
11574 {
11575 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11576 (__v8si)
11577 _mm256_setzero_si256 (),
11578 (__mmask8) -1);
11579 }
11580
11581 extern __inline __m256i
11582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11583 _mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11584 const int __B)
11585 {
11586 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11587 (__v8si) __W,
11588 (__mmask8) __U);
11589 }
11590
11591 extern __inline __m256i
11592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11593 _mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
11594 {
11595 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11596 (__v8si)
11597 _mm256_setzero_si256 (),
11598 (__mmask8) __U);
11599 }
11600
11601 extern __inline __m128i
11602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11603 _mm_rol_epi32 (__m128i __A, const int __B)
11604 {
11605 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11606 (__v4si)
11607 _mm_setzero_si128 (),
11608 (__mmask8) -1);
11609 }
11610
11611 extern __inline __m128i
11612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11613 _mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11614 const int __B)
11615 {
11616 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11617 (__v4si) __W,
11618 (__mmask8) __U);
11619 }
11620
11621 extern __inline __m128i
11622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11623 _mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
11624 {
11625 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11626 (__v4si)
11627 _mm_setzero_si128 (),
11628 (__mmask8) __U);
11629 }
11630
11631 extern __inline __m256i
11632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11633 _mm256_ror_epi32 (__m256i __A, const int __B)
11634 {
11635 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11636 (__v8si)
11637 _mm256_setzero_si256 (),
11638 (__mmask8) -1);
11639 }
11640
11641 extern __inline __m256i
11642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11643 _mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11644 const int __B)
11645 {
11646 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11647 (__v8si) __W,
11648 (__mmask8) __U);
11649 }
11650
11651 extern __inline __m256i
11652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11653 _mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
11654 {
11655 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11656 (__v8si)
11657 _mm256_setzero_si256 (),
11658 (__mmask8) __U);
11659 }
11660
11661 extern __inline __m128i
11662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11663 _mm_ror_epi32 (__m128i __A, const int __B)
11664 {
11665 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11666 (__v4si)
11667 _mm_setzero_si128 (),
11668 (__mmask8) -1);
11669 }
11670
11671 extern __inline __m128i
11672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11673 _mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11674 const int __B)
11675 {
11676 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11677 (__v4si) __W,
11678 (__mmask8) __U);
11679 }
11680
11681 extern __inline __m128i
11682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11683 _mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
11684 {
11685 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11686 (__v4si)
11687 _mm_setzero_si128 (),
11688 (__mmask8) __U);
11689 }
11690
11691 extern __inline __m256i
11692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11693 _mm256_rol_epi64 (__m256i __A, const int __B)
11694 {
11695 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11696 (__v4di)
11697 _mm256_setzero_si256 (),
11698 (__mmask8) -1);
11699 }
11700
11701 extern __inline __m256i
11702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11703 _mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11704 const int __B)
11705 {
11706 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11707 (__v4di) __W,
11708 (__mmask8) __U);
11709 }
11710
11711 extern __inline __m256i
11712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11713 _mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
11714 {
11715 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11716 (__v4di)
11717 _mm256_setzero_si256 (),
11718 (__mmask8) __U);
11719 }
11720
11721 extern __inline __m128i
11722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11723 _mm_rol_epi64 (__m128i __A, const int __B)
11724 {
11725 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11726 (__v2di)
11727 _mm_setzero_si128 (),
11728 (__mmask8) -1);
11729 }
11730
11731 extern __inline __m128i
11732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11733 _mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11734 const int __B)
11735 {
11736 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11737 (__v2di) __W,
11738 (__mmask8) __U);
11739 }
11740
11741 extern __inline __m128i
11742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11743 _mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
11744 {
11745 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11746 (__v2di)
11747 _mm_setzero_si128 (),
11748 (__mmask8) __U);
11749 }
11750
11751 extern __inline __m256i
11752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11753 _mm256_ror_epi64 (__m256i __A, const int __B)
11754 {
11755 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11756 (__v4di)
11757 _mm256_setzero_si256 (),
11758 (__mmask8) -1);
11759 }
11760
11761 extern __inline __m256i
11762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11763 _mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11764 const int __B)
11765 {
11766 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11767 (__v4di) __W,
11768 (__mmask8) __U);
11769 }
11770
11771 extern __inline __m256i
11772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11773 _mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
11774 {
11775 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11776 (__v4di)
11777 _mm256_setzero_si256 (),
11778 (__mmask8) __U);
11779 }
11780
11781 extern __inline __m128i
11782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11783 _mm_ror_epi64 (__m128i __A, const int __B)
11784 {
11785 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11786 (__v2di)
11787 _mm_setzero_si128 (),
11788 (__mmask8) -1);
11789 }
11790
11791 extern __inline __m128i
11792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11793 _mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11794 const int __B)
11795 {
11796 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11797 (__v2di) __W,
11798 (__mmask8) __U);
11799 }
11800
11801 extern __inline __m128i
11802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11803 _mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
11804 {
11805 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11806 (__v2di)
11807 _mm_setzero_si128 (),
11808 (__mmask8) __U);
11809 }
11810
11811 extern __inline __m128i
11812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11813 _mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
11814 {
11815 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11816 (__v4si) __B, __imm,
11817 (__v4si)
11818 _mm_setzero_si128 (),
11819 (__mmask8) -1);
11820 }
11821
11822 extern __inline __m128i
11823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11824 _mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11825 __m128i __B, const int __imm)
11826 {
11827 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11828 (__v4si) __B, __imm,
11829 (__v4si) __W,
11830 (__mmask8) __U);
11831 }
11832
11833 extern __inline __m128i
11834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11835 _mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
11836 const int __imm)
11837 {
11838 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11839 (__v4si) __B, __imm,
11840 (__v4si)
11841 _mm_setzero_si128 (),
11842 (__mmask8) __U);
11843 }
11844
11845 extern __inline __m128i
11846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11847 _mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
11848 {
11849 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11850 (__v2di) __B, __imm,
11851 (__v2di)
11852 _mm_setzero_si128 (),
11853 (__mmask8) -1);
11854 }
11855
11856 extern __inline __m128i
11857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11858 _mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11859 __m128i __B, const int __imm)
11860 {
11861 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11862 (__v2di) __B, __imm,
11863 (__v2di) __W,
11864 (__mmask8) __U);
11865 }
11866
11867 extern __inline __m128i
11868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11869 _mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
11870 const int __imm)
11871 {
11872 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11873 (__v2di) __B, __imm,
11874 (__v2di)
11875 _mm_setzero_si128 (),
11876 (__mmask8) __U);
11877 }
11878
11879 extern __inline __m256i
11880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11881 _mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
11882 {
11883 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11884 (__v8si) __B, __imm,
11885 (__v8si)
11886 _mm256_setzero_si256 (),
11887 (__mmask8) -1);
11888 }
11889
11890 extern __inline __m256i
11891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11892 _mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11893 __m256i __B, const int __imm)
11894 {
11895 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11896 (__v8si) __B, __imm,
11897 (__v8si) __W,
11898 (__mmask8) __U);
11899 }
11900
11901 extern __inline __m256i
11902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11903 _mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
11904 const int __imm)
11905 {
11906 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11907 (__v8si) __B, __imm,
11908 (__v8si)
11909 _mm256_setzero_si256 (),
11910 (__mmask8) __U);
11911 }
11912
11913 extern __inline __m256i
11914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11915 _mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
11916 {
11917 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11918 (__v4di) __B, __imm,
11919 (__v4di)
11920 _mm256_setzero_si256 (),
11921 (__mmask8) -1);
11922 }
11923
11924 extern __inline __m256i
11925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11926 _mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11927 __m256i __B, const int __imm)
11928 {
11929 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11930 (__v4di) __B, __imm,
11931 (__v4di) __W,
11932 (__mmask8) __U);
11933 }
11934
11935 extern __inline __m256i
11936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11937 _mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
11938 const int __imm)
11939 {
11940 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11941 (__v4di) __B, __imm,
11942 (__v4di)
11943 _mm256_setzero_si256 (),
11944 (__mmask8) __U);
11945 }
11946
11947 extern __inline __m128i
11948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11949 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
11950 const int __I)
11951 {
11952 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11953 (__v8hi) __W,
11954 (__mmask8) __U);
11955 }
11956
11957 extern __inline __m128i
11958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11959 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
11960 {
11961 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11962 (__v8hi)
11963 _mm_setzero_si128 (),
11964 (__mmask8) __U);
11965 }
11966
11967 extern __inline __m128i
11968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11969 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
11970 const int __I)
11971 {
11972 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11973 (__v8hi) __W,
11974 (__mmask8) __U);
11975 }
11976
11977 extern __inline __m128i
11978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11979 _mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
11980 {
11981 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11982 (__v8hi)
11983 _mm_setzero_si128 (),
11984 (__mmask8) __U);
11985 }
11986
11987 extern __inline __m256i
11988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11989 _mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11990 const int __imm)
11991 {
11992 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11993 (__v8si) __W,
11994 (__mmask8) __U);
11995 }
11996
11997 extern __inline __m256i
11998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11999 _mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
12000 {
12001 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
12002 (__v8si)
12003 _mm256_setzero_si256 (),
12004 (__mmask8) __U);
12005 }
12006
12007 extern __inline __m128i
12008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12009 _mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
12010 const int __imm)
12011 {
12012 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
12013 (__v4si) __W,
12014 (__mmask8) __U);
12015 }
12016
12017 extern __inline __m128i
12018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12019 _mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
12020 {
12021 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
12022 (__v4si)
12023 _mm_setzero_si128 (),
12024 (__mmask8) __U);
12025 }
12026
12027 extern __inline __m256i
12028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12029 _mm256_srai_epi64 (__m256i __A, const int __imm)
12030 {
12031 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
12032 (__v4di)
12033 _mm256_setzero_si256 (),
12034 (__mmask8) -1);
12035 }
12036
12037 extern __inline __m256i
12038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12039 _mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
12040 const int __imm)
12041 {
12042 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
12043 (__v4di) __W,
12044 (__mmask8) __U);
12045 }
12046
12047 extern __inline __m256i
12048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12049 _mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
12050 {
12051 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
12052 (__v4di)
12053 _mm256_setzero_si256 (),
12054 (__mmask8) __U);
12055 }
12056
12057 extern __inline __m128i
12058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12059 _mm_srai_epi64 (__m128i __A, const int __imm)
12060 {
12061 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
12062 (__v2di)
12063 _mm_setzero_si128 (),
12064 (__mmask8) -1);
12065 }
12066
12067 extern __inline __m128i
12068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12069 _mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
12070 const int __imm)
12071 {
12072 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
12073 (__v2di) __W,
12074 (__mmask8) __U);
12075 }
12076
12077 extern __inline __m128i
12078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12079 _mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
12080 {
12081 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
12082 (__v2di)
12083 _mm_setzero_si128 (),
12084 (__mmask8) __U);
12085 }
12086
12087 extern __inline __m128i
12088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12089 _mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
12090 {
12091 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
12092 (__v4si) __W,
12093 (__mmask8) __U);
12094 }
12095
12096 extern __inline __m128i
12097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12098 _mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
12099 {
12100 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
12101 (__v4si)
12102 _mm_setzero_si128 (),
12103 (__mmask8) __U);
12104 }
12105
12106 extern __inline __m128i
12107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12108 _mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
12109 {
12110 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
12111 (__v2di) __W,
12112 (__mmask8) __U);
12113 }
12114
12115 extern __inline __m128i
12116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12117 _mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
12118 {
12119 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
12120 (__v2di)
12121 _mm_setzero_si128 (),
12122 (__mmask8) __U);
12123 }
12124
12125 extern __inline __m256i
12126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12127 _mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
12128 int __B)
12129 {
12130 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
12131 (__v8si) __W,
12132 (__mmask8) __U);
12133 }
12134
12135 extern __inline __m256i
12136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12137 _mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
12138 {
12139 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
12140 (__v8si)
12141 _mm256_setzero_si256 (),
12142 (__mmask8) __U);
12143 }
12144
12145 extern __inline __m256i
12146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12147 _mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
12148 int __B)
12149 {
12150 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
12151 (__v4di) __W,
12152 (__mmask8) __U);
12153 }
12154
12155 extern __inline __m256i
12156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12157 _mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
12158 {
12159 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
12160 (__v4di)
12161 _mm256_setzero_si256 (),
12162 (__mmask8) __U);
12163 }
12164
12165 extern __inline __m256d
12166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12167 _mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
12168 const int __imm)
12169 {
12170 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
12171 (__v4df) __W,
12172 (__mmask8) __U);
12173 }
12174
12175 extern __inline __m256d
12176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12177 _mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
12178 {
12179 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
12180 (__v4df)
12181 _mm256_setzero_pd (),
12182 (__mmask8) __U);
12183 }
12184
12185 extern __inline __m256d
12186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12187 _mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
12188 const int __C)
12189 {
12190 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
12191 (__v4df) __W,
12192 (__mmask8) __U);
12193 }
12194
12195 extern __inline __m256d
12196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12197 _mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
12198 {
12199 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
12200 (__v4df)
12201 _mm256_setzero_pd (),
12202 (__mmask8) __U);
12203 }
12204
12205 extern __inline __m128d
12206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12207 _mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
12208 const int __C)
12209 {
12210 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
12211 (__v2df) __W,
12212 (__mmask8) __U);
12213 }
12214
12215 extern __inline __m128d
12216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12217 _mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
12218 {
12219 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
12220 (__v2df)
12221 _mm_setzero_pd (),
12222 (__mmask8) __U);
12223 }
12224
12225 extern __inline __m256
12226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12227 _mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
12228 const int __C)
12229 {
12230 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
12231 (__v8sf) __W,
12232 (__mmask8) __U);
12233 }
12234
12235 extern __inline __m256
12236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12237 _mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
12238 {
12239 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
12240 (__v8sf)
12241 _mm256_setzero_ps (),
12242 (__mmask8) __U);
12243 }
12244
12245 extern __inline __m128
12246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12247 _mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
12248 const int __C)
12249 {
12250 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
12251 (__v4sf) __W,
12252 (__mmask8) __U);
12253 }
12254
12255 extern __inline __m128
12256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12257 _mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
12258 {
12259 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
12260 (__v4sf)
12261 _mm_setzero_ps (),
12262 (__mmask8) __U);
12263 }
12264
12265 extern __inline __m256d
12266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12267 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
12268 {
12269 return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
12270 (__v4df) __W,
12271 (__mmask8) __U);
12272 }
12273
12274 extern __inline __m256
12275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12276 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
12277 {
12278 return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
12279 (__v8sf) __W,
12280 (__mmask8) __U);
12281 }
12282
12283 extern __inline __m256i
12284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12285 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
12286 {
12287 return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
12288 (__v4di) __W,
12289 (__mmask8) __U);
12290 }
12291
12292 extern __inline __m256i
12293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12294 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
12295 {
12296 return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
12297 (__v8si) __W,
12298 (__mmask8) __U);
12299 }
12300
12301 extern __inline __m128d
12302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12303 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
12304 {
12305 return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
12306 (__v2df) __W,
12307 (__mmask8) __U);
12308 }
12309
12310 extern __inline __m128
12311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12312 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
12313 {
12314 return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
12315 (__v4sf) __W,
12316 (__mmask8) __U);
12317 }
12318
12319 extern __inline __m128i
12320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12321 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
12322 {
12323 return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
12324 (__v2di) __W,
12325 (__mmask8) __U);
12326 }
12327
12328 extern __inline __m128i
12329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12330 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
12331 {
12332 return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
12333 (__v4si) __W,
12334 (__mmask8) __U);
12335 }
12336
12337 extern __inline __mmask8
12338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12339 _mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
12340 {
12341 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12342 (__v4di) __Y, __P,
12343 (__mmask8) -1);
12344 }
12345
12346 extern __inline __mmask8
12347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12348 _mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
12349 {
12350 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
12351 (__v8si) __Y, __P,
12352 (__mmask8) -1);
12353 }
12354
12355 extern __inline __mmask8
12356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12357 _mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
12358 {
12359 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
12360 (__v4di) __Y, __P,
12361 (__mmask8) -1);
12362 }
12363
12364 extern __inline __mmask8
12365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12366 _mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
12367 {
12368 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
12369 (__v8si) __Y, __P,
12370 (__mmask8) -1);
12371 }
12372
12373 extern __inline __mmask8
12374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12375 _mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
12376 {
12377 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
12378 (__v4df) __Y, __P,
12379 (__mmask8) -1);
12380 }
12381
12382 extern __inline __mmask8
12383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12384 _mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
12385 {
12386 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
12387 (__v8sf) __Y, __P,
12388 (__mmask8) -1);
12389 }
12390
12391 extern __inline __mmask8
12392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12393 _mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12394 const int __P)
12395 {
12396 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12397 (__v4di) __Y, __P,
12398 (__mmask8) __U);
12399 }
12400
12401 extern __inline __mmask8
12402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12403 _mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12404 const int __P)
12405 {
12406 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
12407 (__v8si) __Y, __P,
12408 (__mmask8) __U);
12409 }
12410
12411 extern __inline __mmask8
12412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12413 _mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12414 const int __P)
12415 {
12416 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
12417 (__v4di) __Y, __P,
12418 (__mmask8) __U);
12419 }
12420
12421 extern __inline __mmask8
12422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12423 _mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12424 const int __P)
12425 {
12426 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
12427 (__v8si) __Y, __P,
12428 (__mmask8) __U);
12429 }
12430
12431 extern __inline __mmask8
12432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12433 _mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
12434 const int __P)
12435 {
12436 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
12437 (__v4df) __Y, __P,
12438 (__mmask8) __U);
12439 }
12440
12441 extern __inline __mmask8
12442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12443 _mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
12444 const int __P)
12445 {
12446 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
12447 (__v8sf) __Y, __P,
12448 (__mmask8) __U);
12449 }
12450
12451 extern __inline __mmask8
12452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12453 _mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
12454 {
12455 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12456 (__v2di) __Y, __P,
12457 (__mmask8) -1);
12458 }
12459
12460 extern __inline __mmask8
12461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12462 _mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
12463 {
12464 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12465 (__v4si) __Y, __P,
12466 (__mmask8) -1);
12467 }
12468
12469 extern __inline __mmask8
12470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12471 _mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
12472 {
12473 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12474 (__v2di) __Y, __P,
12475 (__mmask8) -1);
12476 }
12477
12478 extern __inline __mmask8
12479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12480 _mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
12481 {
12482 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12483 (__v4si) __Y, __P,
12484 (__mmask8) -1);
12485 }
12486
12487 extern __inline __mmask8
12488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12489 _mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
12490 {
12491 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
12492 (__v2df) __Y, __P,
12493 (__mmask8) -1);
12494 }
12495
12496 extern __inline __mmask8
12497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12498 _mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
12499 {
12500 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
12501 (__v4sf) __Y, __P,
12502 (__mmask8) -1);
12503 }
12504
12505 extern __inline __mmask8
12506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12507 _mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12508 const int __P)
12509 {
12510 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12511 (__v2di) __Y, __P,
12512 (__mmask8) __U);
12513 }
12514
12515 extern __inline __mmask8
12516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12517 _mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12518 const int __P)
12519 {
12520 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12521 (__v4si) __Y, __P,
12522 (__mmask8) __U);
12523 }
12524
12525 extern __inline __mmask8
12526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12527 _mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12528 const int __P)
12529 {
12530 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12531 (__v2di) __Y, __P,
12532 (__mmask8) __U);
12533 }
12534
12535 extern __inline __mmask8
12536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12537 _mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12538 const int __P)
12539 {
12540 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12541 (__v4si) __Y, __P,
12542 (__mmask8) __U);
12543 }
12544
12545 extern __inline __mmask8
12546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12547 _mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
12548 const int __P)
12549 {
12550 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
12551 (__v2df) __Y, __P,
12552 (__mmask8) __U);
12553 }
12554
12555 extern __inline __mmask8
12556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12557 _mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
12558 const int __P)
12559 {
12560 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
12561 (__v4sf) __Y, __P,
12562 (__mmask8) __U);
12563 }
12564
12565 extern __inline __m256d
12566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12567 _mm256_permutex_pd (__m256d __X, const int __M)
12568 {
12569 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
12570 (__v4df)
12571 _mm256_undefined_pd (),
12572 (__mmask8) -1);
12573 }
12574
12575 #else
/* Macro counterpart of the inline _mm256_permutex_pd, selected by the
   #else of a conditional whose #if is outside this excerpt.  Passes X
   and the immediate M to the permdf256 builtin with an undefined merge
   operand and an all-ones mask.  */
12576 #define _mm256_permutex_pd(X, M) \
12577 ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(X), (int)(M), \
12578 (__v4df)(__m256d) \
12579 _mm256_undefined_pd (), \
12580 (__mmask8)-1))
12581
/* Unmasked form: X and the immediate I go to the permdi256 builtin with
   a zero vector as merge operand and an all-ones mask.  */
12582 #define _mm256_permutex_epi64(X, I) \
12583 ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
12584 (int)(I), \
12585 (__v4di)(__m256i) \
12586 (_mm256_setzero_si256 ()),\
12587 (__mmask8) -1))
12588
/* Zero-masking form: same call, with M as the mask and a zero vector as
   merge operand.  */
12589 #define _mm256_maskz_permutex_epi64(M, X, I) \
12590 ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
12591 (int)(I), \
12592 (__v4di)(__m256i) \
12593 (_mm256_setzero_si256 ()),\
12594 (__mmask8)(M)))
12595
/* Merge-masking form: W is the merge operand and M the mask.  */
12596 #define _mm256_mask_permutex_epi64(W, M, X, I) \
12597 ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
12598 (int)(I), \
12599 (__v4di)(__m256i)(W), \
12600 (__mmask8)(M)))
12601
/* Unmasked form: passes the 256-bit X, the 128-bit Y and the immediate C
   to the insertf32x4_256 builtin with a zero merge operand and an
   all-ones mask.  */
12602 #define _mm256_insertf32x4(X, Y, C) \
12603 ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
12604 (__v4sf)(__m128) (Y), (int) (C), \
12605 (__v8sf)(__m256)_mm256_setzero_ps (), \
12606 (__mmask8)-1))
12607
/* Merge-masking form: W is the merge operand and U the mask.  */
12608 #define _mm256_mask_insertf32x4(W, U, X, Y, C) \
12609 ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
12610 (__v4sf)(__m128) (Y), (int) (C), \
12611 (__v8sf)(__m256)(W), \
12612 (__mmask8)(U)))
12613
/* Zero-masking form: zero merge operand, U as the mask.  */
12614 #define _mm256_maskz_insertf32x4(U, X, Y, C) \
12615 ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
12616 (__v4sf)(__m128) (Y), (int) (C), \
12617 (__v8sf)(__m256)_mm256_setzero_ps (), \
12618 (__mmask8)(U)))
12619
/* Unmasked form: passes the 256-bit X, the 128-bit Y and the immediate C
   to the inserti32x4_256 builtin with a zero merge operand and an
   all-ones mask.  */
12620 #define _mm256_inserti32x4(X, Y, C) \
12621 ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
12622 (__v4si)(__m128i) (Y), (int) (C), \
12623 (__v8si)(__m256i)_mm256_setzero_si256 (), \
12624 (__mmask8)-1))
12625
/* Merge-masking form: W is the merge operand and U the mask.  */
12626 #define _mm256_mask_inserti32x4(W, U, X, Y, C) \
12627 ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
12628 (__v4si)(__m128i) (Y), (int) (C), \
12629 (__v8si)(__m256i)(W), \
12630 (__mmask8)(U)))
12631
/* Zero-masking form: zero merge operand, U as the mask.  */
12632 #define _mm256_maskz_inserti32x4(U, X, Y, C) \
12633 ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
12634 (__v4si)(__m128i) (Y), (int) (C), \
12635 (__v8si)(__m256i)_mm256_setzero_si256 (), \
12636 (__mmask8)(U)))
12637
/* Unmasked form: passes the 256-bit X and the immediate C to the
   extractf32x4_256 builtin, yielding a __m128, with a zero merge operand
   and an all-ones mask.  */
12638 #define _mm256_extractf32x4_ps(X, C) \
12639 ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
12640 (int) (C), \
12641 (__v4sf)(__m128)_mm_setzero_ps (), \
12642 (__mmask8)-1))
12643
/* Merge-masking form: the 128-bit W is the merge operand, U the mask.  */
12644 #define _mm256_mask_extractf32x4_ps(W, U, X, C) \
12645 ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
12646 (int) (C), \
12647 (__v4sf)(__m128)(W), \
12648 (__mmask8)(U)))
12649
/* Zero-masking form: zero merge operand, U as the mask.  */
12650 #define _mm256_maskz_extractf32x4_ps(U, X, C) \
12651 ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
12652 (int) (C), \
12653 (__v4sf)(__m128)_mm_setzero_ps (), \
12654 (__mmask8)(U)))
12655
/* Unmasked form: passes the 256-bit X and the immediate C to the
   extracti32x4_256 builtin, yielding a __m128i, with a zero merge
   operand and an all-ones mask.  */
12656 #define _mm256_extracti32x4_epi32(X, C) \
12657 ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
12658 (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))
12659
/* Merge-masking form: the 128-bit W is the merge operand, U the mask.  */
12660 #define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
12661 ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
12662 (int) (C), (__v4si)(__m128i)(W), (__mmask8)(U)))
12663
/* Zero-masking form: zero merge operand, U as the mask.  */
12664 #define _mm256_maskz_extracti32x4_epi32(U, X, C) \
12665 ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
12666 (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
12667
/* Unmasked form: passes X, Y (as __v4di) and the immediate C to the
   shuf_i64x2_256 builtin with a zero merge operand and an all-ones
   mask.  */
12668 #define _mm256_shuffle_i64x2(X, Y, C) \
12669 ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
12670 (__v4di)(__m256i)(Y), (int)(C), \
12671 (__v4di)(__m256i)_mm256_setzero_si256 (), \
12672 (__mmask8)-1))
12673
/* Merge-masking form: W is the merge operand and U the mask.  */
12674 #define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
12675 ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
12676 (__v4di)(__m256i)(Y), (int)(C), \
12677 (__v4di)(__m256i)(W),\
12678 (__mmask8)(U)))
12679
/* Zero-masking form: zero merge operand, U as the mask.  */
12680 #define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
12681 ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
12682 (__v4di)(__m256i)(Y), (int)(C), \
12683 (__v4di)(__m256i)_mm256_setzero_si256 (), \
12684 (__mmask8)(U)))
12685
/* Unmasked form: passes X, Y (as __v8si) and the immediate C to the
   shuf_i32x4_256 builtin with a zero merge operand and an all-ones
   mask.  */
12686 #define _mm256_shuffle_i32x4(X, Y, C) \
12687 ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
12688 (__v8si)(__m256i)(Y), (int)(C), \
12689 (__v8si)(__m256i) \
12690 _mm256_setzero_si256 (), \
12691 (__mmask8)-1))
12692
/* Merge-masking form: W is the merge operand and U the mask.  */
12693 #define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
12694 ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
12695 (__v8si)(__m256i)(Y), (int)(C), \
12696 (__v8si)(__m256i)(W), \
12697 (__mmask8)(U)))
12698
/* Zero-masking form: zero merge operand, U as the mask.  */
12699 #define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
12700 ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
12701 (__v8si)(__m256i)(Y), (int)(C), \
12702 (__v8si)(__m256i) \
12703 _mm256_setzero_si256 (), \
12704 (__mmask8)(U)))
12705
/* Unmasked form: passes X, Y (as __v4df) and the immediate C to the
   shuf_f64x2_256 builtin with a zero merge operand and an all-ones
   mask.  */
12706 #define _mm256_shuffle_f64x2(X, Y, C) \
12707 ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
12708 (__v4df)(__m256d)(Y), (int)(C), \
12709 (__v4df)(__m256d)_mm256_setzero_pd (),\
12710 (__mmask8)-1))
12711
/* Merge-masking form: W is the merge operand and U the mask.  */
12712 #define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
12713 ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
12714 (__v4df)(__m256d)(Y), (int)(C), \
12715 (__v4df)(__m256d)(W), \
12716 (__mmask8)(U)))
12717
/* Zero-masking form: zero merge operand, U as the mask.  */
12718 #define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
12719 ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
12720 (__v4df)(__m256d)(Y), (int)(C), \
12721 (__v4df)(__m256d)_mm256_setzero_pd( ),\
12722 (__mmask8)(U)))
12723
/* Unmasked form: passes X, Y (as __v8sf) and the immediate C to the
   shuf_f32x4_256 builtin with a zero merge operand and an all-ones
   mask.  */
12724 #define _mm256_shuffle_f32x4(X, Y, C) \
12725 ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
12726 (__v8sf)(__m256)(Y), (int)(C), \
12727 (__v8sf)(__m256)_mm256_setzero_ps (), \
12728 (__mmask8)-1))
12729
/* Merge-masking form: W is the merge operand and U the mask.  */
12730 #define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
12731 ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
12732 (__v8sf)(__m256)(Y), (int)(C), \
12733 (__v8sf)(__m256)(W), \
12734 (__mmask8)(U)))
12735
/* Zero-masking form: zero merge operand, U as the mask.  */
12736 #define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
12737 ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
12738 (__v8sf)(__m256)(Y), (int)(C), \
12739 (__v8sf)(__m256)_mm256_setzero_ps (), \
12740 (__mmask8)(U)))
12741
/* Merge-masking form: passes A, B (as __v4df) and the immediate C to the
   shufpd256 builtin, with W as the merge operand and U as the mask.  */
12742 #define _mm256_mask_shuffle_pd(W, U, A, B, C) \
12743 ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \
12744 (__v4df)(__m256d)(B), (int)(C), \
12745 (__v4df)(__m256d)(W), \
12746 (__mmask8)(U)))
12747
/* Zero-masking form: same call with a zero merge operand.  */
12748 #define _mm256_maskz_shuffle_pd(U, A, B, C) \
12749 ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \
12750 (__v4df)(__m256d)(B), (int)(C), \
12751 (__v4df)(__m256d) \
12752 _mm256_setzero_pd (), \
12753 (__mmask8)(U)))
12754
/* Merge-masking form: passes A, B (as __v2df) and the immediate C to the
   shufpd128 builtin, with W as the merge operand and U as the mask.  */
12755 #define _mm_mask_shuffle_pd(W, U, A, B, C) \
12756 ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \
12757 (__v2df)(__m128d)(B), (int)(C), \
12758 (__v2df)(__m128d)(W), \
12759 (__mmask8)(U)))
12760
/* Zero-masking form: same call with a zero merge operand.  */
12761 #define _mm_maskz_shuffle_pd(U, A, B, C) \
12762 ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \
12763 (__v2df)(__m128d)(B), (int)(C), \
12764 (__v2df)(__m128d)_mm_setzero_pd (), \
12765 (__mmask8)(U)))
12766
/* Merge-masking form: passes A, B (as __v8sf) and the immediate C to the
   shufps256 builtin, with W as the merge operand and U as the mask.  */
12767 #define _mm256_mask_shuffle_ps(W, U, A, B, C) \
12768 ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \
12769 (__v8sf)(__m256)(B), (int)(C), \
12770 (__v8sf)(__m256)(W), \
12771 (__mmask8)(U)))
12772
/* Zero-masking form: same call with a zero merge operand.  */
12773 #define _mm256_maskz_shuffle_ps(U, A, B, C) \
12774 ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \
12775 (__v8sf)(__m256)(B), (int)(C), \
12776 (__v8sf)(__m256)_mm256_setzero_ps (),\
12777 (__mmask8)(U)))
12778
/* Merge-masking form: passes A, B (as __v4sf) and the immediate C to the
   shufps128 builtin, with W as the merge operand and U as the mask.  */
12779 #define _mm_mask_shuffle_ps(W, U, A, B, C) \
12780 ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
12781 (__v4sf)(__m128)(B), (int)(C), \
12782 (__v4sf)(__m128)(W), \
12783 (__mmask8)(U)))
12784
/* Zero-masking form: same call with a zero merge operand.  */
12785 #define _mm_maskz_shuffle_ps(U, A, B, C) \
12786 ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
12787 (__v4sf)(__m128)(B), (int)(C), \
12788 (__v4sf)(__m128)_mm_setzero_ps (), \
12789 (__mmask8)(U)))
12790
/* Unmasked form: passes X, Y (as __v4df), Z (as __v4di) and the
   immediate C to the fixupimmpd256 _mask builtin with an all-ones mask.
   There is no separate merge operand; X is the builtin's first
   argument.  */
12791 #define _mm256_fixupimm_pd(X, Y, Z, C) \
12792 ((__m256d)__builtin_ia32_fixupimmpd256_mask ((__v4df)(__m256d)(X), \
12793 (__v4df)(__m256d)(Y), \
12794 (__v4di)(__m256i)(Z), (int)(C), \
12795 (__mmask8)(-1)))
12796
/* Masked form: same _mask builtin with U as the mask.  */
12797 #define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \
12798 ((__m256d)__builtin_ia32_fixupimmpd256_mask ((__v4df)(__m256d)(X), \
12799 (__v4df)(__m256d)(Y), \
12800 (__v4di)(__m256i)(Z), (int)(C), \
12801 (__mmask8)(U)))
12802
/* Zero-masking form: uses the distinct _maskz builtin with U as the
   mask.  */
12803 #define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \
12804 ((__m256d)__builtin_ia32_fixupimmpd256_maskz ((__v4df)(__m256d)(X), \
12805 (__v4df)(__m256d)(Y), \
12806 (__v4di)(__m256i)(Z), (int)(C),\
12807 (__mmask8)(U)))
12808
/* Unmasked form: passes X, Y (as __v8sf), Z (as __v8si) and the
   immediate C to the fixupimmps256 _mask builtin with an all-ones mask.
   There is no separate merge operand; X is the builtin's first
   argument.  */
12809 #define _mm256_fixupimm_ps(X, Y, Z, C) \
12810 ((__m256)__builtin_ia32_fixupimmps256_mask ((__v8sf)(__m256)(X), \
12811 (__v8sf)(__m256)(Y), \
12812 (__v8si)(__m256i)(Z), (int)(C), \
12813 (__mmask8)(-1)))
12814
12815
/* Masked form: same _mask builtin with U as the mask.  */
12816 #define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \
12817 ((__m256)__builtin_ia32_fixupimmps256_mask ((__v8sf)(__m256)(X), \
12818 (__v8sf)(__m256)(Y), \
12819 (__v8si)(__m256i)(Z), (int)(C), \
12820 (__mmask8)(U)))
12821
/* Zero-masking form: uses the distinct _maskz builtin with U as the
   mask.  */
12822 #define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \
12823 ((__m256)__builtin_ia32_fixupimmps256_maskz ((__v8sf)(__m256)(X), \
12824 (__v8sf)(__m256)(Y), \
12825 (__v8si)(__m256i)(Z), (int)(C),\
12826 (__mmask8)(U)))
12827
/* Unmasked form: passes X, Y (as __v2df), Z (as __v2di) and the
   immediate C to the fixupimmpd128 _mask builtin with an all-ones mask.
   There is no separate merge operand; X is the builtin's first
   argument.  */
12828 #define _mm_fixupimm_pd(X, Y, Z, C) \
12829 ((__m128d)__builtin_ia32_fixupimmpd128_mask ((__v2df)(__m128d)(X), \
12830 (__v2df)(__m128d)(Y), \
12831 (__v2di)(__m128i)(Z), (int)(C), \
12832 (__mmask8)(-1)))
12833
12834
/* Masked form: same _mask builtin with U as the mask.  */
12835 #define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \
12836 ((__m128d)__builtin_ia32_fixupimmpd128_mask ((__v2df)(__m128d)(X), \
12837 (__v2df)(__m128d)(Y), \
12838 (__v2di)(__m128i)(Z), (int)(C), \
12839 (__mmask8)(U)))
12840
/* Zero-masking form: uses the distinct _maskz builtin with U as the
   mask.  */
12841 #define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \
12842 ((__m128d)__builtin_ia32_fixupimmpd128_maskz ((__v2df)(__m128d)(X), \
12843 (__v2df)(__m128d)(Y), \
12844 (__v2di)(__m128i)(Z), (int)(C),\
12845 (__mmask8)(U)))
12846
/* Unmasked form: passes X, Y (as __v4sf), Z (as __v4si) and the
   immediate C to the fixupimmps128 _mask builtin with an all-ones mask.
   There is no separate merge operand; X is the builtin's first
   argument.  */
12847 #define _mm_fixupimm_ps(X, Y, Z, C) \
12848 ((__m128)__builtin_ia32_fixupimmps128_mask ((__v4sf)(__m128)(X), \
12849 (__v4sf)(__m128)(Y), \
12850 (__v4si)(__m128i)(Z), (int)(C), \
12851 (__mmask8)(-1)))
12852
/* Masked form: same _mask builtin with U as the mask.  */
12853 #define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \
12854 ((__m128)__builtin_ia32_fixupimmps128_mask ((__v4sf)(__m128)(X), \
12855 (__v4sf)(__m128)(Y), \
12856 (__v4si)(__m128i)(Z), (int)(C),\
12857 (__mmask8)(U)))
12858
/* Zero-masking form: uses the distinct _maskz builtin with U as the
   mask.  */
12859 #define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \
12860 ((__m128)__builtin_ia32_fixupimmps128_maskz ((__v4sf)(__m128)(X), \
12861 (__v4sf)(__m128)(Y), \
12862 (__v4si)(__m128i)(Z), (int)(C),\
12863 (__mmask8)(U)))
12864
/* Merge-masking form: passes A (as __v8si) and the count B (cast to int)
   to the psrldi256 builtin, with W as the merge operand and U as the
   mask.  */
12865 #define _mm256_mask_srli_epi32(W, U, A, B) \
12866 ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \
12867 (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))
12868
/* Zero-masking form: same call with a zero merge operand.  */
12869 #define _mm256_maskz_srli_epi32(U, A, B) \
12870 ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \
12871 (int)(B), (__v8si)_mm256_setzero_si256 (), (__mmask8)(U)))
12872
/* Merge-masking form: passes A (as __v4si) and the count B (cast to int)
   to the psrldi128 builtin, with W as the merge operand and U as the
   mask.  */
12873 #define _mm_mask_srli_epi32(W, U, A, B) \
12874 ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
12875 (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
12876
/* Zero-masking form: same call with a zero merge operand.  */
12877 #define _mm_maskz_srli_epi32(U, A, B) \
12878 ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
12879 (int)(B), (__v4si)_mm_setzero_si128 (), (__mmask8)(U)))
12880
/* Merge-masking form: passes A (as __v4di) and the count B (cast to int)
   to the psrlqi256 builtin, with W as the merge operand and U as the
   mask.  */
12881 #define _mm256_mask_srli_epi64(W, U, A, B) \
12882 ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \
12883 (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))
12884
/* Zero-masking form: same call with a zero merge operand.  */
12885 #define _mm256_maskz_srli_epi64(U, A, B) \
12886 ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \
12887 (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
12888
/* Merge-masking form: passes A (as __v2di) and the count B (cast to int)
   to the psrlqi128 builtin, with W as the merge operand and U as the
   mask.  */
12889 #define _mm_mask_srli_epi64(W, U, A, B) \
12890 ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \
12891 (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))
12892
/* Zero-masking form: same call with a zero merge operand.  */
12893 #define _mm_maskz_srli_epi64(U, A, B) \
12894 ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \
12895 (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)(U)))
12896
/* Merge-masking form: passes X (as __v8si) and the count C (cast to int)
   to the pslldi256 builtin, with W as the merge operand and U as the
   mask.  */
12897 #define _mm256_mask_slli_epi32(W, U, X, C) \
12898 ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
12899 (__v8si)(__m256i)(W), \
12900 (__mmask8)(U)))
12901
/* Zero-masking form: same call with a zero merge operand.  */
12902 #define _mm256_maskz_slli_epi32(U, X, C) \
12903 ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
12904 (__v8si)(__m256i)_mm256_setzero_si256 (), \
12905 (__mmask8)(U)))
12906
/* Merge-masking form: passes X (as __v4di) and the count C (cast to int)
   to the psllqi256 builtin, with W as the merge operand and U as the
   mask.  Macro counterpart of the inline function of the same name.  */
12907 #define _mm256_mask_slli_epi64(W, U, X, C) \
12908 ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
12909 (__v4di)(__m256i)(W), \
12910 (__mmask8)(U)))
12911
/* Zero-masking form: same call with a zero merge operand.  */
12912 #define _mm256_maskz_slli_epi64(U, X, C) \
12913 ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
12914 (__v4di)(__m256i)_mm256_setzero_si256 (), \
12915 (__mmask8)(U)))
12916
/* Merge-masking form: passes X (as __v4si) and the count C (cast to int)
   to the pslldi128 builtin, with W as the merge operand and U as the
   mask.  */
12917 #define _mm_mask_slli_epi32(W, U, X, C) \
12918 ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
12919 (__v4si)(__m128i)(W),\
12920 (__mmask8)(U)))
12921
/* Zero-masking form: same call with a zero merge operand.  */
12922 #define _mm_maskz_slli_epi32(U, X, C) \
12923 ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
12924 (__v4si)(__m128i)_mm_setzero_si128 (),\
12925 (__mmask8)(U)))
12926
/* Merge-masking form: passes X (as __v2di) and the count C (cast to int)
   to the psllqi128 builtin, with W as the merge operand and U as the
   mask.  */
12927 #define _mm_mask_slli_epi64(W, U, X, C) \
12928 ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
12929 (__v2di)(__m128i)(W),\
12930 (__mmask8)(U)))
12931
/* Zero-masking form: same call with a zero merge operand.  */
12932 #define _mm_maskz_slli_epi64(U, X, C) \
12933 ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
12934 (__v2di)(__m128i)_mm_setzero_si128 (),\
12935 (__mmask8)(U)))
12936
/* Unmasked form: passes A, B, C (as __v4di) and the immediate I (cast to
   unsigned char) to the pternlogq256 _mask builtin with an all-ones
   mask.  */
12937 #define _mm256_ternarylogic_epi64(A, B, C, I) \
12938 ((__m256i) \
12939 __builtin_ia32_pternlogq256_mask ((__v4di) (__m256i) (A), \
12940 (__v4di) (__m256i) (B), \
12941 (__v4di) (__m256i) (C), \
12942 (unsigned char) (I), \
12943 (__mmask8) -1))
12944
/* Masked form: same _mask builtin with U as the mask; A is both the
   first source and the only candidate merge operand in the call.  */
12945 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
12946 ((__m256i) \
12947 __builtin_ia32_pternlogq256_mask ((__v4di) (__m256i) (A), \
12948 (__v4di) (__m256i) (B), \
12949 (__v4di) (__m256i) (C), \
12950 (unsigned char) (I), \
12951 (__mmask8) (U)))
12952
/* Zero-masking form: uses the distinct _maskz builtin with U as the
   mask.  */
12953 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
12954 ((__m256i) \
12955 __builtin_ia32_pternlogq256_maskz ((__v4di) (__m256i) (A), \
12956 (__v4di) (__m256i) (B), \
12957 (__v4di) (__m256i) (C), \
12958 (unsigned char) (I), \
12959 (__mmask8) (U)))
12960
/* Unmasked form: passes A, B, C (as __v8si) and the immediate I (cast to
   unsigned char) to the pternlogd256 _mask builtin with an all-ones
   mask.  */
12961 #define _mm256_ternarylogic_epi32(A, B, C, I) \
12962 ((__m256i) \
12963 __builtin_ia32_pternlogd256_mask ((__v8si) (__m256i) (A), \
12964 (__v8si) (__m256i) (B), \
12965 (__v8si) (__m256i) (C), \
12966 (unsigned char) (I), \
12967 (__mmask8) -1))
12968
/* Masked form: same _mask builtin with U as the mask; A is both the
   first source and the only candidate merge operand in the call.  */
12969 #define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
12970 ((__m256i) \
12971 __builtin_ia32_pternlogd256_mask ((__v8si) (__m256i) (A), \
12972 (__v8si) (__m256i) (B), \
12973 (__v8si) (__m256i) (C), \
12974 (unsigned char) (I), \
12975 (__mmask8) (U)))
12976
/* Zero-masking form: uses the distinct _maskz builtin with U as the
   mask.  */
12977 #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
12978 ((__m256i) \
12979 __builtin_ia32_pternlogd256_maskz ((__v8si) (__m256i) (A), \
12980 (__v8si) (__m256i) (B), \
12981 (__v8si) (__m256i) (C), \
12982 (unsigned char) (I), \
12983 (__mmask8) (U)))
12984
/* Unmasked form: passes A, B, C (as __v2di) and the immediate I (cast to
   unsigned char) to the pternlogq128 _mask builtin with an all-ones
   mask.  */
12985 #define _mm_ternarylogic_epi64(A, B, C, I) \
12986 ((__m128i) \
12987 __builtin_ia32_pternlogq128_mask ((__v2di) (__m128i) (A), \
12988 (__v2di) (__m128i) (B), \
12989 (__v2di) (__m128i) (C), \
12990 (unsigned char) (I), \
12991 (__mmask8) -1))
12992
/* Masked form: same _mask builtin with U as the mask; A is both the
   first source and the only candidate merge operand in the call.  */
12993 #define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
12994 ((__m128i) \
12995 __builtin_ia32_pternlogq128_mask ((__v2di) (__m128i) (A), \
12996 (__v2di) (__m128i) (B), \
12997 (__v2di) (__m128i) (C), \
12998 (unsigned char) (I), \
12999 (__mmask8) (U)))
13000
/* Zero-masking form: uses the distinct _maskz builtin with U as the
   mask.  */
13001 #define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
13002 ((__m128i) \
13003 __builtin_ia32_pternlogq128_maskz ((__v2di) (__m128i) (A), \
13004 (__v2di) (__m128i) (B), \
13005 (__v2di) (__m128i) (C), \
13006 (unsigned char) (I), \
13007 (__mmask8) (U)))
13008
/* Unmasked form: passes A, B, C (as __v4si) and the immediate I (cast to
   unsigned char) to the pternlogd128 _mask builtin with an all-ones
   mask.  */
13009 #define _mm_ternarylogic_epi32(A, B, C, I) \
13010 ((__m128i) \
13011 __builtin_ia32_pternlogd128_mask ((__v4si) (__m128i) (A), \
13012 (__v4si) (__m128i) (B), \
13013 (__v4si) (__m128i) (C), \
13014 (unsigned char) (I), \
13015 (__mmask8) -1))
13016
/* Masked form: same _mask builtin with U as the mask; A is both the
   first source and the only candidate merge operand in the call.  */
13017 #define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
13018 ((__m128i) \
13019 __builtin_ia32_pternlogd128_mask ((__v4si) (__m128i) (A), \
13020 (__v4si) (__m128i) (B), \
13021 (__v4si) (__m128i) (C), \
13022 (unsigned char) (I), \
13023 (__mmask8) (U)))
13024
/* Zero-masking form: uses the distinct _maskz builtin with U as the
   mask.  */
13025 #define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
13026 ((__m128i) \
13027 __builtin_ia32_pternlogd128_maskz ((__v4si) (__m128i) (A), \
13028 (__v4si) (__m128i) (B), \
13029 (__v4si) (__m128i) (C), \
13030 (unsigned char) (I), \
13031 (__mmask8) (U)))
13032
/* Unmasked form: passes A (as __v8sf) and the immediate B to the
   rndscaleps_256 builtin with a zero merge operand and an all-ones
   mask.  */
13033 #define _mm256_roundscale_ps(A, B) \
13034 ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
13035 (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)-1))
13036
/* Merge-masking form: W is the merge operand and U the mask.  */
13037 #define _mm256_mask_roundscale_ps(W, U, A, B) \
13038 ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
13039 (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))
13040
/* Zero-masking form: zero merge operand, U as the mask.  */
13041 #define _mm256_maskz_roundscale_ps(U, A, B) \
13042 ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
13043 (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U)))
13044
/* Unmasked form: passes A (as __v4df) and the immediate B to the
   rndscalepd_256 builtin with a zero merge operand and an all-ones
   mask.  */
13045 #define _mm256_roundscale_pd(A, B) \
13046 ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
13047 (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)-1))
13048
/* Merge-masking form: W is the merge operand and U the mask.  */
13049 #define _mm256_mask_roundscale_pd(W, U, A, B) \
13050 ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
13051 (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))
13052
/* Zero-masking form: zero merge operand, U as the mask.  */
13053 #define _mm256_maskz_roundscale_pd(U, A, B) \
13054 ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
13055 (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))
13056
/* Unmasked form: passes A (as __v4sf) and the immediate B to the
   rndscaleps_128 builtin with a zero merge operand and an all-ones
   mask.  */
13057 #define _mm_roundscale_ps(A, B) \
13058 ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
13059 (int)(B), (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)-1))
13060
/* Merge-masking form: W is the merge operand and U the mask.  */
13061 #define _mm_mask_roundscale_ps(W, U, A, B) \
13062 ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
13063 (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U)))
13064
/* Zero-masking form: zero merge operand, U as the mask.  */
13065 #define _mm_maskz_roundscale_ps(U, A, B) \
13066 ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
13067 (int)(B), (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)(U)))
13068
/* Unmasked form: passes A (as __v2df) and the immediate B to the
   rndscalepd_128 builtin with a zero merge operand and an all-ones
   mask.  */
13069 #define _mm_roundscale_pd(A, B) \
13070 ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
13071 (int)(B), (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)-1))
13072
/* Merge-masking form: W is the merge operand and U the mask.  */
13073 #define _mm_mask_roundscale_pd(W, U, A, B) \
13074 ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
13075 (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))
13076
/* Zero-masking form: zero merge operand, U as the mask.  */
13077 #define _mm_maskz_roundscale_pd(U, A, B) \
13078 ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
13079 (int)(B), (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)(U)))
13080
/* 256-bit single-precision getmant macros.  Each expands to one call of
   __builtin_ia32_getmantps256_mask (X, imm, <vector>, <mask>) and casts
   the result to __m256.  The immediate is built from the two selector
   arguments as (int)(((C)<<2) | (B)), i.e. C occupies the bits above the
   low two and B is OR-ed into the low bits.
     _mm256_getmant_ps        - zero vector, mask (__mmask8)-1
     _mm256_mask_getmant_ps   - W, U
     _mm256_maskz_getmant_ps  - zero vector, U  */
13081 #define _mm256_getmant_ps(X, B, C) \
13082 ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
13083 (int)(((C)<<2) | (B)), \
13084 (__v8sf)(__m256)_mm256_setzero_ps (), \
13085 (__mmask8)-1))
13086
13087 #define _mm256_mask_getmant_ps(W, U, X, B, C) \
13088 ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
13089 (int)(((C)<<2) | (B)), \
13090 (__v8sf)(__m256)(W), \
13091 (__mmask8)(U)))
13092
13093 #define _mm256_maskz_getmant_ps(U, X, B, C) \
13094 ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
13095 (int)(((C)<<2) | (B)), \
13096 (__v8sf)(__m256)_mm256_setzero_ps (), \
13097 (__mmask8)(U)))
13098
/* 128-bit single-precision getmant macros.  Each expands to one call of
   __builtin_ia32_getmantps128_mask (X, imm, <vector>, <mask>) and casts
   the result to __m128.  The immediate is (int)(((C)<<2) | (B)).
     _mm_getmant_ps        - zero vector, mask (__mmask8)-1
     _mm_mask_getmant_ps   - W, U
     _mm_maskz_getmant_ps  - zero vector, U  */
13099 #define _mm_getmant_ps(X, B, C) \
13100 ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
13101 (int)(((C)<<2) | (B)), \
13102 (__v4sf)(__m128)_mm_setzero_ps (), \
13103 (__mmask8)-1))
13104
13105 #define _mm_mask_getmant_ps(W, U, X, B, C) \
13106 ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
13107 (int)(((C)<<2) | (B)), \
13108 (__v4sf)(__m128)(W), \
13109 (__mmask8)(U)))
13110
13111 #define _mm_maskz_getmant_ps(U, X, B, C) \
13112 ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
13113 (int)(((C)<<2) | (B)), \
13114 (__v4sf)(__m128)_mm_setzero_ps (), \
13115 (__mmask8)(U)))
13116
/* 256-bit double-precision getmant macros.  Each expands to one call of
   __builtin_ia32_getmantpd256_mask (X, imm, <vector>, <mask>) and casts
   the result to __m256d.  The immediate is (int)(((C)<<2) | (B)).
     _mm256_getmant_pd        - zero vector, mask (__mmask8)-1
     _mm256_mask_getmant_pd   - W, U
     _mm256_maskz_getmant_pd  - zero vector, U  */
13117 #define _mm256_getmant_pd(X, B, C) \
13118 ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
13119 (int)(((C)<<2) | (B)), \
13120 (__v4df)(__m256d)_mm256_setzero_pd (),\
13121 (__mmask8)-1))
13122
13123 #define _mm256_mask_getmant_pd(W, U, X, B, C) \
13124 ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
13125 (int)(((C)<<2) | (B)), \
13126 (__v4df)(__m256d)(W), \
13127 (__mmask8)(U)))
13128
13129 #define _mm256_maskz_getmant_pd(U, X, B, C) \
13130 ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
13131 (int)(((C)<<2) | (B)), \
13132 (__v4df)(__m256d)_mm256_setzero_pd (),\
13133 (__mmask8)(U)))
13134
/* 128-bit double-precision getmant macros.  Each expands to one call of
   __builtin_ia32_getmantpd128_mask (X, imm, <vector>, <mask>) and casts
   the result to __m128d.  The immediate is (int)(((C)<<2) | (B)).
     _mm_getmant_pd        - zero vector, mask (__mmask8)-1
     _mm_mask_getmant_pd   - W, U
     _mm_maskz_getmant_pd  - zero vector, U  */
13135 #define _mm_getmant_pd(X, B, C) \
13136 ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
13137 (int)(((C)<<2) | (B)), \
13138 (__v2df)(__m128d)_mm_setzero_pd (), \
13139 (__mmask8)-1))
13140
13141 #define _mm_mask_getmant_pd(W, U, X, B, C) \
13142 ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
13143 (int)(((C)<<2) | (B)), \
13144 (__v2df)(__m128d)(W), \
13145 (__mmask8)(U)))
13146
13147 #define _mm_maskz_getmant_pd(U, X, B, C) \
13148 ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
13149 (int)(((C)<<2) | (B)), \
13150 (__v2df)(__m128d)_mm_setzero_pd (), \
13151 (__mmask8)(U)))
13152
/* Masked gather macros taking 32-bit indices and producing floating-point
   vectors.  Each expands to one __builtin_ia32_gather3siv* call with the
   operands (V1OLD, ADDR as void const *, INDEX, MASK as __mmask8,
   SCALE as int) and casts the result to the public vector type.  The two
   _pd forms take INDEX as __m128i/__v4si.
   The whole expansion is wrapped in parentheses so that the leading cast
   cannot be re-associated at the use site: without them a postfix
   operator written after the macro call (such as a subscript) would bind
   to the builtin call before the cast is applied.  */
13153 #define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
13154 ((__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256) (V1OLD), \
13155 (void const *) (ADDR), \
13156 (__v8si)(__m256i) (INDEX), \
13157 (__mmask8) (MASK), \
13158 (int) (SCALE)))
13159
13160 #define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
13161 ((__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128) (V1OLD), \
13162 (void const *) (ADDR), \
13163 (__v4si)(__m128i) (INDEX), \
13164 (__mmask8) (MASK), \
13165 (int) (SCALE)))
13166
13167 #define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
13168 ((__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d) (V1OLD), \
13169 (void const *) (ADDR), \
13170 (__v4si)(__m128i) (INDEX), \
13171 (__mmask8) (MASK), \
13172 (int) (SCALE)))
13173
13174 #define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
13175 ((__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d) (V1OLD), \
13176 (void const *) (ADDR), \
13177 (__v4si)(__m128i) (INDEX), \
13178 (__mmask8) (MASK), \
13179 (int) (SCALE)))
13180
/* Masked gather macros taking 64-bit indices and producing floating-point
   vectors.  Each expands to one __builtin_ia32_gather3div* call with the
   operands (V1OLD, ADDR as void const *, INDEX, MASK as __mmask8,
   SCALE as int).  Note that _mm256_mmask_i64gather_ps takes V1OLD as
   __m128 and yields __m128 even though INDEX is a __m256i.
   The whole expansion is wrapped in parentheses so that the leading cast
   cannot be re-associated at the use site: without them a postfix
   operator written after the macro call (such as a subscript) would bind
   to the builtin call before the cast is applied.  */
13181 #define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
13182 ((__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128) (V1OLD), \
13183 (void const *) (ADDR), \
13184 (__v4di)(__m256i) (INDEX), \
13185 (__mmask8) (MASK), \
13186 (int) (SCALE)))
13187
13188 #define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
13189 ((__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128) (V1OLD), \
13190 (void const *) (ADDR), \
13191 (__v2di)(__m128i) (INDEX), \
13192 (__mmask8) (MASK), \
13193 (int) (SCALE)))
13194
13195 #define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
13196 ((__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d) (V1OLD), \
13197 (void const *) (ADDR), \
13198 (__v4di)(__m256i) (INDEX), \
13199 (__mmask8) (MASK), \
13200 (int) (SCALE)))
13201
13202 #define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
13203 ((__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d) (V1OLD), \
13204 (void const *) (ADDR), \
13205 (__v2di)(__m128i) (INDEX), \
13206 (__mmask8) (MASK), \
13207 (int) (SCALE)))
13208
/* Masked gather macros taking 32-bit indices and producing integer
   vectors.  Each expands to one __builtin_ia32_gather3siv* call with the
   operands (V1OLD, ADDR as void const *, INDEX, MASK as __mmask8,
   SCALE as int).  The two _epi64 forms take INDEX as __m128i/__v4si.
   The whole expansion is wrapped in parentheses so that the leading cast
   cannot be re-associated at the use site: without them a postfix
   operator written after the macro call (such as a subscript) would bind
   to the builtin call before the cast is applied.  */
13209 #define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
13210 ((__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i) (V1OLD), \
13211 (void const *) (ADDR), \
13212 (__v8si)(__m256i) (INDEX), \
13213 (__mmask8) (MASK), \
13214 (int) (SCALE)))
13215
13216 #define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
13217 ((__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i) (V1OLD), \
13218 (void const *) (ADDR), \
13219 (__v4si)(__m128i) (INDEX), \
13220 (__mmask8) (MASK), \
13221 (int) (SCALE)))
13222
13223 #define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
13224 ((__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i) (V1OLD), \
13225 (void const *) (ADDR), \
13226 (__v4si)(__m128i) (INDEX), \
13227 (__mmask8) (MASK), \
13228 (int) (SCALE)))
13229
13230 #define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
13231 ((__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i) (V1OLD), \
13232 (void const *) (ADDR), \
13233 (__v4si)(__m128i) (INDEX), \
13234 (__mmask8) (MASK), \
13235 (int) (SCALE)))
13236
/* Masked gather macros taking 64-bit indices and producing integer
   vectors.  Each expands to one __builtin_ia32_gather3div* call with the
   operands (V1OLD, ADDR as void const *, INDEX, MASK as __mmask8,
   SCALE as int).  Note that _mm256_mmask_i64gather_epi32 takes V1OLD as
   __m128i/__v4si and yields __m128i even though INDEX is a __m256i.
   The whole expansion is wrapped in parentheses so that the leading cast
   cannot be re-associated at the use site: without them a postfix
   operator written after the macro call (such as a subscript) would bind
   to the builtin call before the cast is applied.  */
13237 #define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
13238 ((__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i) (V1OLD), \
13239 (void const *) (ADDR), \
13240 (__v4di)(__m256i) (INDEX), \
13241 (__mmask8) (MASK), \
13242 (int) (SCALE)))
13243
13244 #define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
13245 ((__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i) (V1OLD), \
13246 (void const *) (ADDR), \
13247 (__v2di)(__m128i) (INDEX), \
13248 (__mmask8) (MASK), \
13249 (int) (SCALE)))
13250
13251 #define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
13252 ((__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i) (V1OLD), \
13253 (void const *) (ADDR), \
13254 (__v4di)(__m256i) (INDEX), \
13255 (__mmask8) (MASK), \
13256 (int) (SCALE)))
13257
13258 #define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
13259 ((__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i) (V1OLD), \
13260 (void const *) (ADDR), \
13261 (__v2di)(__m128i) (INDEX), \
13262 (__mmask8) (MASK), \
13263 (int) (SCALE)))
13264
/* Scatter macros: 32-bit indices, single-precision data.  Each expands to
   a bare call (no result cast) of __builtin_ia32_scattersiv8sf (256-bit)
   or __builtin_ia32_scattersiv4sf (128-bit) with the operands
   (ADDR as void *, mask, INDEX, V1, SCALE as int).  The plain forms pass
   the constant mask (__mmask8)0xFF; the _mask_ forms pass MASK.  */
13265 #define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
13266 __builtin_ia32_scattersiv8sf ((void *) (ADDR), (__mmask8)0xFF, \
13267 (__v8si)(__m256i) (INDEX), \
13268 (__v8sf)(__m256) (V1), (int) (SCALE))
13269
13270 #define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
13271 __builtin_ia32_scattersiv8sf ((void *) (ADDR), (__mmask8) (MASK), \
13272 (__v8si)(__m256i) (INDEX), \
13273 (__v8sf)(__m256) (V1), (int) (SCALE))
13274
13275 #define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
13276 __builtin_ia32_scattersiv4sf ((void *) (ADDR), (__mmask8)0xFF, \
13277 (__v4si)(__m128i) (INDEX), \
13278 (__v4sf)(__m128) (V1), (int) (SCALE))
13279
13280 #define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
13281 __builtin_ia32_scattersiv4sf ((void *) (ADDR), (__mmask8) (MASK), \
13282 (__v4si)(__m128i) (INDEX), \
13283 (__v4sf)(__m128) (V1), (int) (SCALE))
13284
/* Scatter macros: 32-bit indices, double-precision data.  Each expands to
   a bare call of __builtin_ia32_scattersiv4df (256-bit data) or
   __builtin_ia32_scattersiv2df (128-bit data) with the operands
   (ADDR as void *, mask, INDEX as __m128i/__v4si, V1, SCALE as int).
   The plain forms pass the constant mask (__mmask8)0xFF; the _mask_
   forms pass MASK.  */
13285 #define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
13286 __builtin_ia32_scattersiv4df ((void *) (ADDR), (__mmask8)0xFF, \
13287 (__v4si)(__m128i) (INDEX), \
13288 (__v4df)(__m256d) (V1), (int) (SCALE))
13289
13290 #define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
13291 __builtin_ia32_scattersiv4df ((void *) (ADDR), (__mmask8) (MASK), \
13292 (__v4si)(__m128i) (INDEX), \
13293 (__v4df)(__m256d) (V1), (int) (SCALE))
13294
13295 #define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
13296 __builtin_ia32_scattersiv2df ((void *) (ADDR), (__mmask8)0xFF, \
13297 (__v4si)(__m128i) (INDEX), \
13298 (__v2df)(__m128d) (V1), (int) (SCALE))
13299
13300 #define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
13301 __builtin_ia32_scattersiv2df ((void *) (ADDR), (__mmask8) (MASK), \
13302 (__v4si)(__m128i) (INDEX), \
13303 (__v2df)(__m128d) (V1), (int) (SCALE))
13304
/* Scatter macros: 64-bit indices, single-precision data.  Each expands to
   a bare call of __builtin_ia32_scatterdiv8sf (INDEX is __m256i/__v4di)
   or __builtin_ia32_scatterdiv4sf (INDEX is __m128i/__v2di) with the
   operands (ADDR as void *, mask, INDEX, V1, SCALE as int).  V1 is taken
   as __m128/__v4sf in all four macros.  The plain forms pass the constant
   mask (__mmask8)0xFF; the _mask_ forms pass MASK.  */
13305 #define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
13306 __builtin_ia32_scatterdiv8sf ((void *) (ADDR), (__mmask8)0xFF, \
13307 (__v4di)(__m256i) (INDEX), \
13308 (__v4sf)(__m128) (V1), (int) (SCALE))
13309
13310 #define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
13311 __builtin_ia32_scatterdiv8sf ((void *) (ADDR), (__mmask8) (MASK), \
13312 (__v4di)(__m256i) (INDEX), \
13313 (__v4sf)(__m128) (V1), (int) (SCALE))
13314
13315 #define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
13316 __builtin_ia32_scatterdiv4sf ((void *) (ADDR), (__mmask8)0xFF, \
13317 (__v2di)(__m128i) (INDEX), \
13318 (__v4sf)(__m128) (V1), (int) (SCALE))
13319
13320 #define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
13321 __builtin_ia32_scatterdiv4sf ((void *) (ADDR), (__mmask8) (MASK), \
13322 (__v2di)(__m128i) (INDEX), \
13323 (__v4sf)(__m128) (V1), (int) (SCALE))
13324
/* Scatter macros: 64-bit indices, double-precision data.  Each expands to
   a bare call of __builtin_ia32_scatterdiv4df (256-bit) or
   __builtin_ia32_scatterdiv2df (128-bit) with the operands
   (ADDR as void *, mask, INDEX, V1, SCALE as int).  The plain forms pass
   the constant mask (__mmask8)0xFF; the _mask_ forms pass MASK.  */
13325 #define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
13326 __builtin_ia32_scatterdiv4df ((void *) (ADDR), (__mmask8)0xFF, \
13327 (__v4di)(__m256i) (INDEX), \
13328 (__v4df)(__m256d) (V1), (int) (SCALE))
13329
13330 #define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
13331 __builtin_ia32_scatterdiv4df ((void *) (ADDR), (__mmask8) (MASK), \
13332 (__v4di)(__m256i) (INDEX), \
13333 (__v4df)(__m256d) (V1), (int) (SCALE))
13334
13335 #define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
13336 __builtin_ia32_scatterdiv2df ((void *) (ADDR), (__mmask8)0xFF, \
13337 (__v2di)(__m128i) (INDEX), \
13338 (__v2df)(__m128d) (V1), (int) (SCALE))
13339
13340 #define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
13341 __builtin_ia32_scatterdiv2df ((void *) (ADDR), (__mmask8) (MASK), \
13342 (__v2di)(__m128i) (INDEX), \
13343 (__v2df)(__m128d) (V1), (int) (SCALE))
13344
/* Scatter macros: 32-bit indices, 32-bit integer data.  Each expands to a
   bare call of __builtin_ia32_scattersiv8si (256-bit) or
   __builtin_ia32_scattersiv4si (128-bit) with the operands
   (ADDR as void *, mask, INDEX, V1, SCALE as int).  The plain forms pass
   the constant mask (__mmask8)0xFF; the _mask_ forms pass MASK.  */
13345 #define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
13346 __builtin_ia32_scattersiv8si ((void *) (ADDR), (__mmask8)0xFF, \
13347 (__v8si)(__m256i) (INDEX), \
13348 (__v8si)(__m256i) (V1), (int) (SCALE))
13349
13350 #define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
13351 __builtin_ia32_scattersiv8si ((void *) (ADDR), (__mmask8) (MASK), \
13352 (__v8si)(__m256i) (INDEX), \
13353 (__v8si)(__m256i) (V1), (int) (SCALE))
13354
13355 #define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
13356 __builtin_ia32_scattersiv4si ((void *) (ADDR), (__mmask8)0xFF, \
13357 (__v4si)(__m128i) (INDEX), \
13358 (__v4si)(__m128i) (V1), (int) (SCALE))
13359
13360 #define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
13361 __builtin_ia32_scattersiv4si ((void *) (ADDR), (__mmask8) (MASK), \
13362 (__v4si)(__m128i) (INDEX), \
13363 (__v4si)(__m128i) (V1), (int) (SCALE))
13364
/* Scatter macros: 32-bit indices, 64-bit integer data.  Each expands to a
   bare call of __builtin_ia32_scattersiv4di (256-bit data) or
   __builtin_ia32_scattersiv2di (128-bit data) with the operands
   (ADDR as void *, mask, INDEX as __m128i/__v4si, V1, SCALE as int).
   The plain forms pass the constant mask (__mmask8)0xFF; the _mask_
   forms pass MASK.  */
13365 #define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
13366 __builtin_ia32_scattersiv4di ((void *) (ADDR), (__mmask8)0xFF, \
13367 (__v4si)(__m128i) (INDEX), \
13368 (__v4di)(__m256i) (V1), (int) (SCALE))
13369
13370 #define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
13371 __builtin_ia32_scattersiv4di ((void *) (ADDR), (__mmask8) (MASK), \
13372 (__v4si)(__m128i) (INDEX), \
13373 (__v4di)(__m256i) (V1), (int) (SCALE))
13374
13375 #define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
13376 __builtin_ia32_scattersiv2di ((void *) (ADDR), (__mmask8)0xFF, \
13377 (__v4si)(__m128i) (INDEX), \
13378 (__v2di)(__m128i) (V1), (int) (SCALE))
13379
13380 #define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
13381 __builtin_ia32_scattersiv2di ((void *) (ADDR), (__mmask8) (MASK), \
13382 (__v4si)(__m128i) (INDEX), \
13383 (__v2di)(__m128i) (V1), (int) (SCALE))
13384
/* Scatter macros: 64-bit indices, 32-bit integer data.  Each expands to a
   bare call of __builtin_ia32_scatterdiv8si (INDEX is __m256i/__v4di) or
   __builtin_ia32_scatterdiv4si (INDEX is __m128i/__v2di) with the
   operands (ADDR as void *, mask, INDEX, V1, SCALE as int).  V1 is taken
   as __m128i/__v4si in all four macros.  The plain forms pass the
   constant mask (__mmask8)0xFF; the _mask_ forms pass MASK.  */
13385 #define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
13386 __builtin_ia32_scatterdiv8si ((void *) (ADDR), (__mmask8)0xFF, \
13387 (__v4di)(__m256i) (INDEX), \
13388 (__v4si)(__m128i) (V1), (int) (SCALE))
13389
13390 #define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
13391 __builtin_ia32_scatterdiv8si ((void *) (ADDR), (__mmask8) (MASK), \
13392 (__v4di)(__m256i) (INDEX), \
13393 (__v4si)(__m128i) (V1), (int) (SCALE))
13394
13395 #define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
13396 __builtin_ia32_scatterdiv4si ((void *) (ADDR), (__mmask8)0xFF, \
13397 (__v2di)(__m128i) (INDEX), \
13398 (__v4si)(__m128i) (V1), (int) (SCALE))
13399
13400 #define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
13401 __builtin_ia32_scatterdiv4si ((void *) (ADDR), (__mmask8) (MASK), \
13402 (__v2di)(__m128i) (INDEX), \
13403 (__v4si)(__m128i) (V1), (int) (SCALE))
13404
/* Scatter macros: 64-bit indices, 64-bit integer data.  Each expands to a
   bare call of __builtin_ia32_scatterdiv4di (256-bit) or
   __builtin_ia32_scatterdiv2di (128-bit) with the operands
   (ADDR as void *, mask, INDEX, V1, SCALE as int).  The plain forms pass
   the constant mask (__mmask8)0xFF; the _mask_ forms pass MASK.  */
13405 #define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
13406 __builtin_ia32_scatterdiv4di ((void *) (ADDR), (__mmask8)0xFF, \
13407 (__v4di)(__m256i) (INDEX), \
13408 (__v4di)(__m256i) (V1), (int) (SCALE))
13409
13410 #define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
13411 __builtin_ia32_scatterdiv4di ((void *) (ADDR), (__mmask8) (MASK), \
13412 (__v4di)(__m256i) (INDEX), \
13413 (__v4di)(__m256i) (V1), (int) (SCALE))
13414
13415 #define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
13416 __builtin_ia32_scatterdiv2di ((void *) (ADDR), (__mmask8)0xFF, \
13417 (__v2di)(__m128i) (INDEX), \
13418 (__v2di)(__m128i) (V1), (int) (SCALE))
13419
13420 #define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
13421 __builtin_ia32_scatterdiv2di ((void *) (ADDR), (__mmask8) (MASK), \
13422 (__v2di)(__m128i) (INDEX), \
13423 (__v2di)(__m128i) (V1), (int) (SCALE))
13424
/* Masked 32-bit-element shuffle macros.  Each expands to one call of
   __builtin_ia32_pshufd256_mask or __builtin_ia32_pshufd128_mask with
   the operands (X, (int) C, <vector>, U as __mmask8) and casts the
   result to __m256i / __m128i.  The _mask_ forms pass W as <vector>; the
   _maskz_ forms pass a zero vector from _mm256_setzero_si256 () /
   _mm_setzero_si128 ().  */
13425 #define _mm256_mask_shuffle_epi32(W, U, X, C) \
13426 ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \
13427 (__v8si)(__m256i)(W), \
13428 (__mmask8)(U)))
13429
13430 #define _mm256_maskz_shuffle_epi32(U, X, C) \
13431 ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \
13432 (__v8si)(__m256i) \
13433 _mm256_setzero_si256 (), \
13434 (__mmask8)(U)))
13435
13436 #define _mm_mask_shuffle_epi32(W, U, X, C) \
13437 ((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \
13438 (__v4si)(__m128i)(W), \
13439 (__mmask8)(U)))
13440
13441 #define _mm_maskz_shuffle_epi32(U, X, C) \
13442 ((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \
13443 (__v4si)(__m128i)_mm_setzero_si128 (), \
13444 (__mmask8)(U)))
13445
/* rol_epi64 macros (256-bit, then 128-bit).  Each expands to one call of
   __builtin_ia32_prolq256_mask or __builtin_ia32_prolq128_mask with the
   operands (A, (int) B, <vector>, <mask>) and casts the result to
   __m256i / __m128i:
     plain   - zero vector, mask (__mmask8)-1
     _mask_  - W, U
     _maskz_ - zero vector, U  */
13446 #define _mm256_rol_epi64(A, B) \
13447 ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
13448 (__v4di)(__m256i)_mm256_setzero_si256 (),\
13449 (__mmask8)-1))
13450
13451 #define _mm256_mask_rol_epi64(W, U, A, B) \
13452 ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
13453 (__v4di)(__m256i)(W), \
13454 (__mmask8)(U)))
13455
13456 #define _mm256_maskz_rol_epi64(U, A, B) \
13457 ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
13458 (__v4di)(__m256i)_mm256_setzero_si256 (),\
13459 (__mmask8)(U)))
13460
13461 #define _mm_rol_epi64(A, B) \
13462 ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
13463 (__v2di)(__m128i)_mm_setzero_si128 (),\
13464 (__mmask8)-1))
13465
13466 #define _mm_mask_rol_epi64(W, U, A, B) \
13467 ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
13468 (__v2di)(__m128i)(W), \
13469 (__mmask8)(U)))
13470
13471 #define _mm_maskz_rol_epi64(U, A, B) \
13472 ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
13473 (__v2di)(__m128i)_mm_setzero_si128 (),\
13474 (__mmask8)(U)))
13475
/* ror_epi64 macros (256-bit, then 128-bit).  Each expands to one call of
   __builtin_ia32_prorq256_mask or __builtin_ia32_prorq128_mask with the
   operands (A, (int) B, <vector>, <mask>) and casts the result to
   __m256i / __m128i:
     plain   - zero vector, mask (__mmask8)-1
     _mask_  - W, U
     _maskz_ - zero vector, U  */
13476 #define _mm256_ror_epi64(A, B) \
13477 ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
13478 (__v4di)(__m256i)_mm256_setzero_si256 (),\
13479 (__mmask8)-1))
13480
13481 #define _mm256_mask_ror_epi64(W, U, A, B) \
13482 ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
13483 (__v4di)(__m256i)(W), \
13484 (__mmask8)(U)))
13485
13486 #define _mm256_maskz_ror_epi64(U, A, B) \
13487 ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
13488 (__v4di)(__m256i)_mm256_setzero_si256 (),\
13489 (__mmask8)(U)))
13490
13491 #define _mm_ror_epi64(A, B) \
13492 ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
13493 (__v2di)(__m128i)_mm_setzero_si128 (),\
13494 (__mmask8)-1))
13495
13496 #define _mm_mask_ror_epi64(W, U, A, B) \
13497 ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
13498 (__v2di)(__m128i)(W), \
13499 (__mmask8)(U)))
13500
13501 #define _mm_maskz_ror_epi64(U, A, B) \
13502 ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
13503 (__v2di)(__m128i)_mm_setzero_si128 (),\
13504 (__mmask8)(U)))
13505
/* rol_epi32 macros (256-bit, then 128-bit).  Each expands to one call of
   __builtin_ia32_prold256_mask or __builtin_ia32_prold128_mask with the
   operands (A, (int) B, <vector>, <mask>) and casts the result to
   __m256i / __m128i:
     plain   - zero vector, mask (__mmask8)-1
     _mask_  - W, U
     _maskz_ - zero vector, U  */
13506 #define _mm256_rol_epi32(A, B) \
13507 ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
13508 (__v8si)(__m256i)_mm256_setzero_si256 (),\
13509 (__mmask8)-1))
13510
13511 #define _mm256_mask_rol_epi32(W, U, A, B) \
13512 ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
13513 (__v8si)(__m256i)(W), \
13514 (__mmask8)(U)))
13515
13516 #define _mm256_maskz_rol_epi32(U, A, B) \
13517 ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
13518 (__v8si)(__m256i)_mm256_setzero_si256 (),\
13519 (__mmask8)(U)))
13520
13521 #define _mm_rol_epi32(A, B) \
13522 ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
13523 (__v4si)(__m128i)_mm_setzero_si128 (),\
13524 (__mmask8)-1))
13525
13526 #define _mm_mask_rol_epi32(W, U, A, B) \
13527 ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
13528 (__v4si)(__m128i)(W), \
13529 (__mmask8)(U)))
13530
13531 #define _mm_maskz_rol_epi32(U, A, B) \
13532 ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
13533 (__v4si)(__m128i)_mm_setzero_si128 (),\
13534 (__mmask8)(U)))
13535
/* ror_epi32 macros (256-bit, then 128-bit).  Each expands to one call of
   __builtin_ia32_prord256_mask or __builtin_ia32_prord128_mask with the
   operands (A, (int) B, <vector>, <mask>) and casts the result to
   __m256i / __m128i:
     plain   - zero vector, mask (__mmask8)-1
     _mask_  - W, U
     _maskz_ - zero vector, U  */
13536 #define _mm256_ror_epi32(A, B) \
13537 ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
13538 (__v8si)(__m256i)_mm256_setzero_si256 (),\
13539 (__mmask8)-1))
13540
13541 #define _mm256_mask_ror_epi32(W, U, A, B) \
13542 ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
13543 (__v8si)(__m256i)(W), \
13544 (__mmask8)(U)))
13545
13546 #define _mm256_maskz_ror_epi32(U, A, B) \
13547 ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
13548 (__v8si)(__m256i) \
13549 _mm256_setzero_si256 (), \
13550 (__mmask8)(U)))
13551
13552 #define _mm_ror_epi32(A, B) \
13553 ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
13554 (__v4si)(__m128i)_mm_setzero_si128 (),\
13555 (__mmask8)-1))
13556
13557 #define _mm_mask_ror_epi32(W, U, A, B) \
13558 ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
13559 (__v4si)(__m128i)(W), \
13560 (__mmask8)(U)))
13561
13562 #define _mm_maskz_ror_epi32(U, A, B) \
13563 ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
13564 (__v4si)(__m128i)_mm_setzero_si128 (),\
13565 (__mmask8)(U)))
13566
/* 256-bit alignr_epi32 macros.  Each expands to one call of
   __builtin_ia32_alignd256_mask (X, Y, (int) C, <vector>, <mask>) and
   casts the result to __m256i:
     _mm256_alignr_epi32        - zero vector, mask (__mmask8)-1
     _mm256_mask_alignr_epi32   - W, U
     _mm256_maskz_alignr_epi32  - zero vector, U
   The unmasked form passes a zero vector rather than a second copy of X
   as <vector>: with the all-ones mask that operand does not contribute
   to the result, and repeating X would evaluate the caller's argument
   expression twice.  */
13567 #define _mm256_alignr_epi32(X, Y, C) \
13568 ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
13569 (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)_mm256_setzero_si256 (), (__mmask8)-1))
13570
13571 #define _mm256_mask_alignr_epi32(W, U, X, Y, C) \
13572 ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
13573 (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(W), (__mmask8)(U)))
13574
13575 #define _mm256_maskz_alignr_epi32(U, X, Y, C) \
13576 ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
13577 (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)_mm256_setzero_si256 (),\
13578 (__mmask8)(U)))
13579
/* 256-bit alignr_epi64 macros.  Each expands to one call of
   __builtin_ia32_alignq256_mask (X, Y, (int) C, <vector>, <mask>) and
   casts the result to __m256i:
     _mm256_alignr_epi64        - zero vector, mask (__mmask8)-1
     _mm256_mask_alignr_epi64   - W, U
     _mm256_maskz_alignr_epi64  - zero vector, U
   The unmasked form passes a zero vector rather than a second copy of X
   as <vector>: with the all-ones mask that operand does not contribute
   to the result, and repeating X would evaluate the caller's argument
   expression twice.  */
13580 #define _mm256_alignr_epi64(X, Y, C) \
13581 ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
13582 (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)-1))
13583
13584 #define _mm256_mask_alignr_epi64(W, U, X, Y, C) \
13585 ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
13586 (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(W), (__mmask8)(U)))
13587
13588 #define _mm256_maskz_alignr_epi64(U, X, Y, C) \
13589 ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
13590 (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)_mm256_setzero_si256 (),\
13591 (__mmask8)(U)))
13592
/* 128-bit alignr_epi32 macros.  Each expands to one call of
   __builtin_ia32_alignd128_mask (X, Y, (int) C, <vector>, <mask>) and
   casts the result to __m128i:
     _mm_alignr_epi32        - zero vector, mask (__mmask8)-1
     _mm_mask_alignr_epi32   - W, U
     _mm_maskz_alignr_epi32  - zero vector, U
   The unmasked form passes a zero vector rather than a second copy of X
   as <vector>: with the all-ones mask that operand does not contribute
   to the result, and repeating X would evaluate the caller's argument
   expression twice.  */
13593 #define _mm_alignr_epi32(X, Y, C) \
13594 ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
13595 (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))
13596
13597 #define _mm_mask_alignr_epi32(W, U, X, Y, C) \
13598 ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
13599 (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)(W), (__mmask8)(U)))
13600
13601 #define _mm_maskz_alignr_epi32(U, X, Y, C) \
13602 ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
13603 (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)_mm_setzero_si128 (),\
13604 (__mmask8)(U)))
13605
/* 128-bit alignr_epi64 macros.  Each expands to one call of
   __builtin_ia32_alignq128_mask (X, Y, (int) C, <vector>, <mask>) and
   casts the result to __m128i:
     _mm_alignr_epi64        - zero vector, mask (__mmask8)-1
     _mm_mask_alignr_epi64   - W, U
     _mm_maskz_alignr_epi64  - zero vector, U
   The unmasked form passes a zero vector rather than a second copy of X
   as <vector>: with the all-ones mask that operand does not contribute
   to the result, and repeating X would evaluate the caller's argument
   expression twice.  */
13606 #define _mm_alignr_epi64(X, Y, C) \
13607 ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
13608 (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))
13609
13610 #define _mm_mask_alignr_epi64(W, U, X, Y, C) \
13611 ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
13612 (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(W), (__mmask8)(U)))
13613
13614 #define _mm_maskz_alignr_epi64(U, X, Y, C) \
13615 ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
13616 (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)_mm_setzero_si128 (),\
13617 (__mmask8)(U)))
13618
/* Masked cvtps_ph macros.  The _mm_ forms call
   __builtin_ia32_vcvtps2ph_mask with A as __m128/__v4sf; the _mm256_
   forms call __builtin_ia32_vcvtps2ph256_mask with A as __m256/__v8sf.
   The remaining operands are ((int) I, <vector> as __v8hi, U as
   __mmask8), and all four macros yield __m128i.  The _mask_ forms pass W
   as <vector>; the _maskz_ forms pass _mm_setzero_si128 ().  */
13619 #define _mm_mask_cvtps_ph(W, U, A, I) \
13620 ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), (int) (I), \
13621 (__v8hi)(__m128i) (W), (__mmask8) (U)))
13622
13623 #define _mm_maskz_cvtps_ph(U, A, I) \
13624 ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), (int) (I), \
13625 (__v8hi)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
13626
13627 #define _mm256_mask_cvtps_ph(W, U, A, I) \
13628 ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), (int) (I), \
13629 (__v8hi)(__m128i) (W), (__mmask8) (U)))
13630
13631 #define _mm256_maskz_cvtps_ph(U, A, I) \
13632 ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), (int) (I), \
13633 (__v8hi)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
13634
/* Masked srai_epi32 macros.  Each expands to one call of
   __builtin_ia32_psradi256_mask or __builtin_ia32_psradi128_mask with
   the operands (A, (int) B, <vector>, U as __mmask8) and casts the result
   to __m256i / __m128i.  The _mask_ forms pass W as <vector>; the _maskz_
   forms pass a zero vector.  */
13635 #define _mm256_mask_srai_epi32(W, U, A, B) \
13636 ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
13637 (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))
13638
13639 #define _mm256_maskz_srai_epi32(U, A, B) \
13640 ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
13641 (int)(B), (__v8si)_mm256_setzero_si256 (), (__mmask8)(U)))
13642
13643 #define _mm_mask_srai_epi32(W, U, A, B) \
13644 ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
13645 (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
13646
13647 #define _mm_maskz_srai_epi32(U, A, B) \
13648 ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
13649 (int)(B), (__v4si)_mm_setzero_si128 (), (__mmask8)(U)))
13650
/* srai_epi64 macros (256-bit, then 128-bit).  Each expands to one call of
   __builtin_ia32_psraqi256_mask or __builtin_ia32_psraqi128_mask with
   the operands (A, (int) B, <vector>, <mask>) and casts the result to
   __m256i / __m128i:
     plain   - zero vector, mask (__mmask8)-1
     _mask_  - W, U
     _maskz_ - zero vector, U  */
13651 #define _mm256_srai_epi64(A, B) \
13652 ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
13653 (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)-1))
13654
13655 #define _mm256_mask_srai_epi64(W, U, A, B) \
13656 ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
13657 (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))
13658
13659 #define _mm256_maskz_srai_epi64(U, A, B) \
13660 ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
13661 (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
13662
13663 #define _mm_srai_epi64(A, B) \
13664 ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
13665 (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)-1))
13666
13667 #define _mm_mask_srai_epi64(W, U, A, B) \
13668 ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
13669 (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))
13670
13671 #define _mm_maskz_srai_epi64(U, A, B) \
13672 ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
13673 (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)(U)))
13674
/* Masked permutex_pd macros.  Both expand to one call of
   __builtin_ia32_permdf256_mask (A, (int) B, <vector>, U as __mmask8)
   and cast the result to __m256d.  The _mask_ form passes W as <vector>;
   the _maskz_ form passes _mm256_setzero_pd ().  */
13675 #define _mm256_mask_permutex_pd(W, U, A, B) \
13676 ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \
13677 (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))
13678
13679 #define _mm256_maskz_permutex_pd(U, A, B) \
13680 ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \
13681 (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))
13682
/* Masked 256-bit permute macros.  The _pd forms call
   __builtin_ia32_vpermilpd256_mask and the _ps forms call
   __builtin_ia32_vpermilps256_mask, each with the operands
   (X, (int) C, <vector>, U as __mmask8).  The _mask_ forms pass W as
   <vector>; the _maskz_ forms pass a zero vector.  */
13683 #define _mm256_mask_permute_pd(W, U, X, C) \
13684 ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
13685 (__v4df)(__m256d)(W), \
13686 (__mmask8)(U)))
13687
13688 #define _mm256_maskz_permute_pd(U, X, C) \
13689 ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
13690 (__v4df)(__m256d)_mm256_setzero_pd (),\
13691 (__mmask8)(U)))
13692
13693 #define _mm256_mask_permute_ps(W, U, X, C) \
13694 ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \
13695 (__v8sf)(__m256)(W), (__mmask8)(U)))
13696
13697 #define _mm256_maskz_permute_ps(U, X, C) \
13698 ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \
13699 (__v8sf)(__m256)_mm256_setzero_ps (), \
13700 (__mmask8)(U)))
13701
/* Masked 128-bit permute macros.  The _pd forms call
   __builtin_ia32_vpermilpd_mask and the _ps forms call
   __builtin_ia32_vpermilps_mask, each with the operands
   (X, (int) C, <vector>, U as __mmask8).  The _mask_ forms pass W as
   <vector>; the _maskz_ forms pass a zero vector.  */
13702 #define _mm_mask_permute_pd(W, U, X, C) \
13703 ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \
13704 (__v2df)(__m128d)(W), (__mmask8)(U)))
13705
13706 #define _mm_maskz_permute_pd(U, X, C) \
13707 ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \
13708 (__v2df)(__m128d)_mm_setzero_pd (), \
13709 (__mmask8)(U)))
13710
13711 #define _mm_mask_permute_ps(W, U, X, C) \
13712 ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \
13713 (__v4sf)(__m128)(W), (__mmask8)(U)))
13714
13715 #define _mm_maskz_permute_ps(U, X, C) \
13716 ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \
13717 (__v4sf)(__m128)_mm_setzero_ps (), \
13718 (__mmask8)(U)))
13719
/* 256-bit mask_blend macros.  Each expands to one call of a
   __builtin_ia32_blendm*_256_mask builtin with the operands
   (__A, __W, __U as __mmask8) and casts the result to the public vector
   type.  Unlike most macros in this file, the vector arguments are cast
   straight to the internal __v* type without an intermediate cast to the
   public __m256* type.  */
13720 #define _mm256_mask_blend_pd(__U, __A, __W) \
13721 ((__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) (__A), \
13722 (__v4df) (__W), \
13723 (__mmask8) (__U)))
13724
13725 #define _mm256_mask_blend_ps(__U, __A, __W) \
13726 ((__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) (__A), \
13727 (__v8sf) (__W), \
13728 (__mmask8) (__U)))
13729
13730 #define _mm256_mask_blend_epi64(__U, __A, __W) \
13731 ((__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) (__A), \
13732 (__v4di) (__W), \
13733 (__mmask8) (__U)))
13734
13735 #define _mm256_mask_blend_epi32(__U, __A, __W) \
13736 ((__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) (__A), \
13737 (__v8si) (__W), \
13738 (__mmask8) (__U)))
13739
/* 128-bit mask_blend macros.  Each expands to one call of a
   __builtin_ia32_blendm*_128_mask builtin with the operands
   (__A, __W, __U as __mmask8) and casts the result to the public vector
   type.  The vector arguments are cast straight to the internal __v*
   type without an intermediate cast to the public __m128* type.  */
13740 #define _mm_mask_blend_pd(__U, __A, __W) \
13741 ((__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) (__A), \
13742 (__v2df) (__W), \
13743 (__mmask8) (__U)))
13744
13745 #define _mm_mask_blend_ps(__U, __A, __W) \
13746 ((__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) (__A), \
13747 (__v4sf) (__W), \
13748 (__mmask8) (__U)))
13749
13750 #define _mm_mask_blend_epi64(__U, __A, __W) \
13751 ((__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) (__A), \
13752 (__v2di) (__W), \
13753 (__mmask8) (__U)))
13754
13755 #define _mm_mask_blend_epi32(__U, __A, __W) \
13756 ((__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) (__A), \
13757 (__v4si) (__W), \
13758 (__mmask8) (__U)))
13759
/* Unmasked 256-bit compare-to-mask macros.  Each expands to one call of
   the matching builtin (ucmpd256 / cmpq256 / cmpd256 / ucmpq256 /
   cmppd256 / cmpps256, all with the _mask suffix) with the operands
   (X, Y, (int) P, (__mmask8)-1) and casts the result to __mmask8.
   The _epu* forms reuse the signed __v8si / __v4di casts and differ from
   the _epi* forms only in the builtin they call.  */
13760 #define _mm256_cmp_epu32_mask(X, Y, P) \
13761 ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \
13762 (__v8si)(__m256i)(Y), (int)(P),\
13763 (__mmask8)-1))
13764
13765 #define _mm256_cmp_epi64_mask(X, Y, P) \
13766 ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X), \
13767 (__v4di)(__m256i)(Y), (int)(P),\
13768 (__mmask8)-1))
13769
13770 #define _mm256_cmp_epi32_mask(X, Y, P) \
13771 ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X), \
13772 (__v8si)(__m256i)(Y), (int)(P),\
13773 (__mmask8)-1))
13774
13775 #define _mm256_cmp_epu64_mask(X, Y, P) \
13776 ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X), \
13777 (__v4di)(__m256i)(Y), (int)(P),\
13778 (__mmask8)-1))
13779
13780 #define _mm256_cmp_pd_mask(X, Y, P) \
13781 ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X), \
13782 (__v4df)(__m256d)(Y), (int)(P),\
13783 (__mmask8)-1))
13784
13785 #define _mm256_cmp_ps_mask(X, Y, P) \
13786 ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X), \
13787 (__v8sf)(__m256)(Y), (int)(P),\
13788 (__mmask8)-1))
13789
/* Masked 256-bit compare-to-mask macros.  Each expands to one call of
   the same builtin as its unmasked counterpart (cmpq256 / cmpd256 /
   ucmpq256 / ucmpd256 / cmppd256 / cmpps256, all with the _mask suffix)
   with the operands (X, Y, (int) P, M as __mmask8) and casts the result
   to __mmask8.  */
13790 #define _mm256_mask_cmp_epi64_mask(M, X, Y, P) \
13791 ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X), \
13792 (__v4di)(__m256i)(Y), (int)(P),\
13793 (__mmask8)(M)))
13794
13795 #define _mm256_mask_cmp_epi32_mask(M, X, Y, P) \
13796 ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X), \
13797 (__v8si)(__m256i)(Y), (int)(P),\
13798 (__mmask8)(M)))
13799
13800 #define _mm256_mask_cmp_epu64_mask(M, X, Y, P) \
13801 ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X), \
13802 (__v4di)(__m256i)(Y), (int)(P),\
13803 (__mmask8)(M)))
13804
13805 #define _mm256_mask_cmp_epu32_mask(M, X, Y, P) \
13806 ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \
13807 (__v8si)(__m256i)(Y), (int)(P),\
13808 (__mmask8)(M)))
13809
13810 #define _mm256_mask_cmp_pd_mask(M, X, Y, P) \
13811 ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X), \
13812 (__v4df)(__m256d)(Y), (int)(P),\
13813 (__mmask8)(M)))
13814
13815 #define _mm256_mask_cmp_ps_mask(M, X, Y, P) \
13816 ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X), \
13817 (__v8sf)(__m256)(Y), (int)(P),\
13818 (__mmask8)(M)))
13819
/* Unmasked 128-bit compare-to-mask macros.  Each expands to one call of
   the matching builtin (cmpq128 / cmpd128 / ucmpq128 / ucmpd128 /
   cmppd128 / cmpps128, all with the _mask suffix) with the operands
   (X, Y, (int) P, (__mmask8)-1) and casts the result to __mmask8.
   The _epu* forms reuse the signed __v2di / __v4si casts and differ from
   the _epi* forms only in the builtin they call.  */
13820 #define _mm_cmp_epi64_mask(X, Y, P) \
13821 ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X), \
13822 (__v2di)(__m128i)(Y), (int)(P),\
13823 (__mmask8)-1))
13824
13825 #define _mm_cmp_epi32_mask(X, Y, P) \
13826 ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X), \
13827 (__v4si)(__m128i)(Y), (int)(P),\
13828 (__mmask8)-1))
13829
13830 #define _mm_cmp_epu64_mask(X, Y, P) \
13831 ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X), \
13832 (__v2di)(__m128i)(Y), (int)(P),\
13833 (__mmask8)-1))
13834
13835 #define _mm_cmp_epu32_mask(X, Y, P) \
13836 ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X), \
13837 (__v4si)(__m128i)(Y), (int)(P),\
13838 (__mmask8)-1))
13839
13840 #define _mm_cmp_pd_mask(X, Y, P) \
13841 ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X), \
13842 (__v2df)(__m128d)(Y), (int)(P),\
13843 (__mmask8)-1))
13844
13845 #define _mm_cmp_ps_mask(X, Y, P) \
13846 ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X), \
13847 (__v4sf)(__m128)(Y), (int)(P),\
13848 (__mmask8)-1))
13849
/* Masked 128-bit compare-to-mask macros.  Each expands to one call of
   the same builtin as its unmasked counterpart (cmpq128 / cmpd128 /
   ucmpq128 / ucmpd128 / cmppd128 / cmpps128, all with the _mask suffix)
   with the operands (X, Y, (int) P, M as __mmask8) and casts the result
   to __mmask8.  */
13850 #define _mm_mask_cmp_epi64_mask(M, X, Y, P) \
13851 ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X), \
13852 (__v2di)(__m128i)(Y), (int)(P),\
13853 (__mmask8)(M)))
13854
13855 #define _mm_mask_cmp_epi32_mask(M, X, Y, P) \
13856 ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X), \
13857 (__v4si)(__m128i)(Y), (int)(P),\
13858 (__mmask8)(M)))
13859
13860 #define _mm_mask_cmp_epu64_mask(M, X, Y, P) \
13861 ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X), \
13862 (__v2di)(__m128i)(Y), (int)(P),\
13863 (__mmask8)(M)))
13864
13865 #define _mm_mask_cmp_epu32_mask(M, X, Y, P) \
13866 ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X), \
13867 (__v4si)(__m128i)(Y), (int)(P),\
13868 (__mmask8)(M)))
13869
13870 #define _mm_mask_cmp_pd_mask(M, X, Y, P) \
13871 ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X), \
13872 (__v2df)(__m128d)(Y), (int)(P),\
13873 (__mmask8)(M)))
13874
13875 #define _mm_mask_cmp_ps_mask(M, X, Y, P) \
13876 ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X), \
13877 (__v4sf)(__m128)(Y), (int)(P),\
13878 (__mmask8)(M)))
13879
13880 #endif
13881
/* Alias macros that forward to other intrinsics by name.
   _mm256_permutexvar_ps (A, B) expands to _mm256_permutevar8x32_ps with
   the two arguments swapped, i.e. ((B), (A)).  The *_cvt_roundps_ph
   macros forward their arguments unchanged, in the same order, to the
   corresponding *_cvtps_ph macro.  */
13882 #define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps ((B), (A))
13883 #define _mm256_mask_cvt_roundps_ph(A, B, C, D) \
13884 _mm256_mask_cvtps_ph ((A), (B), (C), (D))
13885 #define _mm256_maskz_cvt_roundps_ph(A, B, C) \
13886 _mm256_maskz_cvtps_ph ((A), (B), (C))
13887 #define _mm_mask_cvt_roundps_ph(A, B, C, D) \
13888 _mm_mask_cvtps_ph ((A), (B), (C), (D))
13889 #define _mm_maskz_cvt_roundps_ph(A, B, C) _mm_maskz_cvtps_ph ((A), (B), (C))
13890
13891 #ifdef __DISABLE_AVX512VL__
13892 #undef __DISABLE_AVX512VL__
13893 #pragma GCC pop_options
13894 #endif /* __DISABLE_AVX512VL__ */
13895
13896 #endif /* _AVX512VLINTRIN_H_INCLUDED */
13897