/* Copyright (C) 2014-2015 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
24 1.1 mrg #ifndef _IMMINTRIN_H_INCLUDED
25 1.1 mrg #error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
26 1.1 mrg #endif
27 1.1 mrg
28 1.1 mrg #ifndef _AVX512VLDQINTRIN_H_INCLUDED
29 1.1 mrg #define _AVX512VLDQINTRIN_H_INCLUDED
30 1.1 mrg
31 1.1 mrg #if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
32 1.1 mrg #pragma GCC push_options
33 1.1 mrg #pragma GCC target("avx512vl,avx512dq")
34 1.1 mrg #define __DISABLE_AVX512VLDQ__
35 1.1 mrg #endif /* __AVX512VLDQ__ */
36 1.1 mrg
37 1.1 mrg extern __inline __m256i
38 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
39 1.1 mrg _mm256_cvttpd_epi64 (__m256d __A)
40 1.1 mrg {
41 1.1 mrg return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
42 1.1 mrg (__v4di)
43 1.1 mrg _mm256_setzero_si256 (),
44 1.1 mrg (__mmask8) -1);
45 1.1 mrg }
46 1.1 mrg
47 1.1 mrg extern __inline __m256i
48 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
49 1.1 mrg _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
50 1.1 mrg {
51 1.1 mrg return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
52 1.1 mrg (__v4di) __W,
53 1.1 mrg (__mmask8) __U);
54 1.1 mrg }
55 1.1 mrg
56 1.1 mrg extern __inline __m256i
57 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
58 1.1 mrg _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A)
59 1.1 mrg {
60 1.1 mrg return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
61 1.1 mrg (__v4di)
62 1.1 mrg _mm256_setzero_si256 (),
63 1.1 mrg (__mmask8) __U);
64 1.1 mrg }
65 1.1 mrg
66 1.1 mrg extern __inline __m128i
67 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
68 1.1 mrg _mm_cvttpd_epi64 (__m128d __A)
69 1.1 mrg {
70 1.1 mrg return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
71 1.1 mrg (__v2di)
72 1.1 mrg _mm_setzero_di (),
73 1.1 mrg (__mmask8) -1);
74 1.1 mrg }
75 1.1 mrg
76 1.1 mrg extern __inline __m128i
77 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
78 1.1 mrg _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
79 1.1 mrg {
80 1.1 mrg return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
81 1.1 mrg (__v2di) __W,
82 1.1 mrg (__mmask8) __U);
83 1.1 mrg }
84 1.1 mrg
85 1.1 mrg extern __inline __m128i
86 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
87 1.1 mrg _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A)
88 1.1 mrg {
89 1.1 mrg return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
90 1.1 mrg (__v2di)
91 1.1 mrg _mm_setzero_si128 (),
92 1.1 mrg (__mmask8) __U);
93 1.1 mrg }
94 1.1 mrg
95 1.1 mrg extern __inline __m256i
96 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
97 1.1 mrg _mm256_cvttpd_epu64 (__m256d __A)
98 1.1 mrg {
99 1.1 mrg return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
100 1.1 mrg (__v4di)
101 1.1 mrg _mm256_setzero_si256 (),
102 1.1 mrg (__mmask8) -1);
103 1.1 mrg }
104 1.1 mrg
105 1.1 mrg extern __inline __m256i
106 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
107 1.1 mrg _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
108 1.1 mrg {
109 1.1 mrg return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
110 1.1 mrg (__v4di) __W,
111 1.1 mrg (__mmask8) __U);
112 1.1 mrg }
113 1.1 mrg
114 1.1 mrg extern __inline __m256i
115 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
116 1.1 mrg _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A)
117 1.1 mrg {
118 1.1 mrg return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
119 1.1 mrg (__v4di)
120 1.1 mrg _mm256_setzero_si256 (),
121 1.1 mrg (__mmask8) __U);
122 1.1 mrg }
123 1.1 mrg
124 1.1 mrg extern __inline __m128i
125 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126 1.1 mrg _mm_cvttpd_epu64 (__m128d __A)
127 1.1 mrg {
128 1.1 mrg return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
129 1.1 mrg (__v2di)
130 1.1 mrg _mm_setzero_di (),
131 1.1 mrg (__mmask8) -1);
132 1.1 mrg }
133 1.1 mrg
134 1.1 mrg extern __inline __m128i
135 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136 1.1 mrg _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
137 1.1 mrg {
138 1.1 mrg return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
139 1.1 mrg (__v2di) __W,
140 1.1 mrg (__mmask8) __U);
141 1.1 mrg }
142 1.1 mrg
143 1.1 mrg extern __inline __m128i
144 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
145 1.1 mrg _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A)
146 1.1 mrg {
147 1.1 mrg return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
148 1.1 mrg (__v2di)
149 1.1 mrg _mm_setzero_si128 (),
150 1.1 mrg (__mmask8) __U);
151 1.1 mrg }
152 1.1 mrg
153 1.1 mrg extern __inline __m256i
154 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
155 1.1 mrg _mm256_cvtpd_epi64 (__m256d __A)
156 1.1 mrg {
157 1.1 mrg return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
158 1.1 mrg (__v4di)
159 1.1 mrg _mm256_setzero_si256 (),
160 1.1 mrg (__mmask8) -1);
161 1.1 mrg }
162 1.1 mrg
163 1.1 mrg extern __inline __m256i
164 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
165 1.1 mrg _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
166 1.1 mrg {
167 1.1 mrg return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
168 1.1 mrg (__v4di) __W,
169 1.1 mrg (__mmask8) __U);
170 1.1 mrg }
171 1.1 mrg
172 1.1 mrg extern __inline __m256i
173 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
174 1.1 mrg _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A)
175 1.1 mrg {
176 1.1 mrg return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
177 1.1 mrg (__v4di)
178 1.1 mrg _mm256_setzero_si256 (),
179 1.1 mrg (__mmask8) __U);
180 1.1 mrg }
181 1.1 mrg
182 1.1 mrg extern __inline __m128i
183 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
184 1.1 mrg _mm_cvtpd_epi64 (__m128d __A)
185 1.1 mrg {
186 1.1 mrg return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
187 1.1 mrg (__v2di)
188 1.1 mrg _mm_setzero_di (),
189 1.1 mrg (__mmask8) -1);
190 1.1 mrg }
191 1.1 mrg
192 1.1 mrg extern __inline __m128i
193 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
194 1.1 mrg _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
195 1.1 mrg {
196 1.1 mrg return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
197 1.1 mrg (__v2di) __W,
198 1.1 mrg (__mmask8) __U);
199 1.1 mrg }
200 1.1 mrg
201 1.1 mrg extern __inline __m128i
202 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
203 1.1 mrg _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A)
204 1.1 mrg {
205 1.1 mrg return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
206 1.1 mrg (__v2di)
207 1.1 mrg _mm_setzero_si128 (),
208 1.1 mrg (__mmask8) __U);
209 1.1 mrg }
210 1.1 mrg
211 1.1 mrg extern __inline __m256i
212 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
213 1.1 mrg _mm256_cvtpd_epu64 (__m256d __A)
214 1.1 mrg {
215 1.1 mrg return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
216 1.1 mrg (__v4di)
217 1.1 mrg _mm256_setzero_si256 (),
218 1.1 mrg (__mmask8) -1);
219 1.1 mrg }
220 1.1 mrg
221 1.1 mrg extern __inline __m256i
222 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
223 1.1 mrg _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
224 1.1 mrg {
225 1.1 mrg return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
226 1.1 mrg (__v4di) __W,
227 1.1 mrg (__mmask8) __U);
228 1.1 mrg }
229 1.1 mrg
230 1.1 mrg extern __inline __m256i
231 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
232 1.1 mrg _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A)
233 1.1 mrg {
234 1.1 mrg return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
235 1.1 mrg (__v4di)
236 1.1 mrg _mm256_setzero_si256 (),
237 1.1 mrg (__mmask8) __U);
238 1.1 mrg }
239 1.1 mrg
240 1.1 mrg extern __inline __m128i
241 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
242 1.1 mrg _mm_cvtpd_epu64 (__m128d __A)
243 1.1 mrg {
244 1.1 mrg return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
245 1.1 mrg (__v2di)
246 1.1 mrg _mm_setzero_di (),
247 1.1 mrg (__mmask8) -1);
248 1.1 mrg }
249 1.1 mrg
250 1.1 mrg extern __inline __m128i
251 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
252 1.1 mrg _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
253 1.1 mrg {
254 1.1 mrg return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
255 1.1 mrg (__v2di) __W,
256 1.1 mrg (__mmask8) __U);
257 1.1 mrg }
258 1.1 mrg
259 1.1 mrg extern __inline __m128i
260 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
261 1.1 mrg _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A)
262 1.1 mrg {
263 1.1 mrg return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
264 1.1 mrg (__v2di)
265 1.1 mrg _mm_setzero_si128 (),
266 1.1 mrg (__mmask8) __U);
267 1.1 mrg }
268 1.1 mrg
269 1.1 mrg extern __inline __m256i
270 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
271 1.1 mrg _mm256_cvttps_epi64 (__m128 __A)
272 1.1 mrg {
273 1.1 mrg return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
274 1.1 mrg (__v4di)
275 1.1 mrg _mm256_setzero_si256 (),
276 1.1 mrg (__mmask8) -1);
277 1.1 mrg }
278 1.1 mrg
279 1.1 mrg extern __inline __m256i
280 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
281 1.1 mrg _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
282 1.1 mrg {
283 1.1 mrg return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
284 1.1 mrg (__v4di) __W,
285 1.1 mrg (__mmask8) __U);
286 1.1 mrg }
287 1.1 mrg
288 1.1 mrg extern __inline __m256i
289 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
290 1.1 mrg _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
291 1.1 mrg {
292 1.1 mrg return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
293 1.1 mrg (__v4di)
294 1.1 mrg _mm256_setzero_si256 (),
295 1.1 mrg (__mmask8) __U);
296 1.1 mrg }
297 1.1 mrg
298 1.1 mrg extern __inline __m128i
299 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
300 1.1 mrg _mm_cvttps_epi64 (__m128 __A)
301 1.1 mrg {
302 1.1 mrg return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
303 1.1 mrg (__v2di)
304 1.1 mrg _mm_setzero_di (),
305 1.1 mrg (__mmask8) -1);
306 1.1 mrg }
307 1.1 mrg
308 1.1 mrg extern __inline __m128i
309 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
310 1.1 mrg _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
311 1.1 mrg {
312 1.1 mrg return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
313 1.1 mrg (__v2di) __W,
314 1.1 mrg (__mmask8) __U);
315 1.1 mrg }
316 1.1 mrg
317 1.1 mrg extern __inline __m128i
318 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
319 1.1 mrg _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
320 1.1 mrg {
321 1.1 mrg return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
322 1.1 mrg (__v2di)
323 1.1 mrg _mm_setzero_di (),
324 1.1 mrg (__mmask8) __U);
325 1.1 mrg }
326 1.1 mrg
327 1.1 mrg extern __inline __m256i
328 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
329 1.1 mrg _mm256_cvttps_epu64 (__m128 __A)
330 1.1 mrg {
331 1.1 mrg return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
332 1.1 mrg (__v4di)
333 1.1 mrg _mm256_setzero_si256 (),
334 1.1 mrg (__mmask8) -1);
335 1.1 mrg }
336 1.1 mrg
337 1.1 mrg extern __inline __m256i
338 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
339 1.1 mrg _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
340 1.1 mrg {
341 1.1 mrg return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
342 1.1 mrg (__v4di) __W,
343 1.1 mrg (__mmask8) __U);
344 1.1 mrg }
345 1.1 mrg
346 1.1 mrg extern __inline __m256i
347 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
348 1.1 mrg _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
349 1.1 mrg {
350 1.1 mrg return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
351 1.1 mrg (__v4di)
352 1.1 mrg _mm256_setzero_si256 (),
353 1.1 mrg (__mmask8) __U);
354 1.1 mrg }
355 1.1 mrg
356 1.1 mrg extern __inline __m128i
357 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
358 1.1 mrg _mm_cvttps_epu64 (__m128 __A)
359 1.1 mrg {
360 1.1 mrg return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
361 1.1 mrg (__v2di)
362 1.1 mrg _mm_setzero_di (),
363 1.1 mrg (__mmask8) -1);
364 1.1 mrg }
365 1.1 mrg
366 1.1 mrg extern __inline __m128i
367 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
368 1.1 mrg _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
369 1.1 mrg {
370 1.1 mrg return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
371 1.1 mrg (__v2di) __W,
372 1.1 mrg (__mmask8) __U);
373 1.1 mrg }
374 1.1 mrg
375 1.1 mrg extern __inline __m128i
376 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
377 1.1 mrg _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
378 1.1 mrg {
379 1.1 mrg return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
380 1.1 mrg (__v2di)
381 1.1 mrg _mm_setzero_di (),
382 1.1 mrg (__mmask8) __U);
383 1.1 mrg }
384 1.1 mrg
385 1.1 mrg extern __inline __m256d
386 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
387 1.1 mrg _mm256_broadcast_f64x2 (__m128d __A)
388 1.1 mrg {
389 1.1 mrg return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
390 1.1 mrg __A,
391 1.1 mrg (__v4df)_mm256_undefined_pd(),
392 1.1 mrg (__mmask8) -
393 1.1 mrg 1);
394 1.1 mrg }
395 1.1 mrg
396 1.1 mrg extern __inline __m256d
397 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
398 1.1 mrg _mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
399 1.1 mrg {
400 1.1 mrg return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
401 1.1 mrg __A,
402 1.1 mrg (__v4df)
403 1.1 mrg __O, __M);
404 1.1 mrg }
405 1.1 mrg
406 1.1 mrg extern __inline __m256d
407 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
408 1.1 mrg _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
409 1.1 mrg {
410 1.1 mrg return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
411 1.1 mrg __A,
412 1.1 mrg (__v4df)
413 1.1 mrg _mm256_setzero_ps (),
414 1.1 mrg __M);
415 1.1 mrg }
416 1.1 mrg
417 1.1 mrg extern __inline __m256i
418 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
419 1.1 mrg _mm256_broadcast_i64x2 (__m128i __A)
420 1.1 mrg {
421 1.1 mrg return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
422 1.1 mrg __A,
423 1.1 mrg (__v4di)_mm256_undefined_si256(),
424 1.1 mrg (__mmask8) -
425 1.1 mrg 1);
426 1.1 mrg }
427 1.1 mrg
428 1.1 mrg extern __inline __m256i
429 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
430 1.1 mrg _mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
431 1.1 mrg {
432 1.1 mrg return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
433 1.1 mrg __A,
434 1.1 mrg (__v4di)
435 1.1 mrg __O, __M);
436 1.1 mrg }
437 1.1 mrg
438 1.1 mrg extern __inline __m256i
439 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
440 1.1 mrg _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
441 1.1 mrg {
442 1.1 mrg return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
443 1.1 mrg __A,
444 1.1 mrg (__v4di)
445 1.1 mrg _mm256_setzero_si256 (),
446 1.1 mrg __M);
447 1.1 mrg }
448 1.1 mrg
449 1.1 mrg extern __inline __m256
450 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
451 1.1 mrg _mm256_broadcast_f32x2 (__m128 __A)
452 1.1 mrg {
453 1.1 mrg return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
454 1.1 mrg (__v8sf)_mm256_undefined_ps(),
455 1.1 mrg (__mmask8) -
456 1.1 mrg 1);
457 1.1 mrg }
458 1.1 mrg
459 1.1 mrg extern __inline __m256
460 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
461 1.1 mrg _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
462 1.1 mrg {
463 1.1 mrg return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
464 1.1 mrg (__v8sf) __O,
465 1.1 mrg __M);
466 1.1 mrg }
467 1.1 mrg
468 1.1 mrg extern __inline __m256
469 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
470 1.1 mrg _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
471 1.1 mrg {
472 1.1 mrg return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
473 1.1 mrg (__v8sf)
474 1.1 mrg _mm256_setzero_ps (),
475 1.1 mrg __M);
476 1.1 mrg }
477 1.1 mrg
478 1.1 mrg extern __inline __m256i
479 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
480 1.1 mrg _mm256_broadcast_i32x2 (__m128i __A)
481 1.1 mrg {
482 1.1 mrg return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
483 1.1 mrg __A,
484 1.1 mrg (__v8si)_mm256_undefined_si256(),
485 1.1 mrg (__mmask8) -
486 1.1 mrg 1);
487 1.1 mrg }
488 1.1 mrg
489 1.1 mrg extern __inline __m256i
490 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
491 1.1 mrg _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
492 1.1 mrg {
493 1.1 mrg return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
494 1.1 mrg __A,
495 1.1 mrg (__v8si)
496 1.1 mrg __O, __M);
497 1.1 mrg }
498 1.1 mrg
499 1.1 mrg extern __inline __m256i
500 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
501 1.1 mrg _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
502 1.1 mrg {
503 1.1 mrg return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
504 1.1 mrg __A,
505 1.1 mrg (__v8si)
506 1.1 mrg _mm256_setzero_si256 (),
507 1.1 mrg __M);
508 1.1 mrg }
509 1.1 mrg
510 1.1 mrg extern __inline __m128i
511 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
512 1.1 mrg _mm_broadcast_i32x2 (__m128i __A)
513 1.1 mrg {
514 1.1 mrg return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
515 1.1 mrg __A,
516 1.1 mrg (__v4si)_mm_undefined_si128(),
517 1.1 mrg (__mmask8) -
518 1.1 mrg 1);
519 1.1 mrg }
520 1.1 mrg
521 1.1 mrg extern __inline __m128i
522 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
523 1.1 mrg _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
524 1.1 mrg {
525 1.1 mrg return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
526 1.1 mrg __A,
527 1.1 mrg (__v4si)
528 1.1 mrg __O, __M);
529 1.1 mrg }
530 1.1 mrg
531 1.1 mrg extern __inline __m128i
532 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
533 1.1 mrg _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
534 1.1 mrg {
535 1.1 mrg return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
536 1.1 mrg __A,
537 1.1 mrg (__v4si)
538 1.1 mrg _mm_setzero_si128 (),
539 1.1 mrg __M);
540 1.1 mrg }
541 1.1 mrg
542 1.1 mrg extern __inline __m256i
543 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
544 1.1 mrg _mm256_mullo_epi64 (__m256i __A, __m256i __B)
545 1.1 mrg {
546 1.1 mrg return (__m256i) ((__v4du) __A * (__v4du) __B);
547 1.1 mrg }
548 1.1 mrg
549 1.1 mrg extern __inline __m256i
550 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
551 1.1 mrg _mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
552 1.1 mrg __m256i __B)
553 1.1 mrg {
554 1.1 mrg return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
555 1.1 mrg (__v4di) __B,
556 1.1 mrg (__v4di) __W,
557 1.1 mrg (__mmask8) __U);
558 1.1 mrg }
559 1.1 mrg
560 1.1 mrg extern __inline __m256i
561 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
562 1.1 mrg _mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
563 1.1 mrg {
564 1.1 mrg return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
565 1.1 mrg (__v4di) __B,
566 1.1 mrg (__v4di)
567 1.1 mrg _mm256_setzero_si256 (),
568 1.1 mrg (__mmask8) __U);
569 1.1 mrg }
570 1.1 mrg
571 1.1 mrg extern __inline __m128i
572 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
573 1.1 mrg _mm_mullo_epi64 (__m128i __A, __m128i __B)
574 1.1 mrg {
575 1.1 mrg return (__m128i) ((__v2du) __A * (__v2du) __B);
576 1.1 mrg }
577 1.1 mrg
578 1.1 mrg extern __inline __m128i
579 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
580 1.1 mrg _mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
581 1.1 mrg __m128i __B)
582 1.1 mrg {
583 1.1 mrg return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
584 1.1 mrg (__v2di) __B,
585 1.1 mrg (__v2di) __W,
586 1.1 mrg (__mmask8) __U);
587 1.1 mrg }
588 1.1 mrg
589 1.1 mrg extern __inline __m128i
590 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
591 1.1 mrg _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
592 1.1 mrg {
593 1.1 mrg return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
594 1.1 mrg (__v2di) __B,
595 1.1 mrg (__v2di)
596 1.1 mrg _mm_setzero_di (),
597 1.1 mrg (__mmask8) __U);
598 1.1 mrg }
599 1.1 mrg
600 1.1 mrg extern __inline __m256d
601 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602 1.1 mrg _mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A,
603 1.1 mrg __m256d __B)
604 1.1 mrg {
605 1.1 mrg return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
606 1.1 mrg (__v4df) __B,
607 1.1 mrg (__v4df) __W,
608 1.1 mrg (__mmask8) __U);
609 1.1 mrg }
610 1.1 mrg
611 1.1 mrg extern __inline __m256d
612 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613 1.1 mrg _mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B)
614 1.1 mrg {
615 1.1 mrg return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
616 1.1 mrg (__v4df) __B,
617 1.1 mrg (__v4df)
618 1.1 mrg _mm256_setzero_pd (),
619 1.1 mrg (__mmask8) __U);
620 1.1 mrg }
621 1.1 mrg
622 1.1 mrg extern __inline __m128d
623 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
624 1.1 mrg _mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A,
625 1.1 mrg __m128d __B)
626 1.1 mrg {
627 1.1 mrg return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
628 1.1 mrg (__v2df) __B,
629 1.1 mrg (__v2df) __W,
630 1.1 mrg (__mmask8) __U);
631 1.1 mrg }
632 1.1 mrg
633 1.1 mrg extern __inline __m128d
634 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
635 1.1 mrg _mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B)
636 1.1 mrg {
637 1.1 mrg return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
638 1.1 mrg (__v2df) __B,
639 1.1 mrg (__v2df)
640 1.1 mrg _mm_setzero_pd (),
641 1.1 mrg (__mmask8) __U);
642 1.1 mrg }
643 1.1 mrg
644 1.1 mrg extern __inline __m256
645 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
646 1.1 mrg _mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A,
647 1.1 mrg __m256 __B)
648 1.1 mrg {
649 1.1 mrg return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
650 1.1 mrg (__v8sf) __B,
651 1.1 mrg (__v8sf) __W,
652 1.1 mrg (__mmask8) __U);
653 1.1 mrg }
654 1.1 mrg
655 1.1 mrg extern __inline __m256
656 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
657 1.1 mrg _mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B)
658 1.1 mrg {
659 1.1 mrg return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
660 1.1 mrg (__v8sf) __B,
661 1.1 mrg (__v8sf)
662 1.1 mrg _mm256_setzero_ps (),
663 1.1 mrg (__mmask8) __U);
664 1.1 mrg }
665 1.1 mrg
666 1.1 mrg extern __inline __m128
667 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
668 1.1 mrg _mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
669 1.1 mrg {
670 1.1 mrg return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
671 1.1 mrg (__v4sf) __B,
672 1.1 mrg (__v4sf) __W,
673 1.1 mrg (__mmask8) __U);
674 1.1 mrg }
675 1.1 mrg
676 1.1 mrg extern __inline __m128
677 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
678 1.1 mrg _mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B)
679 1.1 mrg {
680 1.1 mrg return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
681 1.1 mrg (__v4sf) __B,
682 1.1 mrg (__v4sf)
683 1.1 mrg _mm_setzero_ps (),
684 1.1 mrg (__mmask8) __U);
685 1.1 mrg }
686 1.1 mrg
687 1.1 mrg extern __inline __m256i
688 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
689 1.1 mrg _mm256_cvtps_epi64 (__m128 __A)
690 1.1 mrg {
691 1.1 mrg return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
692 1.1 mrg (__v4di)
693 1.1 mrg _mm256_setzero_si256 (),
694 1.1 mrg (__mmask8) -1);
695 1.1 mrg }
696 1.1 mrg
697 1.1 mrg extern __inline __m256i
698 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
699 1.1 mrg _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
700 1.1 mrg {
701 1.1 mrg return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
702 1.1 mrg (__v4di) __W,
703 1.1 mrg (__mmask8) __U);
704 1.1 mrg }
705 1.1 mrg
706 1.1 mrg extern __inline __m256i
707 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
708 1.1 mrg _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
709 1.1 mrg {
710 1.1 mrg return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
711 1.1 mrg (__v4di)
712 1.1 mrg _mm256_setzero_si256 (),
713 1.1 mrg (__mmask8) __U);
714 1.1 mrg }
715 1.1 mrg
716 1.1 mrg extern __inline __m128i
717 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
718 1.1 mrg _mm_cvtps_epi64 (__m128 __A)
719 1.1 mrg {
720 1.1 mrg return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
721 1.1 mrg (__v2di)
722 1.1 mrg _mm_setzero_di (),
723 1.1 mrg (__mmask8) -1);
724 1.1 mrg }
725 1.1 mrg
726 1.1 mrg extern __inline __m128i
727 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
728 1.1 mrg _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
729 1.1 mrg {
730 1.1 mrg return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
731 1.1 mrg (__v2di) __W,
732 1.1 mrg (__mmask8) __U);
733 1.1 mrg }
734 1.1 mrg
735 1.1 mrg extern __inline __m128i
736 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
737 1.1 mrg _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
738 1.1 mrg {
739 1.1 mrg return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
740 1.1 mrg (__v2di)
741 1.1 mrg _mm_setzero_di (),
742 1.1 mrg (__mmask8) __U);
743 1.1 mrg }
744 1.1 mrg
745 1.1 mrg extern __inline __m256i
746 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
747 1.1 mrg _mm256_cvtps_epu64 (__m128 __A)
748 1.1 mrg {
749 1.1 mrg return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
750 1.1 mrg (__v4di)
751 1.1 mrg _mm256_setzero_si256 (),
752 1.1 mrg (__mmask8) -1);
753 1.1 mrg }
754 1.1 mrg
755 1.1 mrg extern __inline __m256i
756 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
757 1.1 mrg _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
758 1.1 mrg {
759 1.1 mrg return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
760 1.1 mrg (__v4di) __W,
761 1.1 mrg (__mmask8) __U);
762 1.1 mrg }
763 1.1 mrg
764 1.1 mrg extern __inline __m256i
765 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
766 1.1 mrg _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
767 1.1 mrg {
768 1.1 mrg return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
769 1.1 mrg (__v4di)
770 1.1 mrg _mm256_setzero_si256 (),
771 1.1 mrg (__mmask8) __U);
772 1.1 mrg }
773 1.1 mrg
774 1.1 mrg extern __inline __m128i
775 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
776 1.1 mrg _mm_cvtps_epu64 (__m128 __A)
777 1.1 mrg {
778 1.1 mrg return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
779 1.1 mrg (__v2di)
780 1.1 mrg _mm_setzero_di (),
781 1.1 mrg (__mmask8) -1);
782 1.1 mrg }
783 1.1 mrg
784 1.1 mrg extern __inline __m128i
785 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786 1.1 mrg _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
787 1.1 mrg {
788 1.1 mrg return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
789 1.1 mrg (__v2di) __W,
790 1.1 mrg (__mmask8) __U);
791 1.1 mrg }
792 1.1 mrg
793 1.1 mrg extern __inline __m128i
794 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
795 1.1 mrg _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
796 1.1 mrg {
797 1.1 mrg return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
798 1.1 mrg (__v2di)
799 1.1 mrg _mm_setzero_di (),
800 1.1 mrg (__mmask8) __U);
801 1.1 mrg }
802 1.1 mrg
803 1.1 mrg extern __inline __m128
804 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
805 1.1 mrg _mm256_cvtepi64_ps (__m256i __A)
806 1.1 mrg {
807 1.1 mrg return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
808 1.1 mrg (__v4sf)
809 1.1 mrg _mm_setzero_ps (),
810 1.1 mrg (__mmask8) -1);
811 1.1 mrg }
812 1.1 mrg
813 1.1 mrg extern __inline __m128
814 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
815 1.1 mrg _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A)
816 1.1 mrg {
817 1.1 mrg return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
818 1.1 mrg (__v4sf) __W,
819 1.1 mrg (__mmask8) __U);
820 1.1 mrg }
821 1.1 mrg
822 1.1 mrg extern __inline __m128
823 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
824 1.1 mrg _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A)
825 1.1 mrg {
826 1.1 mrg return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
827 1.1 mrg (__v4sf)
828 1.1 mrg _mm_setzero_ps (),
829 1.1 mrg (__mmask8) __U);
830 1.1 mrg }
831 1.1 mrg
832 1.1 mrg extern __inline __m128
833 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
834 1.1 mrg _mm_cvtepi64_ps (__m128i __A)
835 1.1 mrg {
836 1.1 mrg return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
837 1.1 mrg (__v4sf)
838 1.1 mrg _mm_setzero_ps (),
839 1.1 mrg (__mmask8) -1);
840 1.1 mrg }
841 1.1 mrg
842 1.1 mrg extern __inline __m128
843 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
844 1.1 mrg _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A)
845 1.1 mrg {
846 1.1 mrg return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
847 1.1 mrg (__v4sf) __W,
848 1.1 mrg (__mmask8) __U);
849 1.1 mrg }
850 1.1 mrg
851 1.1 mrg extern __inline __m128
852 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
853 1.1 mrg _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A)
854 1.1 mrg {
855 1.1 mrg return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
856 1.1 mrg (__v4sf)
857 1.1 mrg _mm_setzero_ps (),
858 1.1 mrg (__mmask8) __U);
859 1.1 mrg }
860 1.1 mrg
861 1.1 mrg extern __inline __m128
862 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
863 1.1 mrg _mm256_cvtepu64_ps (__m256i __A)
864 1.1 mrg {
865 1.1 mrg return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
866 1.1 mrg (__v4sf)
867 1.1 mrg _mm_setzero_ps (),
868 1.1 mrg (__mmask8) -1);
869 1.1 mrg }
870 1.1 mrg
871 1.1 mrg extern __inline __m128
872 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
873 1.1 mrg _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A)
874 1.1 mrg {
875 1.1 mrg return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
876 1.1 mrg (__v4sf) __W,
877 1.1 mrg (__mmask8) __U);
878 1.1 mrg }
879 1.1 mrg
880 1.1 mrg extern __inline __m128
881 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882 1.1 mrg _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A)
883 1.1 mrg {
884 1.1 mrg return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
885 1.1 mrg (__v4sf)
886 1.1 mrg _mm_setzero_ps (),
887 1.1 mrg (__mmask8) __U);
888 1.1 mrg }
889 1.1 mrg
890 1.1 mrg extern __inline __m128
891 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
892 1.1 mrg _mm_cvtepu64_ps (__m128i __A)
893 1.1 mrg {
894 1.1 mrg return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
895 1.1 mrg (__v4sf)
896 1.1 mrg _mm_setzero_ps (),
897 1.1 mrg (__mmask8) -1);
898 1.1 mrg }
899 1.1 mrg
900 1.1 mrg extern __inline __m128
901 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
902 1.1 mrg _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A)
903 1.1 mrg {
904 1.1 mrg return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
905 1.1 mrg (__v4sf) __W,
906 1.1 mrg (__mmask8) __U);
907 1.1 mrg }
908 1.1 mrg
909 1.1 mrg extern __inline __m128
910 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
911 1.1 mrg _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A)
912 1.1 mrg {
913 1.1 mrg return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
914 1.1 mrg (__v4sf)
915 1.1 mrg _mm_setzero_ps (),
916 1.1 mrg (__mmask8) __U);
917 1.1 mrg }
918 1.1 mrg
919 1.1 mrg extern __inline __m256d
920 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
921 1.1 mrg _mm256_cvtepi64_pd (__m256i __A)
922 1.1 mrg {
923 1.1 mrg return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
924 1.1 mrg (__v4df)
925 1.1 mrg _mm256_setzero_pd (),
926 1.1 mrg (__mmask8) -1);
927 1.1 mrg }
928 1.1 mrg
929 1.1 mrg extern __inline __m256d
930 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
931 1.1 mrg _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A)
932 1.1 mrg {
933 1.1 mrg return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
934 1.1 mrg (__v4df) __W,
935 1.1 mrg (__mmask8) __U);
936 1.1 mrg }
937 1.1 mrg
938 1.1 mrg extern __inline __m256d
939 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
940 1.1 mrg _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A)
941 1.1 mrg {
942 1.1 mrg return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
943 1.1 mrg (__v4df)
944 1.1 mrg _mm256_setzero_pd (),
945 1.1 mrg (__mmask8) __U);
946 1.1 mrg }
947 1.1 mrg
948 1.1 mrg extern __inline __m128d
949 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
950 1.1 mrg _mm_cvtepi64_pd (__m128i __A)
951 1.1 mrg {
952 1.1 mrg return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
953 1.1 mrg (__v2df)
954 1.1 mrg _mm_setzero_pd (),
955 1.1 mrg (__mmask8) -1);
956 1.1 mrg }
957 1.1 mrg
958 1.1 mrg extern __inline __m128d
959 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
960 1.1 mrg _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A)
961 1.1 mrg {
962 1.1 mrg return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
963 1.1 mrg (__v2df) __W,
964 1.1 mrg (__mmask8) __U);
965 1.1 mrg }
966 1.1 mrg
967 1.1 mrg extern __inline __m128d
968 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
969 1.1 mrg _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A)
970 1.1 mrg {
971 1.1 mrg return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
972 1.1 mrg (__v2df)
973 1.1 mrg _mm_setzero_pd (),
974 1.1 mrg (__mmask8) __U);
975 1.1 mrg }
976 1.1 mrg
977 1.1 mrg extern __inline __m256d
978 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
979 1.1 mrg _mm256_cvtepu64_pd (__m256i __A)
980 1.1 mrg {
981 1.1 mrg return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
982 1.1 mrg (__v4df)
983 1.1 mrg _mm256_setzero_pd (),
984 1.1 mrg (__mmask8) -1);
985 1.1 mrg }
986 1.1 mrg
987 1.1 mrg extern __inline __m256d
988 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
989 1.1 mrg _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A)
990 1.1 mrg {
991 1.1 mrg return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
992 1.1 mrg (__v4df) __W,
993 1.1 mrg (__mmask8) __U);
994 1.1 mrg }
995 1.1 mrg
996 1.1 mrg extern __inline __m256d
997 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
998 1.1 mrg _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A)
999 1.1 mrg {
1000 1.1 mrg return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
1001 1.1 mrg (__v4df)
1002 1.1 mrg _mm256_setzero_pd (),
1003 1.1 mrg (__mmask8) __U);
1004 1.1 mrg }
1005 1.1 mrg
1006 1.1 mrg extern __inline __m256d
1007 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1008 1.1 mrg _mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A,
1009 1.1 mrg __m256d __B)
1010 1.1 mrg {
1011 1.1 mrg return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
1012 1.1 mrg (__v4df) __B,
1013 1.1 mrg (__v4df) __W,
1014 1.1 mrg (__mmask8) __U);
1015 1.1 mrg }
1016 1.1 mrg
1017 1.1 mrg extern __inline __m256d
1018 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1019 1.1 mrg _mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B)
1020 1.1 mrg {
1021 1.1 mrg return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
1022 1.1 mrg (__v4df) __B,
1023 1.1 mrg (__v4df)
1024 1.1 mrg _mm256_setzero_pd (),
1025 1.1 mrg (__mmask8) __U);
1026 1.1 mrg }
1027 1.1 mrg
1028 1.1 mrg extern __inline __m128d
1029 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1030 1.1 mrg _mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1031 1.1 mrg {
1032 1.1 mrg return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
1033 1.1 mrg (__v2df) __B,
1034 1.1 mrg (__v2df) __W,
1035 1.1 mrg (__mmask8) __U);
1036 1.1 mrg }
1037 1.1 mrg
1038 1.1 mrg extern __inline __m128d
1039 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1040 1.1 mrg _mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B)
1041 1.1 mrg {
1042 1.1 mrg return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
1043 1.1 mrg (__v2df) __B,
1044 1.1 mrg (__v2df)
1045 1.1 mrg _mm_setzero_pd (),
1046 1.1 mrg (__mmask8) __U);
1047 1.1 mrg }
1048 1.1 mrg
1049 1.1 mrg extern __inline __m256
1050 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1051 1.1 mrg _mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
1052 1.1 mrg {
1053 1.1 mrg return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
1054 1.1 mrg (__v8sf) __B,
1055 1.1 mrg (__v8sf) __W,
1056 1.1 mrg (__mmask8) __U);
1057 1.1 mrg }
1058 1.1 mrg
1059 1.1 mrg extern __inline __m256
1060 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1061 1.1 mrg _mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B)
1062 1.1 mrg {
1063 1.1 mrg return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
1064 1.1 mrg (__v8sf) __B,
1065 1.1 mrg (__v8sf)
1066 1.1 mrg _mm256_setzero_ps (),
1067 1.1 mrg (__mmask8) __U);
1068 1.1 mrg }
1069 1.1 mrg
1070 1.1 mrg extern __inline __m128
1071 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1072 1.1 mrg _mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1073 1.1 mrg {
1074 1.1 mrg return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
1075 1.1 mrg (__v4sf) __B,
1076 1.1 mrg (__v4sf) __W,
1077 1.1 mrg (__mmask8) __U);
1078 1.1 mrg }
1079 1.1 mrg
1080 1.1 mrg extern __inline __m128
1081 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1082 1.1 mrg _mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B)
1083 1.1 mrg {
1084 1.1 mrg return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
1085 1.1 mrg (__v4sf) __B,
1086 1.1 mrg (__v4sf)
1087 1.1 mrg _mm_setzero_ps (),
1088 1.1 mrg (__mmask8) __U);
1089 1.1 mrg }
1090 1.1 mrg
1091 1.1 mrg extern __inline __m128d
1092 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1093 1.1 mrg _mm_cvtepu64_pd (__m128i __A)
1094 1.1 mrg {
1095 1.1 mrg return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
1096 1.1 mrg (__v2df)
1097 1.1 mrg _mm_setzero_pd (),
1098 1.1 mrg (__mmask8) -1);
1099 1.1 mrg }
1100 1.1 mrg
1101 1.1 mrg extern __inline __m128d
1102 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1103 1.1 mrg _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A)
1104 1.1 mrg {
1105 1.1 mrg return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
1106 1.1 mrg (__v2df) __W,
1107 1.1 mrg (__mmask8) __U);
1108 1.1 mrg }
1109 1.1 mrg
1110 1.1 mrg extern __inline __m128d
1111 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1112 1.1 mrg _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A)
1113 1.1 mrg {
1114 1.1 mrg return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
1115 1.1 mrg (__v2df)
1116 1.1 mrg _mm_setzero_pd (),
1117 1.1 mrg (__mmask8) __U);
1118 1.1 mrg }
1119 1.1 mrg
1120 1.1 mrg extern __inline __m256d
1121 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1122 1.1 mrg _mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
1123 1.1 mrg __m256d __B)
1124 1.1 mrg {
1125 1.1 mrg return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
1126 1.1 mrg (__v4df) __B,
1127 1.1 mrg (__v4df) __W,
1128 1.1 mrg (__mmask8) __U);
1129 1.1 mrg }
1130 1.1 mrg
1131 1.1 mrg extern __inline __m256d
1132 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1133 1.1 mrg _mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B)
1134 1.1 mrg {
1135 1.1 mrg return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
1136 1.1 mrg (__v4df) __B,
1137 1.1 mrg (__v4df)
1138 1.1 mrg _mm256_setzero_pd (),
1139 1.1 mrg (__mmask8) __U);
1140 1.1 mrg }
1141 1.1 mrg
1142 1.1 mrg extern __inline __m128d
1143 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1144 1.1 mrg _mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1145 1.1 mrg {
1146 1.1 mrg return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
1147 1.1 mrg (__v2df) __B,
1148 1.1 mrg (__v2df) __W,
1149 1.1 mrg (__mmask8) __U);
1150 1.1 mrg }
1151 1.1 mrg
1152 1.1 mrg extern __inline __m128d
1153 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1154 1.1 mrg _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B)
1155 1.1 mrg {
1156 1.1 mrg return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
1157 1.1 mrg (__v2df) __B,
1158 1.1 mrg (__v2df)
1159 1.1 mrg _mm_setzero_pd (),
1160 1.1 mrg (__mmask8) __U);
1161 1.1 mrg }
1162 1.1 mrg
1163 1.1 mrg extern __inline __m256
1164 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1165 1.1 mrg _mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
1166 1.1 mrg {
1167 1.1 mrg return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
1168 1.1 mrg (__v8sf) __B,
1169 1.1 mrg (__v8sf) __W,
1170 1.1 mrg (__mmask8) __U);
1171 1.1 mrg }
1172 1.1 mrg
1173 1.1 mrg extern __inline __m256
1174 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1175 1.1 mrg _mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B)
1176 1.1 mrg {
1177 1.1 mrg return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
1178 1.1 mrg (__v8sf) __B,
1179 1.1 mrg (__v8sf)
1180 1.1 mrg _mm256_setzero_ps (),
1181 1.1 mrg (__mmask8) __U);
1182 1.1 mrg }
1183 1.1 mrg
1184 1.1 mrg extern __inline __m128
1185 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1186 1.1 mrg _mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1187 1.1 mrg {
1188 1.1 mrg return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
1189 1.1 mrg (__v4sf) __B,
1190 1.1 mrg (__v4sf) __W,
1191 1.1 mrg (__mmask8) __U);
1192 1.1 mrg }
1193 1.1 mrg
1194 1.1 mrg extern __inline __m128
1195 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1196 1.1 mrg _mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B)
1197 1.1 mrg {
1198 1.1 mrg return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
1199 1.1 mrg (__v4sf) __B,
1200 1.1 mrg (__v4sf)
1201 1.1 mrg _mm_setzero_ps (),
1202 1.1 mrg (__mmask8) __U);
1203 1.1 mrg }
1204 1.1 mrg
1205 1.1 mrg extern __inline __m256d
1206 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1207 1.1 mrg _mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
1208 1.1 mrg {
1209 1.1 mrg return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
1210 1.1 mrg (__v4df) __B,
1211 1.1 mrg (__v4df) __W,
1212 1.1 mrg (__mmask8) __U);
1213 1.1 mrg }
1214 1.1 mrg
1215 1.1 mrg extern __inline __m256d
1216 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1217 1.1 mrg _mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B)
1218 1.1 mrg {
1219 1.1 mrg return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
1220 1.1 mrg (__v4df) __B,
1221 1.1 mrg (__v4df)
1222 1.1 mrg _mm256_setzero_pd (),
1223 1.1 mrg (__mmask8) __U);
1224 1.1 mrg }
1225 1.1 mrg
1226 1.1 mrg extern __inline __m128d
1227 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1228 1.1 mrg _mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1229 1.1 mrg {
1230 1.1 mrg return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
1231 1.1 mrg (__v2df) __B,
1232 1.1 mrg (__v2df) __W,
1233 1.1 mrg (__mmask8) __U);
1234 1.1 mrg }
1235 1.1 mrg
1236 1.1 mrg extern __inline __m128d
1237 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1238 1.1 mrg _mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B)
1239 1.1 mrg {
1240 1.1 mrg return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
1241 1.1 mrg (__v2df) __B,
1242 1.1 mrg (__v2df)
1243 1.1 mrg _mm_setzero_pd (),
1244 1.1 mrg (__mmask8) __U);
1245 1.1 mrg }
1246 1.1 mrg
1247 1.1 mrg extern __inline __m256
1248 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1249 1.1 mrg _mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
1250 1.1 mrg {
1251 1.1 mrg return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
1252 1.1 mrg (__v8sf) __B,
1253 1.1 mrg (__v8sf) __W,
1254 1.1 mrg (__mmask8) __U);
1255 1.1 mrg }
1256 1.1 mrg
1257 1.1 mrg extern __inline __m256
1258 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1259 1.1 mrg _mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B)
1260 1.1 mrg {
1261 1.1 mrg return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
1262 1.1 mrg (__v8sf) __B,
1263 1.1 mrg (__v8sf)
1264 1.1 mrg _mm256_setzero_ps (),
1265 1.1 mrg (__mmask8) __U);
1266 1.1 mrg }
1267 1.1 mrg
1268 1.1 mrg extern __inline __m128
1269 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1270 1.1 mrg _mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1271 1.1 mrg {
1272 1.1 mrg return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
1273 1.1 mrg (__v4sf) __B,
1274 1.1 mrg (__v4sf) __W,
1275 1.1 mrg (__mmask8) __U);
1276 1.1 mrg }
1277 1.1 mrg
1278 1.1 mrg extern __inline __m128
1279 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1280 1.1 mrg _mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B)
1281 1.1 mrg {
1282 1.1 mrg return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
1283 1.1 mrg (__v4sf) __B,
1284 1.1 mrg (__v4sf)
1285 1.1 mrg _mm_setzero_ps (),
1286 1.1 mrg (__mmask8) __U);
1287 1.1 mrg }
1288 1.1 mrg
1289 1.1 mrg extern __inline __m128i
1290 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1291 1.1 mrg _mm_movm_epi32 (__mmask8 __A)
1292 1.1 mrg {
1293 1.1 mrg return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
1294 1.1 mrg }
1295 1.1 mrg
1296 1.1 mrg extern __inline __m256i
1297 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1298 1.1 mrg _mm256_movm_epi32 (__mmask8 __A)
1299 1.1 mrg {
1300 1.1 mrg return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
1301 1.1 mrg }
1302 1.1 mrg
1303 1.1 mrg extern __inline __m128i
1304 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1305 1.1 mrg _mm_movm_epi64 (__mmask8 __A)
1306 1.1 mrg {
1307 1.1 mrg return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
1308 1.1 mrg }
1309 1.1 mrg
1310 1.1 mrg extern __inline __m256i
1311 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1312 1.1 mrg _mm256_movm_epi64 (__mmask8 __A)
1313 1.1 mrg {
1314 1.1 mrg return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
1315 1.1 mrg }
1316 1.1 mrg
1317 1.1 mrg extern __inline __mmask8
1318 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1319 1.1 mrg _mm_movepi32_mask (__m128i __A)
1320 1.1 mrg {
1321 1.1 mrg return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
1322 1.1 mrg }
1323 1.1 mrg
1324 1.1 mrg extern __inline __mmask8
1325 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1326 1.1 mrg _mm256_movepi32_mask (__m256i __A)
1327 1.1 mrg {
1328 1.1 mrg return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
1329 1.1 mrg }
1330 1.1 mrg
1331 1.1 mrg extern __inline __mmask8
1332 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1333 1.1 mrg _mm_movepi64_mask (__m128i __A)
1334 1.1 mrg {
1335 1.1 mrg return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
1336 1.1 mrg }
1337 1.1 mrg
1338 1.1 mrg extern __inline __mmask8
1339 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1340 1.1 mrg _mm256_movepi64_mask (__m256i __A)
1341 1.1 mrg {
1342 1.1 mrg return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
1343 1.1 mrg }
1344 1.1 mrg
1345 1.1 mrg #ifdef __OPTIMIZE__
1346 1.1 mrg extern __inline __m128d
1347 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1348 1.1 mrg _mm256_extractf64x2_pd (__m256d __A, const int __imm)
1349 1.1 mrg {
1350 1.1 mrg return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
1351 1.1 mrg __imm,
1352 1.1 mrg (__v2df)
1353 1.1 mrg _mm_setzero_pd (),
1354 1.1 mrg (__mmask8) -
1355 1.1 mrg 1);
1356 1.1 mrg }
1357 1.1 mrg
1358 1.1 mrg extern __inline __m128d
1359 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1360 1.1 mrg _mm256_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m256d __A,
1361 1.1 mrg const int __imm)
1362 1.1 mrg {
1363 1.1 mrg return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
1364 1.1 mrg __imm,
1365 1.1 mrg (__v2df) __W,
1366 1.1 mrg (__mmask8)
1367 1.1 mrg __U);
1368 1.1 mrg }
1369 1.1 mrg
1370 1.1 mrg extern __inline __m128d
1371 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1372 1.1 mrg _mm256_maskz_extractf64x2_pd (__mmask8 __U, __m256d __A,
1373 1.1 mrg const int __imm)
1374 1.1 mrg {
1375 1.1 mrg return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
1376 1.1 mrg __imm,
1377 1.1 mrg (__v2df)
1378 1.1 mrg _mm_setzero_pd (),
1379 1.1 mrg (__mmask8)
1380 1.1 mrg __U);
1381 1.1 mrg }
1382 1.1 mrg
1383 1.1 mrg extern __inline __m128i
1384 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1385 1.1 mrg _mm256_extracti64x2_epi64 (__m256i __A, const int __imm)
1386 1.1 mrg {
1387 1.1 mrg return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
1388 1.1 mrg __imm,
1389 1.1 mrg (__v2di)
1390 1.1 mrg _mm_setzero_di (),
1391 1.1 mrg (__mmask8) -
1392 1.1 mrg 1);
1393 1.1 mrg }
1394 1.1 mrg
1395 1.1 mrg extern __inline __m128i
1396 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1397 1.1 mrg _mm256_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m256i __A,
1398 1.1 mrg const int __imm)
1399 1.1 mrg {
1400 1.1 mrg return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
1401 1.1 mrg __imm,
1402 1.1 mrg (__v2di) __W,
1403 1.1 mrg (__mmask8)
1404 1.1 mrg __U);
1405 1.1 mrg }
1406 1.1 mrg
1407 1.1 mrg extern __inline __m128i
1408 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1409 1.1 mrg _mm256_maskz_extracti64x2_epi64 (__mmask8 __U, __m256i __A,
1410 1.1 mrg const int __imm)
1411 1.1 mrg {
1412 1.1 mrg return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
1413 1.1 mrg __imm,
1414 1.1 mrg (__v2di)
1415 1.1 mrg _mm_setzero_di (),
1416 1.1 mrg (__mmask8)
1417 1.1 mrg __U);
1418 1.1 mrg }
1419 1.1 mrg
1420 1.1 mrg extern __inline __m256d
1421 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1422 1.1 mrg _mm256_reduce_pd (__m256d __A, int __B)
1423 1.1 mrg {
1424 1.1 mrg return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
1425 1.1 mrg (__v4df)
1426 1.1 mrg _mm256_setzero_pd (),
1427 1.1 mrg (__mmask8) -1);
1428 1.1 mrg }
1429 1.1 mrg
1430 1.1 mrg extern __inline __m256d
1431 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1432 1.1 mrg _mm256_mask_reduce_pd (__m256d __W, __mmask8 __U, __m256d __A, int __B)
1433 1.1 mrg {
1434 1.1 mrg return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
1435 1.1 mrg (__v4df) __W,
1436 1.1 mrg (__mmask8) __U);
1437 1.1 mrg }
1438 1.1 mrg
1439 1.1 mrg extern __inline __m256d
1440 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1441 1.1 mrg _mm256_maskz_reduce_pd (__mmask8 __U, __m256d __A, int __B)
1442 1.1 mrg {
1443 1.1 mrg return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
1444 1.1 mrg (__v4df)
1445 1.1 mrg _mm256_setzero_pd (),
1446 1.1 mrg (__mmask8) __U);
1447 1.1 mrg }
1448 1.1 mrg
1449 1.1 mrg extern __inline __m128d
1450 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1451 1.1 mrg _mm_reduce_pd (__m128d __A, int __B)
1452 1.1 mrg {
1453 1.1 mrg return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
1454 1.1 mrg (__v2df)
1455 1.1 mrg _mm_setzero_pd (),
1456 1.1 mrg (__mmask8) -1);
1457 1.1 mrg }
1458 1.1 mrg
1459 1.1 mrg extern __inline __m128d
1460 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1461 1.1 mrg _mm_mask_reduce_pd (__m128d __W, __mmask8 __U, __m128d __A, int __B)
1462 1.1 mrg {
1463 1.1 mrg return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
1464 1.1 mrg (__v2df) __W,
1465 1.1 mrg (__mmask8) __U);
1466 1.1 mrg }
1467 1.1 mrg
1468 1.1 mrg extern __inline __m128d
1469 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1470 1.1 mrg _mm_maskz_reduce_pd (__mmask8 __U, __m128d __A, int __B)
1471 1.1 mrg {
1472 1.1 mrg return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
1473 1.1 mrg (__v2df)
1474 1.1 mrg _mm_setzero_pd (),
1475 1.1 mrg (__mmask8) __U);
1476 1.1 mrg }
1477 1.1 mrg
1478 1.1 mrg extern __inline __m256
1479 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1480 1.1 mrg _mm256_reduce_ps (__m256 __A, int __B)
1481 1.1 mrg {
1482 1.1 mrg return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
1483 1.1 mrg (__v8sf)
1484 1.1 mrg _mm256_setzero_ps (),
1485 1.1 mrg (__mmask8) -1);
1486 1.1 mrg }
1487 1.1 mrg
1488 1.1 mrg extern __inline __m256
1489 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1490 1.1 mrg _mm256_mask_reduce_ps (__m256 __W, __mmask8 __U, __m256 __A, int __B)
1491 1.1 mrg {
1492 1.1 mrg return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
1493 1.1 mrg (__v8sf) __W,
1494 1.1 mrg (__mmask8) __U);
1495 1.1 mrg }
1496 1.1 mrg
1497 1.1 mrg extern __inline __m256
1498 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1499 1.1 mrg _mm256_maskz_reduce_ps (__mmask8 __U, __m256 __A, int __B)
1500 1.1 mrg {
1501 1.1 mrg return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
1502 1.1 mrg (__v8sf)
1503 1.1 mrg _mm256_setzero_ps (),
1504 1.1 mrg (__mmask8) __U);
1505 1.1 mrg }
1506 1.1 mrg
1507 1.1 mrg extern __inline __m128
1508 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1509 1.1 mrg _mm_reduce_ps (__m128 __A, int __B)
1510 1.1 mrg {
1511 1.1 mrg return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
1512 1.1 mrg (__v4sf)
1513 1.1 mrg _mm_setzero_ps (),
1514 1.1 mrg (__mmask8) -1);
1515 1.1 mrg }
1516 1.1 mrg
1517 1.1 mrg extern __inline __m128
1518 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1519 1.1 mrg _mm_mask_reduce_ps (__m128 __W, __mmask8 __U, __m128 __A, int __B)
1520 1.1 mrg {
1521 1.1 mrg return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
1522 1.1 mrg (__v4sf) __W,
1523 1.1 mrg (__mmask8) __U);
1524 1.1 mrg }
1525 1.1 mrg
1526 1.1 mrg extern __inline __m128
1527 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1528 1.1 mrg _mm_maskz_reduce_ps (__mmask8 __U, __m128 __A, int __B)
1529 1.1 mrg {
1530 1.1 mrg return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
1531 1.1 mrg (__v4sf)
1532 1.1 mrg _mm_setzero_ps (),
1533 1.1 mrg (__mmask8) __U);
1534 1.1 mrg }
1535 1.1 mrg
1536 1.1 mrg extern __inline __m256d
1537 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1538 1.1 mrg _mm256_range_pd (__m256d __A, __m256d __B, int __C)
1539 1.1 mrg {
1540 1.1 mrg return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
1541 1.1 mrg (__v4df) __B, __C,
1542 1.1 mrg (__v4df)
1543 1.1 mrg _mm256_setzero_pd (),
1544 1.1 mrg (__mmask8) -1);
1545 1.1 mrg }
1546 1.1 mrg
1547 1.1 mrg extern __inline __m256d
1548 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1549 1.1 mrg _mm256_mask_range_pd (__m256d __W, __mmask8 __U,
1550 1.1 mrg __m256d __A, __m256d __B, int __C)
1551 1.1 mrg {
1552 1.1 mrg return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
1553 1.1 mrg (__v4df) __B, __C,
1554 1.1 mrg (__v4df) __W,
1555 1.1 mrg (__mmask8) __U);
1556 1.1 mrg }
1557 1.1 mrg
1558 1.1 mrg extern __inline __m256d
1559 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1560 1.1 mrg _mm256_maskz_range_pd (__mmask8 __U, __m256d __A, __m256d __B, int __C)
1561 1.1 mrg {
1562 1.1 mrg return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
1563 1.1 mrg (__v4df) __B, __C,
1564 1.1 mrg (__v4df)
1565 1.1 mrg _mm256_setzero_pd (),
1566 1.1 mrg (__mmask8) __U);
1567 1.1 mrg }
1568 1.1 mrg
1569 1.1 mrg extern __inline __m128d
1570 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1571 1.1 mrg _mm_range_pd (__m128d __A, __m128d __B, int __C)
1572 1.1 mrg {
1573 1.1 mrg return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
1574 1.1 mrg (__v2df) __B, __C,
1575 1.1 mrg (__v2df)
1576 1.1 mrg _mm_setzero_pd (),
1577 1.1 mrg (__mmask8) -1);
1578 1.1 mrg }
1579 1.1 mrg
1580 1.1 mrg extern __inline __m128d
1581 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1582 1.1 mrg _mm_mask_range_pd (__m128d __W, __mmask8 __U,
1583 1.1 mrg __m128d __A, __m128d __B, int __C)
1584 1.1 mrg {
1585 1.1 mrg return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
1586 1.1 mrg (__v2df) __B, __C,
1587 1.1 mrg (__v2df) __W,
1588 1.1 mrg (__mmask8) __U);
1589 1.1 mrg }
1590 1.1 mrg
1591 1.1 mrg extern __inline __m128d
1592 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1593 1.1 mrg _mm_maskz_range_pd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
1594 1.1 mrg {
1595 1.1 mrg return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
1596 1.1 mrg (__v2df) __B, __C,
1597 1.1 mrg (__v2df)
1598 1.1 mrg _mm_setzero_pd (),
1599 1.1 mrg (__mmask8) __U);
1600 1.1 mrg }
1601 1.1 mrg
1602 1.1 mrg extern __inline __m256
1603 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1604 1.1 mrg _mm256_range_ps (__m256 __A, __m256 __B, int __C)
1605 1.1 mrg {
1606 1.1 mrg return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
1607 1.1 mrg (__v8sf) __B, __C,
1608 1.1 mrg (__v8sf)
1609 1.1 mrg _mm256_setzero_ps (),
1610 1.1 mrg (__mmask8) -1);
1611 1.1 mrg }
1612 1.1 mrg
1613 1.1 mrg extern __inline __m256
1614 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1615 1.1 mrg _mm256_mask_range_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
1616 1.1 mrg int __C)
1617 1.1 mrg {
1618 1.1 mrg return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
1619 1.1 mrg (__v8sf) __B, __C,
1620 1.1 mrg (__v8sf) __W,
1621 1.1 mrg (__mmask8) __U);
1622 1.1 mrg }
1623 1.1 mrg
1624 1.1 mrg extern __inline __m256
1625 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1626 1.1 mrg _mm256_maskz_range_ps (__mmask8 __U, __m256 __A, __m256 __B, int __C)
1627 1.1 mrg {
1628 1.1 mrg return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
1629 1.1 mrg (__v8sf) __B, __C,
1630 1.1 mrg (__v8sf)
1631 1.1 mrg _mm256_setzero_ps (),
1632 1.1 mrg (__mmask8) __U);
1633 1.1 mrg }
1634 1.1 mrg
1635 1.1 mrg extern __inline __m128
1636 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1637 1.1 mrg _mm_range_ps (__m128 __A, __m128 __B, int __C)
1638 1.1 mrg {
1639 1.1 mrg return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
1640 1.1 mrg (__v4sf) __B, __C,
1641 1.1 mrg (__v4sf)
1642 1.1 mrg _mm_setzero_ps (),
1643 1.1 mrg (__mmask8) -1);
1644 1.1 mrg }
1645 1.1 mrg
1646 1.1 mrg extern __inline __m128
1647 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1648 1.1 mrg _mm_mask_range_ps (__m128 __W, __mmask8 __U,
1649 1.1 mrg __m128 __A, __m128 __B, int __C)
1650 1.1 mrg {
1651 1.1 mrg return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
1652 1.1 mrg (__v4sf) __B, __C,
1653 1.1 mrg (__v4sf) __W,
1654 1.1 mrg (__mmask8) __U);
1655 1.1 mrg }
1656 1.1 mrg
1657 1.1 mrg extern __inline __m128
1658 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1659 1.1 mrg _mm_maskz_range_ps (__mmask8 __U, __m128 __A, __m128 __B, int __C)
1660 1.1 mrg {
1661 1.1 mrg return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
1662 1.1 mrg (__v4sf) __B, __C,
1663 1.1 mrg (__v4sf)
1664 1.1 mrg _mm_setzero_ps (),
1665 1.1 mrg (__mmask8) __U);
1666 1.1 mrg }
1667 1.1 mrg
1668 1.1 mrg extern __inline __mmask8
1669 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1670 1.1 mrg _mm256_mask_fpclass_pd_mask (__mmask8 __U, __m256d __A,
1671 1.1 mrg const int __imm)
1672 1.1 mrg {
1673 1.1 mrg return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
1674 1.1 mrg __imm, __U);
1675 1.1 mrg }
1676 1.1 mrg
1677 1.1 mrg extern __inline __mmask8
1678 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1679 1.1 mrg _mm256_fpclass_pd_mask (__m256d __A, const int __imm)
1680 1.1 mrg {
1681 1.1 mrg return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
1682 1.1 mrg __imm,
1683 1.1 mrg (__mmask8) -1);
1684 1.1 mrg }
1685 1.1 mrg
1686 1.1 mrg extern __inline __mmask8
1687 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1688 1.1 mrg _mm256_mask_fpclass_ps_mask (__mmask8 __U, __m256 __A, const int __imm)
1689 1.1 mrg {
1690 1.1 mrg return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
1691 1.1 mrg __imm, __U);
1692 1.1 mrg }
1693 1.1 mrg
1694 1.1 mrg extern __inline __mmask8
1695 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1696 1.1 mrg _mm256_fpclass_ps_mask (__m256 __A, const int __imm)
1697 1.1 mrg {
1698 1.1 mrg return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
1699 1.1 mrg __imm,
1700 1.1 mrg (__mmask8) -1);
1701 1.1 mrg }
1702 1.1 mrg
1703 1.1 mrg extern __inline __mmask8
1704 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705 1.1 mrg _mm_mask_fpclass_pd_mask (__mmask8 __U, __m128d __A, const int __imm)
1706 1.1 mrg {
1707 1.1 mrg return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
1708 1.1 mrg __imm, __U);
1709 1.1 mrg }
1710 1.1 mrg
1711 1.1 mrg extern __inline __mmask8
1712 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1713 1.1 mrg _mm_fpclass_pd_mask (__m128d __A, const int __imm)
1714 1.1 mrg {
1715 1.1 mrg return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
1716 1.1 mrg __imm,
1717 1.1 mrg (__mmask8) -1);
1718 1.1 mrg }
1719 1.1 mrg
1720 1.1 mrg extern __inline __mmask8
1721 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1722 1.1 mrg _mm_mask_fpclass_ps_mask (__mmask8 __U, __m128 __A, const int __imm)
1723 1.1 mrg {
1724 1.1 mrg return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
1725 1.1 mrg __imm, __U);
1726 1.1 mrg }
1727 1.1 mrg
1728 1.1 mrg extern __inline __mmask8
1729 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1730 1.1 mrg _mm_fpclass_ps_mask (__m128 __A, const int __imm)
1731 1.1 mrg {
1732 1.1 mrg return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
1733 1.1 mrg __imm,
1734 1.1 mrg (__mmask8) -1);
1735 1.1 mrg }
1736 1.1 mrg
1737 1.1 mrg extern __inline __m256i
1738 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1739 1.1 mrg _mm256_inserti64x2 (__m256i __A, __m128i __B, const int __imm)
1740 1.1 mrg {
1741 1.1 mrg return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1742 1.1 mrg (__v2di) __B,
1743 1.1 mrg __imm,
1744 1.1 mrg (__v4di)
1745 1.1 mrg _mm256_setzero_si256 (),
1746 1.1 mrg (__mmask8) -
1747 1.1 mrg 1);
1748 1.1 mrg }
1749 1.1 mrg
1750 1.1 mrg extern __inline __m256i
1751 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1752 1.1 mrg _mm256_mask_inserti64x2 (__m256i __W, __mmask8 __U, __m256i __A,
1753 1.1 mrg __m128i __B, const int __imm)
1754 1.1 mrg {
1755 1.1 mrg return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1756 1.1 mrg (__v2di) __B,
1757 1.1 mrg __imm,
1758 1.1 mrg (__v4di) __W,
1759 1.1 mrg (__mmask8)
1760 1.1 mrg __U);
1761 1.1 mrg }
1762 1.1 mrg
1763 1.1 mrg extern __inline __m256i
1764 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1765 1.1 mrg _mm256_maskz_inserti64x2 (__mmask8 __U, __m256i __A, __m128i __B,
1766 1.1 mrg const int __imm)
1767 1.1 mrg {
1768 1.1 mrg return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1769 1.1 mrg (__v2di) __B,
1770 1.1 mrg __imm,
1771 1.1 mrg (__v4di)
1772 1.1 mrg _mm256_setzero_si256 (),
1773 1.1 mrg (__mmask8)
1774 1.1 mrg __U);
1775 1.1 mrg }
1776 1.1 mrg
1777 1.1 mrg extern __inline __m256d
1778 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1779 1.1 mrg _mm256_insertf64x2 (__m256d __A, __m128d __B, const int __imm)
1780 1.1 mrg {
1781 1.1 mrg return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1782 1.1 mrg (__v2df) __B,
1783 1.1 mrg __imm,
1784 1.1 mrg (__v4df)
1785 1.1 mrg _mm256_setzero_pd (),
1786 1.1 mrg (__mmask8) -
1787 1.1 mrg 1);
1788 1.1 mrg }
1789 1.1 mrg
1790 1.1 mrg extern __inline __m256d
1791 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1792 1.1 mrg _mm256_mask_insertf64x2 (__m256d __W, __mmask8 __U, __m256d __A,
1793 1.1 mrg __m128d __B, const int __imm)
1794 1.1 mrg {
1795 1.1 mrg return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1796 1.1 mrg (__v2df) __B,
1797 1.1 mrg __imm,
1798 1.1 mrg (__v4df) __W,
1799 1.1 mrg (__mmask8)
1800 1.1 mrg __U);
1801 1.1 mrg }
1802 1.1 mrg
1803 1.1 mrg extern __inline __m256d
1804 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1805 1.1 mrg _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
1806 1.1 mrg const int __imm)
1807 1.1 mrg {
1808 1.1 mrg return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1809 1.1 mrg (__v2df) __B,
1810 1.1 mrg __imm,
1811 1.1 mrg (__v4df)
1812 1.1 mrg _mm256_setzero_pd (),
1813 1.1 mrg (__mmask8)
1814 1.1 mrg __U);
1815 1.1 mrg }
1816 1.1 mrg
1817 1.1 mrg #else
/* Non-optimizing (#else) branch: each intrinsic that takes an
   immediate operand is a macro so the literal reaches the builtin
   directly — presumably because the inline wrappers above rely on
   constant propagation that only happens when optimizing (the
   matching #if is outside this view; TODO confirm it tests
   __OPTIMIZE__).  The macro forms must stay textually parallel to
   the inline definitions above.  */

/* Insert a 128-bit double chunk Y into lane C of X
   (plain / merge-masked / zero-masked).  */
#define _mm256_insertf64x2(X, Y, C)                                     \
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
    (__v2df)(__m128d) (Y), (int) (C),                                   \
    (__v4df)(__m256d)_mm256_setzero_pd(),                               \
    (__mmask8)-1))

#define _mm256_mask_insertf64x2(W, U, X, Y, C)                          \
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
    (__v2df)(__m128d) (Y), (int) (C),                                   \
    (__v4df)(__m256d)(W),                                               \
    (__mmask8)(U)))

#define _mm256_maskz_insertf64x2(U, X, Y, C)                            \
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
    (__v2df)(__m128d) (Y), (int) (C),                                   \
    (__v4df)(__m256d)_mm256_setzero_pd(),                               \
    (__mmask8)(U)))

/* Insert a 128-bit integer chunk Y into lane C of X
   (plain / merge-masked / zero-masked).  */
#define _mm256_inserti64x2(X, Y, C)                                     \
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
    (__v2di)(__m128i) (Y), (int) (C),                                   \
    (__v4di)(__m256i)_mm256_setzero_si256 (),                           \
    (__mmask8)-1))

#define _mm256_mask_inserti64x2(W, U, X, Y, C)                          \
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
    (__v2di)(__m128i) (Y), (int) (C),                                   \
    (__v4di)(__m256i)(W),                                               \
    (__mmask8)(U)))

#define _mm256_maskz_inserti64x2(U, X, Y, C)                            \
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
    (__v2di)(__m128i) (Y), (int) (C),                                   \
    (__v4di)(__m256i)_mm256_setzero_si256 (),                           \
    (__mmask8)(U)))

/* Extract 128-bit lane C of X (plain / merge-masked / zero-masked).  */
#define _mm256_extractf64x2_pd(X, C)                                    \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8)-1))

#define _mm256_mask_extractf64x2_pd(W, U, X, C)                         \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
    (int) (C), (__v2df)(__m128d) (W), (__mmask8) (U)))

#define _mm256_maskz_extractf64x2_pd(U, X, C)                           \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8) (U)))

/* NOTE(review): _mm_setzero_di below is a GCC-internal helper from
   the AVX-512 headers of this era; later GCC replaced these uses with
   _mm_setzero_si128 — verify against the companion headers before
   changing.  */
#define _mm256_extracti64x2_epi64(X, C)                                 \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
    (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8)-1))

#define _mm256_mask_extracti64x2_epi64(W, U, X, C)                      \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
    (int) (C), (__v2di)(__m128i) (W), (__mmask8) (U)))

#define _mm256_maskz_extracti64x2_epi64(U, X, C)                        \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
    (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8) (U)))

/* Reduce packed doubles/floats with immediate control B
   (plain / merge-masked / zero-masked, 256- and 128-bit).  */
#define _mm256_reduce_pd(A, B)                                          \
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A),     \
    (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)-1))

#define _mm256_mask_reduce_pd(W, U, A, B)                               \
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A),     \
    (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm256_maskz_reduce_pd(U, A, B)                                 \
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A),     \
    (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))

#define _mm_reduce_pd(A, B)                                             \
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A),     \
    (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)-1))

#define _mm_mask_reduce_pd(W, U, A, B)                                  \
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A),     \
    (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_reduce_pd(U, A, B)                                    \
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A),     \
    (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)(U)))

#define _mm256_reduce_ps(A, B)                                          \
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A),       \
    (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))

#define _mm256_mask_reduce_ps(W, U, A, B)                               \
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A),       \
    (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_reduce_ps(U, A, B)                                 \
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A),       \
    (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))

#define _mm_reduce_ps(A, B)                                             \
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A),       \
    (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)-1))

#define _mm_mask_reduce_ps(W, U, A, B)                                  \
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A),       \
    (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_reduce_ps(U, A, B)                                    \
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A),       \
    (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))

/* Range of packed doubles/floats with immediate control C
   (plain / merge-masked / zero-masked, 256- and 128-bit).  */
#define _mm256_range_pd(A, B, C)                                        \
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A),      \
    (__v4df)(__m256d)(B), (int)(C),                                     \
    (__v4df)_mm256_setzero_pd(), (__mmask8)-1))

#define _mm256_maskz_range_pd(U, A, B, C)                               \
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A),      \
    (__v4df)(__m256d)(B), (int)(C),                                     \
    (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))

#define _mm_range_pd(A, B, C)                                           \
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A),      \
    (__v2df)(__m128d)(B), (int)(C),                                     \
    (__v2df)_mm_setzero_pd(), (__mmask8)-1))

#define _mm256_range_ps(A, B, C)                                        \
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A),        \
    (__v8sf)(__m256)(B), (int)(C),                                      \
    (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))

#define _mm256_mask_range_ps(W, U, A, B, C)                             \
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A),        \
    (__v8sf)(__m256)(B), (int)(C),                                      \
    (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_range_ps(U, A, B, C)                               \
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A),        \
    (__v8sf)(__m256)(B), (int)(C),                                      \
    (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))

#define _mm_range_ps(A, B, C)                                           \
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A),        \
    (__v4sf)(__m128)(B), (int)(C),                                      \
    (__v4sf)_mm_setzero_ps(), (__mmask8)-1))

#define _mm_mask_range_ps(W, U, A, B, C)                                \
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A),        \
    (__v4sf)(__m128)(B), (int)(C),                                      \
    (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_range_ps(U, A, B, C)                                  \
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A),        \
    (__v4sf)(__m128)(B), (int)(C),                                      \
    (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))

#define _mm256_mask_range_pd(W, U, A, B, C)                             \
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A),      \
    (__v4df)(__m256d)(B), (int)(C),                                     \
    (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm_mask_range_pd(W, U, A, B, C)                                \
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A),      \
    (__v2df)(__m128d)(B), (int)(C),                                     \
    (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_range_pd(U, A, B, C)                                  \
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A),      \
    (__v2df)(__m128d)(B), (int)(C),                                     \
    (__v2df)_mm_setzero_pd(), (__mmask8)(U)))

/* Classify packed doubles/floats per category immediate C, returning
   a bitmask (masked and unmasked forms).  */
#define _mm256_mask_fpclass_pd_mask(u, X, C)                            \
  ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \
    (int) (C),(__mmask8)(u)))

#define _mm256_mask_fpclass_ps_mask(u, X, C)                            \
  ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X),  \
    (int) (C),(__mmask8)(u)))

#define _mm_mask_fpclass_pd_mask(u, X, C)                               \
  ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \
    (int) (C),(__mmask8)(u)))

#define _mm_mask_fpclass_ps_mask(u, X, C)                               \
  ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X),  \
    (int) (C),(__mmask8)(u)))

#define _mm256_fpclass_pd_mask(X, C)                                    \
  ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \
    (int) (C),(__mmask8)-1))

#define _mm256_fpclass_ps_mask(X, C)                                    \
  ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X),  \
    (int) (C),(__mmask8)-1))

#define _mm_fpclass_pd_mask(X, C)                                       \
  ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \
    (int) (C),(__mmask8)-1))

#define _mm_fpclass_ps_mask(X, C)                                       \
  ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X),  \
    (int) (C),(__mmask8)-1))
2017 1.1 mrg
2018 1.1 mrg #endif
2019 1.1 mrg
2020 1.1 mrg #ifdef __DISABLE_AVX512VLDQ__
2021 1.1 mrg #undef __DISABLE_AVX512VLDQ__
2022 1.1 mrg #pragma GCC pop_options
2023 1.1 mrg #endif /* __DISABLE_AVX512VLDQ__ */
2024 1.1 mrg
2025 1.1 mrg #endif /* _AVX512VLDQINTRIN_H_INCLUDED */
2026