avx512vldqintrin.h revision 1.1.1.2 1 1.1 mrg /* Copyright (C) 2014-2015 Free Software Foundation, Inc.
2 1.1 mrg
3 1.1 mrg This file is part of GCC.
4 1.1 mrg
5 1.1 mrg GCC is free software; you can redistribute it and/or modify
6 1.1 mrg it under the terms of the GNU General Public License as published by
7 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
8 1.1 mrg any later version.
9 1.1 mrg
10 1.1 mrg GCC is distributed in the hope that it will be useful,
11 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 1.1 mrg GNU General Public License for more details.
14 1.1 mrg
15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
16 1.1 mrg permissions described in the GCC Runtime Library Exception, version
17 1.1 mrg 3.1, as published by the Free Software Foundation.
18 1.1 mrg
19 1.1 mrg You should have received a copy of the GNU General Public License and
20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 1.1 mrg <http://www.gnu.org/licenses/>. */
23 1.1 mrg
24 1.1 mrg #ifndef _IMMINTRIN_H_INCLUDED
25 1.1 mrg #error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
26 1.1 mrg #endif
27 1.1 mrg
28 1.1 mrg #ifndef _AVX512VLDQINTRIN_H_INCLUDED
29 1.1 mrg #define _AVX512VLDQINTRIN_H_INCLUDED
30 1.1 mrg
31 1.1 mrg #if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
32 1.1 mrg #pragma GCC push_options
33 1.1 mrg #pragma GCC target("avx512vl,avx512dq")
34 1.1 mrg #define __DISABLE_AVX512VLDQ__
35 1.1 mrg #endif /* __AVX512VLDQ__ */
36 1.1 mrg
37 1.1 mrg extern __inline __m256i
38 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
39 1.1 mrg _mm256_cvttpd_epi64 (__m256d __A)
40 1.1 mrg {
41 1.1 mrg return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
42 1.1 mrg (__v4di)
43 1.1 mrg _mm256_setzero_si256 (),
44 1.1 mrg (__mmask8) -1);
45 1.1 mrg }
46 1.1 mrg
47 1.1 mrg extern __inline __m256i
48 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
49 1.1 mrg _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
50 1.1 mrg {
51 1.1 mrg return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
52 1.1 mrg (__v4di) __W,
53 1.1 mrg (__mmask8) __U);
54 1.1 mrg }
55 1.1 mrg
56 1.1 mrg extern __inline __m256i
57 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
58 1.1 mrg _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A)
59 1.1 mrg {
60 1.1 mrg return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
61 1.1 mrg (__v4di)
62 1.1 mrg _mm256_setzero_si256 (),
63 1.1 mrg (__mmask8) __U);
64 1.1 mrg }
65 1.1 mrg
66 1.1 mrg extern __inline __m128i
67 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
68 1.1 mrg _mm_cvttpd_epi64 (__m128d __A)
69 1.1 mrg {
70 1.1 mrg return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
71 1.1 mrg (__v2di)
72 1.1 mrg _mm_setzero_di (),
73 1.1 mrg (__mmask8) -1);
74 1.1 mrg }
75 1.1 mrg
76 1.1 mrg extern __inline __m128i
77 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
78 1.1 mrg _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
79 1.1 mrg {
80 1.1 mrg return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
81 1.1 mrg (__v2di) __W,
82 1.1 mrg (__mmask8) __U);
83 1.1 mrg }
84 1.1 mrg
85 1.1 mrg extern __inline __m128i
86 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
87 1.1 mrg _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A)
88 1.1 mrg {
89 1.1 mrg return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
90 1.1 mrg (__v2di)
91 1.1 mrg _mm_setzero_si128 (),
92 1.1 mrg (__mmask8) __U);
93 1.1 mrg }
94 1.1 mrg
95 1.1 mrg extern __inline __m256i
96 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
97 1.1 mrg _mm256_cvttpd_epu64 (__m256d __A)
98 1.1 mrg {
99 1.1 mrg return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
100 1.1 mrg (__v4di)
101 1.1 mrg _mm256_setzero_si256 (),
102 1.1 mrg (__mmask8) -1);
103 1.1 mrg }
104 1.1 mrg
105 1.1 mrg extern __inline __m256i
106 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
107 1.1 mrg _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
108 1.1 mrg {
109 1.1 mrg return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
110 1.1 mrg (__v4di) __W,
111 1.1 mrg (__mmask8) __U);
112 1.1 mrg }
113 1.1 mrg
114 1.1 mrg extern __inline __m256i
115 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
116 1.1 mrg _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A)
117 1.1 mrg {
118 1.1 mrg return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
119 1.1 mrg (__v4di)
120 1.1 mrg _mm256_setzero_si256 (),
121 1.1 mrg (__mmask8) __U);
122 1.1 mrg }
123 1.1 mrg
124 1.1 mrg extern __inline __m128i
125 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126 1.1 mrg _mm_cvttpd_epu64 (__m128d __A)
127 1.1 mrg {
128 1.1 mrg return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
129 1.1 mrg (__v2di)
130 1.1 mrg _mm_setzero_di (),
131 1.1 mrg (__mmask8) -1);
132 1.1 mrg }
133 1.1 mrg
134 1.1 mrg extern __inline __m128i
135 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136 1.1 mrg _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
137 1.1 mrg {
138 1.1 mrg return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
139 1.1 mrg (__v2di) __W,
140 1.1 mrg (__mmask8) __U);
141 1.1 mrg }
142 1.1 mrg
143 1.1 mrg extern __inline __m128i
144 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
145 1.1 mrg _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A)
146 1.1 mrg {
147 1.1 mrg return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
148 1.1 mrg (__v2di)
149 1.1 mrg _mm_setzero_si128 (),
150 1.1 mrg (__mmask8) __U);
151 1.1 mrg }
152 1.1 mrg
153 1.1 mrg extern __inline __m256i
154 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
155 1.1 mrg _mm256_cvtpd_epi64 (__m256d __A)
156 1.1 mrg {
157 1.1 mrg return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
158 1.1 mrg (__v4di)
159 1.1 mrg _mm256_setzero_si256 (),
160 1.1 mrg (__mmask8) -1);
161 1.1 mrg }
162 1.1 mrg
163 1.1 mrg extern __inline __m256i
164 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
165 1.1 mrg _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
166 1.1 mrg {
167 1.1 mrg return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
168 1.1 mrg (__v4di) __W,
169 1.1 mrg (__mmask8) __U);
170 1.1 mrg }
171 1.1 mrg
172 1.1 mrg extern __inline __m256i
173 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
174 1.1 mrg _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A)
175 1.1 mrg {
176 1.1 mrg return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
177 1.1 mrg (__v4di)
178 1.1 mrg _mm256_setzero_si256 (),
179 1.1 mrg (__mmask8) __U);
180 1.1 mrg }
181 1.1 mrg
182 1.1 mrg extern __inline __m128i
183 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
184 1.1 mrg _mm_cvtpd_epi64 (__m128d __A)
185 1.1 mrg {
186 1.1 mrg return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
187 1.1 mrg (__v2di)
188 1.1 mrg _mm_setzero_di (),
189 1.1 mrg (__mmask8) -1);
190 1.1 mrg }
191 1.1 mrg
192 1.1 mrg extern __inline __m128i
193 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
194 1.1 mrg _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
195 1.1 mrg {
196 1.1 mrg return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
197 1.1 mrg (__v2di) __W,
198 1.1 mrg (__mmask8) __U);
199 1.1 mrg }
200 1.1 mrg
201 1.1 mrg extern __inline __m128i
202 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
203 1.1 mrg _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A)
204 1.1 mrg {
205 1.1 mrg return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
206 1.1 mrg (__v2di)
207 1.1 mrg _mm_setzero_si128 (),
208 1.1 mrg (__mmask8) __U);
209 1.1 mrg }
210 1.1 mrg
211 1.1 mrg extern __inline __m256i
212 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
213 1.1 mrg _mm256_cvtpd_epu64 (__m256d __A)
214 1.1 mrg {
215 1.1 mrg return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
216 1.1 mrg (__v4di)
217 1.1 mrg _mm256_setzero_si256 (),
218 1.1 mrg (__mmask8) -1);
219 1.1 mrg }
220 1.1 mrg
221 1.1 mrg extern __inline __m256i
222 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
223 1.1 mrg _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
224 1.1 mrg {
225 1.1 mrg return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
226 1.1 mrg (__v4di) __W,
227 1.1 mrg (__mmask8) __U);
228 1.1 mrg }
229 1.1 mrg
230 1.1 mrg extern __inline __m256i
231 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
232 1.1 mrg _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A)
233 1.1 mrg {
234 1.1 mrg return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
235 1.1 mrg (__v4di)
236 1.1 mrg _mm256_setzero_si256 (),
237 1.1 mrg (__mmask8) __U);
238 1.1 mrg }
239 1.1 mrg
240 1.1 mrg extern __inline __m128i
241 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
242 1.1 mrg _mm_cvtpd_epu64 (__m128d __A)
243 1.1 mrg {
244 1.1 mrg return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
245 1.1 mrg (__v2di)
246 1.1 mrg _mm_setzero_di (),
247 1.1 mrg (__mmask8) -1);
248 1.1 mrg }
249 1.1 mrg
250 1.1 mrg extern __inline __m128i
251 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
252 1.1 mrg _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
253 1.1 mrg {
254 1.1 mrg return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
255 1.1 mrg (__v2di) __W,
256 1.1 mrg (__mmask8) __U);
257 1.1 mrg }
258 1.1 mrg
259 1.1 mrg extern __inline __m128i
260 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
261 1.1 mrg _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A)
262 1.1 mrg {
263 1.1 mrg return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
264 1.1 mrg (__v2di)
265 1.1 mrg _mm_setzero_si128 (),
266 1.1 mrg (__mmask8) __U);
267 1.1 mrg }
268 1.1 mrg
269 1.1 mrg extern __inline __m256i
270 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
271 1.1 mrg _mm256_cvttps_epi64 (__m128 __A)
272 1.1 mrg {
273 1.1 mrg return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
274 1.1 mrg (__v4di)
275 1.1 mrg _mm256_setzero_si256 (),
276 1.1 mrg (__mmask8) -1);
277 1.1 mrg }
278 1.1 mrg
279 1.1 mrg extern __inline __m256i
280 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
281 1.1 mrg _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
282 1.1 mrg {
283 1.1 mrg return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
284 1.1 mrg (__v4di) __W,
285 1.1 mrg (__mmask8) __U);
286 1.1 mrg }
287 1.1 mrg
288 1.1 mrg extern __inline __m256i
289 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
290 1.1 mrg _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
291 1.1 mrg {
292 1.1 mrg return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
293 1.1 mrg (__v4di)
294 1.1 mrg _mm256_setzero_si256 (),
295 1.1 mrg (__mmask8) __U);
296 1.1 mrg }
297 1.1 mrg
298 1.1 mrg extern __inline __m128i
299 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
300 1.1 mrg _mm_cvttps_epi64 (__m128 __A)
301 1.1 mrg {
302 1.1 mrg return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
303 1.1 mrg (__v2di)
304 1.1 mrg _mm_setzero_di (),
305 1.1 mrg (__mmask8) -1);
306 1.1 mrg }
307 1.1 mrg
308 1.1 mrg extern __inline __m128i
309 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
310 1.1 mrg _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
311 1.1 mrg {
312 1.1 mrg return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
313 1.1 mrg (__v2di) __W,
314 1.1 mrg (__mmask8) __U);
315 1.1 mrg }
316 1.1 mrg
317 1.1 mrg extern __inline __m128i
318 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
319 1.1 mrg _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
320 1.1 mrg {
321 1.1 mrg return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
322 1.1 mrg (__v2di)
323 1.1 mrg _mm_setzero_di (),
324 1.1 mrg (__mmask8) __U);
325 1.1 mrg }
326 1.1 mrg
327 1.1 mrg extern __inline __m256i
328 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
329 1.1 mrg _mm256_cvttps_epu64 (__m128 __A)
330 1.1 mrg {
331 1.1 mrg return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
332 1.1 mrg (__v4di)
333 1.1 mrg _mm256_setzero_si256 (),
334 1.1 mrg (__mmask8) -1);
335 1.1 mrg }
336 1.1 mrg
337 1.1 mrg extern __inline __m256i
338 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
339 1.1 mrg _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
340 1.1 mrg {
341 1.1 mrg return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
342 1.1 mrg (__v4di) __W,
343 1.1 mrg (__mmask8) __U);
344 1.1 mrg }
345 1.1 mrg
346 1.1 mrg extern __inline __m256i
347 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
348 1.1 mrg _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
349 1.1 mrg {
350 1.1 mrg return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
351 1.1 mrg (__v4di)
352 1.1 mrg _mm256_setzero_si256 (),
353 1.1 mrg (__mmask8) __U);
354 1.1 mrg }
355 1.1 mrg
356 1.1 mrg extern __inline __m128i
357 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
358 1.1 mrg _mm_cvttps_epu64 (__m128 __A)
359 1.1 mrg {
360 1.1 mrg return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
361 1.1 mrg (__v2di)
362 1.1 mrg _mm_setzero_di (),
363 1.1 mrg (__mmask8) -1);
364 1.1 mrg }
365 1.1 mrg
366 1.1 mrg extern __inline __m128i
367 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
368 1.1 mrg _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
369 1.1 mrg {
370 1.1 mrg return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
371 1.1 mrg (__v2di) __W,
372 1.1 mrg (__mmask8) __U);
373 1.1 mrg }
374 1.1 mrg
375 1.1 mrg extern __inline __m128i
376 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
377 1.1 mrg _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
378 1.1 mrg {
379 1.1 mrg return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
380 1.1 mrg (__v2di)
381 1.1 mrg _mm_setzero_di (),
382 1.1 mrg (__mmask8) __U);
383 1.1 mrg }
384 1.1 mrg
385 1.1 mrg extern __inline __m256d
386 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
387 1.1 mrg _mm256_broadcast_f64x2 (__m128d __A)
388 1.1 mrg {
389 1.1 mrg return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
390 1.1 mrg __A,
391 1.1 mrg (__v4df)_mm256_undefined_pd(),
392 1.1.1.2 mrg (__mmask8) -1);
393 1.1 mrg }
394 1.1 mrg
395 1.1 mrg extern __inline __m256d
396 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
397 1.1 mrg _mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
398 1.1 mrg {
399 1.1 mrg return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
400 1.1 mrg __A,
401 1.1 mrg (__v4df)
402 1.1 mrg __O, __M);
403 1.1 mrg }
404 1.1 mrg
405 1.1 mrg extern __inline __m256d
406 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
407 1.1 mrg _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
408 1.1 mrg {
409 1.1 mrg return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
410 1.1 mrg __A,
411 1.1 mrg (__v4df)
412 1.1 mrg _mm256_setzero_ps (),
413 1.1 mrg __M);
414 1.1 mrg }
415 1.1 mrg
416 1.1 mrg extern __inline __m256i
417 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
418 1.1 mrg _mm256_broadcast_i64x2 (__m128i __A)
419 1.1 mrg {
420 1.1 mrg return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
421 1.1 mrg __A,
422 1.1 mrg (__v4di)_mm256_undefined_si256(),
423 1.1.1.2 mrg (__mmask8) -1);
424 1.1 mrg }
425 1.1 mrg
426 1.1 mrg extern __inline __m256i
427 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
428 1.1 mrg _mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
429 1.1 mrg {
430 1.1 mrg return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
431 1.1 mrg __A,
432 1.1 mrg (__v4di)
433 1.1 mrg __O, __M);
434 1.1 mrg }
435 1.1 mrg
436 1.1 mrg extern __inline __m256i
437 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
438 1.1 mrg _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
439 1.1 mrg {
440 1.1 mrg return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
441 1.1 mrg __A,
442 1.1 mrg (__v4di)
443 1.1 mrg _mm256_setzero_si256 (),
444 1.1 mrg __M);
445 1.1 mrg }
446 1.1 mrg
447 1.1 mrg extern __inline __m256
448 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
449 1.1 mrg _mm256_broadcast_f32x2 (__m128 __A)
450 1.1 mrg {
451 1.1 mrg return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
452 1.1 mrg (__v8sf)_mm256_undefined_ps(),
453 1.1.1.2 mrg (__mmask8) -1);
454 1.1 mrg }
455 1.1 mrg
456 1.1 mrg extern __inline __m256
457 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
458 1.1 mrg _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
459 1.1 mrg {
460 1.1 mrg return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
461 1.1 mrg (__v8sf) __O,
462 1.1 mrg __M);
463 1.1 mrg }
464 1.1 mrg
465 1.1 mrg extern __inline __m256
466 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
467 1.1 mrg _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
468 1.1 mrg {
469 1.1 mrg return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
470 1.1 mrg (__v8sf)
471 1.1 mrg _mm256_setzero_ps (),
472 1.1 mrg __M);
473 1.1 mrg }
474 1.1 mrg
475 1.1 mrg extern __inline __m256i
476 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
477 1.1 mrg _mm256_broadcast_i32x2 (__m128i __A)
478 1.1 mrg {
479 1.1 mrg return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
480 1.1 mrg __A,
481 1.1 mrg (__v8si)_mm256_undefined_si256(),
482 1.1.1.2 mrg (__mmask8) -1);
483 1.1 mrg }
484 1.1 mrg
485 1.1 mrg extern __inline __m256i
486 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
487 1.1 mrg _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
488 1.1 mrg {
489 1.1 mrg return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
490 1.1 mrg __A,
491 1.1 mrg (__v8si)
492 1.1 mrg __O, __M);
493 1.1 mrg }
494 1.1 mrg
495 1.1 mrg extern __inline __m256i
496 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
497 1.1 mrg _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
498 1.1 mrg {
499 1.1 mrg return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
500 1.1 mrg __A,
501 1.1 mrg (__v8si)
502 1.1 mrg _mm256_setzero_si256 (),
503 1.1 mrg __M);
504 1.1 mrg }
505 1.1 mrg
506 1.1 mrg extern __inline __m128i
507 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
508 1.1 mrg _mm_broadcast_i32x2 (__m128i __A)
509 1.1 mrg {
510 1.1 mrg return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
511 1.1 mrg __A,
512 1.1 mrg (__v4si)_mm_undefined_si128(),
513 1.1.1.2 mrg (__mmask8) -1);
514 1.1 mrg }
515 1.1 mrg
516 1.1 mrg extern __inline __m128i
517 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
518 1.1 mrg _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
519 1.1 mrg {
520 1.1 mrg return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
521 1.1 mrg __A,
522 1.1 mrg (__v4si)
523 1.1 mrg __O, __M);
524 1.1 mrg }
525 1.1 mrg
526 1.1 mrg extern __inline __m128i
527 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
528 1.1 mrg _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
529 1.1 mrg {
530 1.1 mrg return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
531 1.1 mrg __A,
532 1.1 mrg (__v4si)
533 1.1 mrg _mm_setzero_si128 (),
534 1.1 mrg __M);
535 1.1 mrg }
536 1.1 mrg
537 1.1 mrg extern __inline __m256i
538 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
539 1.1 mrg _mm256_mullo_epi64 (__m256i __A, __m256i __B)
540 1.1 mrg {
541 1.1 mrg return (__m256i) ((__v4du) __A * (__v4du) __B);
542 1.1 mrg }
543 1.1 mrg
544 1.1 mrg extern __inline __m256i
545 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
546 1.1 mrg _mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
547 1.1 mrg __m256i __B)
548 1.1 mrg {
549 1.1 mrg return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
550 1.1 mrg (__v4di) __B,
551 1.1 mrg (__v4di) __W,
552 1.1 mrg (__mmask8) __U);
553 1.1 mrg }
554 1.1 mrg
555 1.1 mrg extern __inline __m256i
556 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
557 1.1 mrg _mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
558 1.1 mrg {
559 1.1 mrg return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
560 1.1 mrg (__v4di) __B,
561 1.1 mrg (__v4di)
562 1.1 mrg _mm256_setzero_si256 (),
563 1.1 mrg (__mmask8) __U);
564 1.1 mrg }
565 1.1 mrg
566 1.1 mrg extern __inline __m128i
567 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
568 1.1 mrg _mm_mullo_epi64 (__m128i __A, __m128i __B)
569 1.1 mrg {
570 1.1 mrg return (__m128i) ((__v2du) __A * (__v2du) __B);
571 1.1 mrg }
572 1.1 mrg
573 1.1 mrg extern __inline __m128i
574 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
575 1.1 mrg _mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
576 1.1 mrg __m128i __B)
577 1.1 mrg {
578 1.1 mrg return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
579 1.1 mrg (__v2di) __B,
580 1.1 mrg (__v2di) __W,
581 1.1 mrg (__mmask8) __U);
582 1.1 mrg }
583 1.1 mrg
584 1.1 mrg extern __inline __m128i
585 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
586 1.1 mrg _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
587 1.1 mrg {
588 1.1 mrg return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
589 1.1 mrg (__v2di) __B,
590 1.1 mrg (__v2di)
591 1.1 mrg _mm_setzero_di (),
592 1.1 mrg (__mmask8) __U);
593 1.1 mrg }
594 1.1 mrg
595 1.1 mrg extern __inline __m256d
596 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
597 1.1 mrg _mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A,
598 1.1 mrg __m256d __B)
599 1.1 mrg {
600 1.1 mrg return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
601 1.1 mrg (__v4df) __B,
602 1.1 mrg (__v4df) __W,
603 1.1 mrg (__mmask8) __U);
604 1.1 mrg }
605 1.1 mrg
606 1.1 mrg extern __inline __m256d
607 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
608 1.1 mrg _mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B)
609 1.1 mrg {
610 1.1 mrg return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
611 1.1 mrg (__v4df) __B,
612 1.1 mrg (__v4df)
613 1.1 mrg _mm256_setzero_pd (),
614 1.1 mrg (__mmask8) __U);
615 1.1 mrg }
616 1.1 mrg
617 1.1 mrg extern __inline __m128d
618 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
619 1.1 mrg _mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A,
620 1.1 mrg __m128d __B)
621 1.1 mrg {
622 1.1 mrg return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
623 1.1 mrg (__v2df) __B,
624 1.1 mrg (__v2df) __W,
625 1.1 mrg (__mmask8) __U);
626 1.1 mrg }
627 1.1 mrg
628 1.1 mrg extern __inline __m128d
629 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
630 1.1 mrg _mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B)
631 1.1 mrg {
632 1.1 mrg return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
633 1.1 mrg (__v2df) __B,
634 1.1 mrg (__v2df)
635 1.1 mrg _mm_setzero_pd (),
636 1.1 mrg (__mmask8) __U);
637 1.1 mrg }
638 1.1 mrg
639 1.1 mrg extern __inline __m256
640 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
641 1.1 mrg _mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A,
642 1.1 mrg __m256 __B)
643 1.1 mrg {
644 1.1 mrg return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
645 1.1 mrg (__v8sf) __B,
646 1.1 mrg (__v8sf) __W,
647 1.1 mrg (__mmask8) __U);
648 1.1 mrg }
649 1.1 mrg
650 1.1 mrg extern __inline __m256
651 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
652 1.1 mrg _mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B)
653 1.1 mrg {
654 1.1 mrg return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
655 1.1 mrg (__v8sf) __B,
656 1.1 mrg (__v8sf)
657 1.1 mrg _mm256_setzero_ps (),
658 1.1 mrg (__mmask8) __U);
659 1.1 mrg }
660 1.1 mrg
661 1.1 mrg extern __inline __m128
662 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
663 1.1 mrg _mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
664 1.1 mrg {
665 1.1 mrg return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
666 1.1 mrg (__v4sf) __B,
667 1.1 mrg (__v4sf) __W,
668 1.1 mrg (__mmask8) __U);
669 1.1 mrg }
670 1.1 mrg
671 1.1 mrg extern __inline __m128
672 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
673 1.1 mrg _mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B)
674 1.1 mrg {
675 1.1 mrg return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
676 1.1 mrg (__v4sf) __B,
677 1.1 mrg (__v4sf)
678 1.1 mrg _mm_setzero_ps (),
679 1.1 mrg (__mmask8) __U);
680 1.1 mrg }
681 1.1 mrg
682 1.1 mrg extern __inline __m256i
683 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
684 1.1 mrg _mm256_cvtps_epi64 (__m128 __A)
685 1.1 mrg {
686 1.1 mrg return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
687 1.1 mrg (__v4di)
688 1.1 mrg _mm256_setzero_si256 (),
689 1.1 mrg (__mmask8) -1);
690 1.1 mrg }
691 1.1 mrg
692 1.1 mrg extern __inline __m256i
693 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694 1.1 mrg _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
695 1.1 mrg {
696 1.1 mrg return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
697 1.1 mrg (__v4di) __W,
698 1.1 mrg (__mmask8) __U);
699 1.1 mrg }
700 1.1 mrg
701 1.1 mrg extern __inline __m256i
702 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
703 1.1 mrg _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
704 1.1 mrg {
705 1.1 mrg return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
706 1.1 mrg (__v4di)
707 1.1 mrg _mm256_setzero_si256 (),
708 1.1 mrg (__mmask8) __U);
709 1.1 mrg }
710 1.1 mrg
711 1.1 mrg extern __inline __m128i
712 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
713 1.1 mrg _mm_cvtps_epi64 (__m128 __A)
714 1.1 mrg {
715 1.1 mrg return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
716 1.1 mrg (__v2di)
717 1.1 mrg _mm_setzero_di (),
718 1.1 mrg (__mmask8) -1);
719 1.1 mrg }
720 1.1 mrg
721 1.1 mrg extern __inline __m128i
722 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
723 1.1 mrg _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
724 1.1 mrg {
725 1.1 mrg return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
726 1.1 mrg (__v2di) __W,
727 1.1 mrg (__mmask8) __U);
728 1.1 mrg }
729 1.1 mrg
730 1.1 mrg extern __inline __m128i
731 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
732 1.1 mrg _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
733 1.1 mrg {
734 1.1 mrg return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
735 1.1 mrg (__v2di)
736 1.1 mrg _mm_setzero_di (),
737 1.1 mrg (__mmask8) __U);
738 1.1 mrg }
739 1.1 mrg
740 1.1 mrg extern __inline __m256i
741 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
742 1.1 mrg _mm256_cvtps_epu64 (__m128 __A)
743 1.1 mrg {
744 1.1 mrg return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
745 1.1 mrg (__v4di)
746 1.1 mrg _mm256_setzero_si256 (),
747 1.1 mrg (__mmask8) -1);
748 1.1 mrg }
749 1.1 mrg
750 1.1 mrg extern __inline __m256i
751 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
752 1.1 mrg _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
753 1.1 mrg {
754 1.1 mrg return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
755 1.1 mrg (__v4di) __W,
756 1.1 mrg (__mmask8) __U);
757 1.1 mrg }
758 1.1 mrg
759 1.1 mrg extern __inline __m256i
760 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
761 1.1 mrg _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
762 1.1 mrg {
763 1.1 mrg return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
764 1.1 mrg (__v4di)
765 1.1 mrg _mm256_setzero_si256 (),
766 1.1 mrg (__mmask8) __U);
767 1.1 mrg }
768 1.1 mrg
769 1.1 mrg extern __inline __m128i
770 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
771 1.1 mrg _mm_cvtps_epu64 (__m128 __A)
772 1.1 mrg {
773 1.1 mrg return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
774 1.1 mrg (__v2di)
775 1.1 mrg _mm_setzero_di (),
776 1.1 mrg (__mmask8) -1);
777 1.1 mrg }
778 1.1 mrg
779 1.1 mrg extern __inline __m128i
780 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
781 1.1 mrg _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
782 1.1 mrg {
783 1.1 mrg return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
784 1.1 mrg (__v2di) __W,
785 1.1 mrg (__mmask8) __U);
786 1.1 mrg }
787 1.1 mrg
788 1.1 mrg extern __inline __m128i
789 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
790 1.1 mrg _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
791 1.1 mrg {
792 1.1 mrg return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
793 1.1 mrg (__v2di)
794 1.1 mrg _mm_setzero_di (),
795 1.1 mrg (__mmask8) __U);
796 1.1 mrg }
797 1.1 mrg
/* VCVTQQ2PS: convert packed signed 64-bit integers to packed SP
   floats.  Each family member differs only in the pass-through
   operand and mask handed to the builtin: plain -> all-ones mask,
   mask -> merge into __W under __U, maskz -> zero under __U.  */

/* Convert four signed 64-bit ints in __A to four SP floats.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi64_ps (__m256i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) -1);
}

/* As above, but elements with a clear bit in __U are taken from __W.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
						   (__v4sf) __W,
						   (__mmask8) __U);
}

/* As above, but elements with a clear bit in __U are zeroed.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U);
}

/* Convert two signed 64-bit ints in __A to SP floats (upper result
   lanes follow the builtin's 128-bit convention).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi64_ps (__m128i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) -1);
}

/* Merge-masked 128-bit variant: clear __U bits select from __W.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
						   (__v4sf) __W,
						   (__mmask8) __U);
}

/* Zero-masked 128-bit variant: clear __U bits produce zero.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U);
}
855 1.1 mrg
/* VCVTUQQ2PS: convert packed unsigned 64-bit integers to packed SP
   floats.  Same plain / merge-mask / zero-mask triple as the signed
   family above, using the cvtuqq2ps builtins.  */

/* Convert four unsigned 64-bit ints in __A to four SP floats.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu64_ps (__m256i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
						    (__v4sf)
						    _mm_setzero_ps (),
						    (__mmask8) -1);
}

/* Merge-masked: elements with a clear bit in __U come from __W.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
						    (__v4sf) __W,
						    (__mmask8) __U);
}

/* Zero-masked: elements with a clear bit in __U are zeroed.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
						    (__v4sf)
						    _mm_setzero_ps (),
						    (__mmask8) __U);
}

/* Convert two unsigned 64-bit ints in __A to SP floats.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu64_ps (__m128i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
						    (__v4sf)
						    _mm_setzero_ps (),
						    (__mmask8) -1);
}

/* Merge-masked 128-bit variant.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
						    (__v4sf) __W,
						    (__mmask8) __U);
}

/* Zero-masked 128-bit variant.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
						    (__v4sf)
						    _mm_setzero_ps (),
						    (__mmask8) __U);
}
913 1.1 mrg
/* VCVTQQ2PD: convert packed signed 64-bit integers to packed DP
   floats.  Plain / merge-mask / zero-mask triple at both 256- and
   128-bit widths.  */

/* Convert four signed 64-bit ints in __A to four DP floats.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi64_pd (__m256i __A)
{
  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
						    (__v4df)
						    _mm256_setzero_pd (),
						    (__mmask8) -1);
}

/* Merge-masked: elements with a clear bit in __U come from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A)
{
  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
						    (__v4df) __W,
						    (__mmask8) __U);
}

/* Zero-masked: elements with a clear bit in __U are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A)
{
  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
						    (__v4df)
						    _mm256_setzero_pd (),
						    (__mmask8) __U);
}

/* Convert two signed 64-bit ints in __A to two DP floats.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi64_pd (__m128i __A)
{
  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) -1);
}

/* Merge-masked 128-bit variant.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A)
{
  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
						    (__v2df) __W,
						    (__mmask8) __U);
}

/* Zero-masked 128-bit variant.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A)
{
  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U);
}
971 1.1 mrg
/* VCVTUQQ2PD, 256-bit: convert packed unsigned 64-bit integers to
   packed DP floats.  (The 128-bit siblings appear further down in
   this file, after the and_pd/and_ps group.)  */

/* Convert four unsigned 64-bit ints in __A to four DP floats.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu64_pd (__m256i __A)
{
  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
						     (__v4df)
						     _mm256_setzero_pd (),
						     (__mmask8) -1);
}

/* Merge-masked: elements with a clear bit in __U come from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A)
{
  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
						     (__v4df) __W,
						     (__mmask8) __U);
}

/* Zero-masked: elements with a clear bit in __U are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A)
{
  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
						     (__v4df)
						     _mm256_setzero_pd (),
						     (__mmask8) __U);
}
1000 1.1 mrg
/* Masked bitwise AND of packed DP/SP floats (VANDPD / VANDPS with
   AVX-512 masking).  mask variants merge unselected elements from
   __W; maskz variants zero them.  */

/* __A & __B elementwise; clear __U bits select from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A,
		    __m256d __B)
{
  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
						 (__v4df) __B,
						 (__v4df) __W,
						 (__mmask8) __U);
}

/* __A & __B elementwise; clear __U bits are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
						 (__v4df) __B,
						 (__v4df)
						 _mm256_setzero_pd (),
						 (__mmask8) __U);
}

/* 128-bit DP merge-masked AND.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
						 (__v2df) __B,
						 (__v2df) __W,
						 (__mmask8) __U);
}

/* 128-bit DP zero-masked AND.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
						 (__v2df) __B,
						 (__v2df)
						 _mm_setzero_pd (),
						 (__mmask8) __U);
}

/* 256-bit SP merge-masked AND.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
						(__v8sf) __B,
						(__v8sf) __W,
						(__mmask8) __U);
}

/* 256-bit SP zero-masked AND.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
						(__v8sf) __B,
						(__v8sf)
						_mm256_setzero_ps (),
						(__mmask8) __U);
}

/* 128-bit SP merge-masked AND.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
						(__v4sf) __B,
						(__v4sf) __W,
						(__mmask8) __U);
}

/* 128-bit SP zero-masked AND.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
						(__v4sf) __B,
						(__v4sf)
						_mm_setzero_ps (),
						(__mmask8) __U);
}
1085 1.1 mrg
/* VCVTUQQ2PD, 128-bit: convert packed unsigned 64-bit integers to
   packed DP floats (completes the 256-bit group earlier in the
   file).  */

/* Convert two unsigned 64-bit ints in __A to two DP floats.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu64_pd (__m128i __A)
{
  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
						     (__v2df)
						     _mm_setzero_pd (),
						     (__mmask8) -1);
}

/* Merge-masked: elements with a clear bit in __U come from __W.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A)
{
  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
						     (__v2df) __W,
						     (__mmask8) __U);
}

/* Zero-masked: elements with a clear bit in __U are zeroed.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A)
{
  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
						     (__v2df)
						     _mm_setzero_pd (),
						     (__mmask8) __U);
}
1114 1.1 mrg
/* Masked bitwise XOR of packed DP/SP floats (VXORPD / VXORPS with
   AVX-512 masking).  Same merge/zero masking convention as the AND
   group above.  */

/* __A ^ __B elementwise; clear __U bits select from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
		    __m256d __B)
{
  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
						 (__v4df) __B,
						 (__v4df) __W,
						 (__mmask8) __U);
}

/* __A ^ __B elementwise; clear __U bits are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
						 (__v4df) __B,
						 (__v4df)
						 _mm256_setzero_pd (),
						 (__mmask8) __U);
}

/* 128-bit DP merge-masked XOR.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
						 (__v2df) __B,
						 (__v2df) __W,
						 (__mmask8) __U);
}

/* 128-bit DP zero-masked XOR.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
						 (__v2df) __B,
						 (__v2df)
						 _mm_setzero_pd (),
						 (__mmask8) __U);
}

/* 256-bit SP merge-masked XOR.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
						(__v8sf) __B,
						(__v8sf) __W,
						(__mmask8) __U);
}

/* 256-bit SP zero-masked XOR.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
						(__v8sf) __B,
						(__v8sf)
						_mm256_setzero_ps (),
						(__mmask8) __U);
}

/* 128-bit SP merge-masked XOR.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
						(__v4sf) __B,
						(__v4sf) __W,
						(__mmask8) __U);
}

/* 128-bit SP zero-masked XOR.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
						(__v4sf) __B,
						(__v4sf)
						_mm_setzero_ps (),
						(__mmask8) __U);
}
1199 1.1 mrg
/* Masked bitwise OR of packed DP/SP floats (VORPD / VORPS with
   AVX-512 masking).  Same merge/zero masking convention as the AND
   and XOR groups above.  */

/* __A | __B elementwise; clear __U bits select from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
						(__v4df) __B,
						(__v4df) __W,
						(__mmask8) __U);
}

/* __A | __B elementwise; clear __U bits are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
						(__v4df) __B,
						(__v4df)
						_mm256_setzero_pd (),
						(__mmask8) __U);
}

/* 128-bit DP merge-masked OR.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
						(__v2df) __B,
						(__v2df) __W,
						(__mmask8) __U);
}

/* 128-bit DP zero-masked OR.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
						(__v2df) __B,
						(__v2df)
						_mm_setzero_pd (),
						(__mmask8) __U);
}

/* 256-bit SP merge-masked OR.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
					       (__v8sf) __B,
					       (__v8sf) __W,
					       (__mmask8) __U);
}

/* 256-bit SP zero-masked OR.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
					       (__v8sf) __B,
					       (__v8sf)
					       _mm256_setzero_ps (),
					       (__mmask8) __U);
}

/* 128-bit SP merge-masked OR.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
					       (__v4sf) __B,
					       (__v4sf) __W,
					       (__mmask8) __U);
}

/* 128-bit SP zero-masked OR.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
					       (__v4sf) __B,
					       (__v4sf)
					       _mm_setzero_ps (),
					       (__mmask8) __U);
}
1283 1.1 mrg
/* Mask <-> vector element conversions.  movm_* expand each mask bit
   into a whole vector element (VPMOVM2D / VPMOVM2Q); movepi*_mask do
   the inverse, collecting one bit per element into a __mmask8
   (VPMOVD2M / VPMOVQ2M).  */

/* Expand the low 4 bits of __A into four 32-bit elements.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movm_epi32 (__mmask8 __A)
{
  return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
}

/* Expand the 8 bits of __A into eight 32-bit elements.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movm_epi32 (__mmask8 __A)
{
  return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
}

/* Expand the low 2 bits of __A into two 64-bit elements.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movm_epi64 (__mmask8 __A)
{
  return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
}

/* Expand the low 4 bits of __A into four 64-bit elements.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movm_epi64 (__mmask8 __A)
{
  return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
}

/* Collect one bit from each of the four 32-bit elements of __A.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movepi32_mask (__m128i __A)
{
  return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
}

/* Collect one bit from each of the eight 32-bit elements of __A.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movepi32_mask (__m256i __A)
{
  return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
}

/* Collect one bit from each of the two 64-bit elements of __A.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movepi64_mask (__m128i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
}

/* Collect one bit from each of the four 64-bit elements of __A.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movepi64_mask (__m256i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
}
1339 1.1 mrg
1340 1.1 mrg #ifdef __OPTIMIZE__
/* VEXTRACTF64X2 / VEXTRACTI64X2: extract the 128-bit lane of a
   256-bit vector selected by the immediate __imm.  These live under
   the enclosing #ifdef __OPTIMIZE__ because __imm must fold to a
   compile-time constant for the builtin.  */

/* Extract 128-bit DP lane __imm of __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extractf64x2_pd (__m256d __A, const int __imm)
{
  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
							 __imm,
							 (__v2df)
							 _mm_setzero_pd (),
							 (__mmask8) -1);
}

/* Merge-masked extract: clear __U bits select from __W.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m256d __A,
			     const int __imm)
{
  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
							 __imm,
							 (__v2df) __W,
							 (__mmask8)
							 __U);
}

/* Zero-masked extract: clear __U bits are zeroed.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_extractf64x2_pd (__mmask8 __U, __m256d __A,
			      const int __imm)
{
  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
							 __imm,
							 (__v2df)
							 _mm_setzero_pd (),
							 (__mmask8)
							 __U);
}

/* Extract 128-bit integer lane __imm of __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extracti64x2_epi64 (__m256i __A, const int __imm)
{
  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
							 __imm,
							 (__v2di)
							 _mm_setzero_di (),
							 (__mmask8) -1);
}

/* Merge-masked integer extract.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m256i __A,
				const int __imm)
{
  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
							 __imm,
							 (__v2di) __W,
							 (__mmask8)
							 __U);
}

/* Zero-masked integer extract.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_extracti64x2_epi64 (__mmask8 __U, __m256i __A,
				 const int __imm)
{
  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
							 __imm,
							 (__v2di)
							 _mm_setzero_di (),
							 (__mmask8)
							 __U);
}
1412 1.1 mrg
/* VREDUCEPD / VREDUCEPS: per-element reduction transform controlled
   by the immediate __B (encodes rounding granularity etc. — see the
   instruction reference).  Plain / merge-mask / zero-mask variants at
   both widths and both element types.  */

/* Reduce four DP elements of __A with control __B.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_pd (__m256d __A, int __B)
{
  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
						    (__v4df)
						    _mm256_setzero_pd (),
						    (__mmask8) -1);
}

/* Merge-masked: clear __U bits select from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_reduce_pd (__m256d __W, __mmask8 __U, __m256d __A, int __B)
{
  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
						    (__v4df) __W,
						    (__mmask8) __U);
}

/* Zero-masked: clear __U bits are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_reduce_pd (__mmask8 __U, __m256d __A, int __B)
{
  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
						    (__v4df)
						    _mm256_setzero_pd (),
						    (__mmask8) __U);
}

/* Reduce two DP elements of __A with control __B.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_pd (__m128d __A, int __B)
{
  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) -1);
}

/* Merge-masked 128-bit DP reduce.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_pd (__m128d __W, __mmask8 __U, __m128d __A, int __B)
{
  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
						    (__v2df) __W,
						    (__mmask8) __U);
}

/* Zero-masked 128-bit DP reduce.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_pd (__mmask8 __U, __m128d __A, int __B)
{
  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U);
}

/* Reduce eight SP elements of __A with control __B.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_ps (__m256 __A, int __B)
{
  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
						   (__v8sf)
						   _mm256_setzero_ps (),
						   (__mmask8) -1);
}

/* Merge-masked 256-bit SP reduce.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_reduce_ps (__m256 __W, __mmask8 __U, __m256 __A, int __B)
{
  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
						   (__v8sf) __W,
						   (__mmask8) __U);
}

/* Zero-masked 256-bit SP reduce.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_reduce_ps (__mmask8 __U, __m256 __A, int __B)
{
  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
						   (__v8sf)
						   _mm256_setzero_ps (),
						   (__mmask8) __U);
}

/* Reduce four SP elements of __A with control __B.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_ps (__m128 __A, int __B)
{
  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) -1);
}

/* Merge-masked 128-bit SP reduce.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_ps (__m128 __W, __mmask8 __U, __m128 __A, int __B)
{
  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
						   (__v4sf) __W,
						   (__mmask8) __U);
}

/* Zero-masked 128-bit SP reduce.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_ps (__mmask8 __U, __m128 __A, int __B)
{
  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U);
}
1528 1.1 mrg
/* VRANGEPD / VRANGEPS: per-element range operation on __A and __B
   selected by the immediate __C (min/max/abs variants — see the
   instruction reference).  Plain / merge-mask / zero-mask variants at
   both widths and both element types.  */

/* Range op on four DP element pairs, control __C.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_range_pd (__m256d __A, __m256d __B, int __C)
{
  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
						   (__v4df) __B, __C,
						   (__v4df)
						   _mm256_setzero_pd (),
						   (__mmask8) -1);
}

/* Merge-masked: clear __U bits select from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_range_pd (__m256d __W, __mmask8 __U,
		      __m256d __A, __m256d __B, int __C)
{
  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
						   (__v4df) __B, __C,
						   (__v4df) __W,
						   (__mmask8) __U);
}

/* Zero-masked: clear __U bits are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_range_pd (__mmask8 __U, __m256d __A, __m256d __B, int __C)
{
  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
						   (__v4df) __B, __C,
						   (__v4df)
						   _mm256_setzero_pd (),
						   (__mmask8) __U);
}

/* Range op on two DP element pairs, control __C.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_range_pd (__m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
						   (__v2df) __B, __C,
						   (__v2df)
						   _mm_setzero_pd (),
						   (__mmask8) -1);
}

/* Merge-masked 128-bit DP range.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_range_pd (__m128d __W, __mmask8 __U,
		   __m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
						   (__v2df) __B, __C,
						   (__v2df) __W,
						   (__mmask8) __U);
}

/* Zero-masked 128-bit DP range.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_range_pd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
						   (__v2df) __B, __C,
						   (__v2df)
						   _mm_setzero_pd (),
						   (__mmask8) __U);
}

/* Range op on eight SP element pairs, control __C.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_range_ps (__m256 __A, __m256 __B, int __C)
{
  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
						  (__v8sf) __B, __C,
						  (__v8sf)
						  _mm256_setzero_ps (),
						  (__mmask8) -1);
}

/* Merge-masked 256-bit SP range.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_range_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
		      int __C)
{
  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
						  (__v8sf) __B, __C,
						  (__v8sf) __W,
						  (__mmask8) __U);
}

/* Zero-masked 256-bit SP range.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_range_ps (__mmask8 __U, __m256 __A, __m256 __B, int __C)
{
  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
						  (__v8sf) __B, __C,
						  (__v8sf)
						  _mm256_setzero_ps (),
						  (__mmask8) __U);
}

/* Range op on four SP element pairs, control __C.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_range_ps (__m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
						  (__v4sf) __B, __C,
						  (__v4sf)
						  _mm_setzero_ps (),
						  (__mmask8) -1);
}

/* Merge-masked 128-bit SP range.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_range_ps (__m128 __W, __mmask8 __U,
		   __m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
						  (__v4sf) __B, __C,
						  (__v4sf) __W,
						  (__mmask8) __U);
}

/* Zero-masked 128-bit SP range.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_range_ps (__mmask8 __U, __m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
						  (__v4sf) __B, __C,
						  (__v4sf)
						  _mm_setzero_ps (),
						  (__mmask8) __U);
}
1660 1.1 mrg
/* VFPCLASSPD / VFPCLASSPS: classify each element against the
   category bits in the immediate __imm (NaN, infinity, zero, denormal
   etc.), producing one result bit per element.  The mask variants AND
   the result with __U; the plain variants pass an all-ones mask.  */

/* Classify four DP elements; result ANDed with __U.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fpclass_pd_mask (__mmask8 __U, __m256d __A,
			     const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
						      __imm, __U);
}

/* Classify four DP elements, unmasked.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fpclass_pd_mask (__m256d __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
						      __imm,
						      (__mmask8) -1);
}

/* Classify eight SP elements; result ANDed with __U.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fpclass_ps_mask (__mmask8 __U, __m256 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
						      __imm, __U);
}

/* Classify eight SP elements, unmasked.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fpclass_ps_mask (__m256 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
						      __imm,
						      (__mmask8) -1);
}

/* Classify two DP elements; result ANDed with __U.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fpclass_pd_mask (__mmask8 __U, __m128d __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
						      __imm, __U);
}

/* Classify two DP elements, unmasked.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fpclass_pd_mask (__m128d __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
						      __imm,
						      (__mmask8) -1);
}
1712 1.1 mrg
1713 1.1 mrg extern __inline __mmask8
1714 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1715 1.1 mrg _mm_mask_fpclass_ps_mask (__mmask8 __U, __m128 __A, const int __imm)
1716 1.1 mrg {
1717 1.1 mrg return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
1718 1.1 mrg __imm, __U);
1719 1.1 mrg }
1720 1.1 mrg
1721 1.1 mrg extern __inline __mmask8
1722 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1723 1.1 mrg _mm_fpclass_ps_mask (__m128 __A, const int __imm)
1724 1.1 mrg {
1725 1.1 mrg return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
1726 1.1 mrg __imm,
1727 1.1 mrg (__mmask8) -1);
1728 1.1 mrg }
1729 1.1 mrg
1730 1.1 mrg extern __inline __m256i
1731 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1732 1.1 mrg _mm256_inserti64x2 (__m256i __A, __m128i __B, const int __imm)
1733 1.1 mrg {
1734 1.1 mrg return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1735 1.1 mrg (__v2di) __B,
1736 1.1 mrg __imm,
1737 1.1 mrg (__v4di)
1738 1.1 mrg _mm256_setzero_si256 (),
1739 1.1.1.2 mrg (__mmask8) -1);
1740 1.1 mrg }
1741 1.1 mrg
1742 1.1 mrg extern __inline __m256i
1743 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1744 1.1 mrg _mm256_mask_inserti64x2 (__m256i __W, __mmask8 __U, __m256i __A,
1745 1.1 mrg __m128i __B, const int __imm)
1746 1.1 mrg {
1747 1.1 mrg return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1748 1.1 mrg (__v2di) __B,
1749 1.1 mrg __imm,
1750 1.1 mrg (__v4di) __W,
1751 1.1 mrg (__mmask8)
1752 1.1 mrg __U);
1753 1.1 mrg }
1754 1.1 mrg
1755 1.1 mrg extern __inline __m256i
1756 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1757 1.1 mrg _mm256_maskz_inserti64x2 (__mmask8 __U, __m256i __A, __m128i __B,
1758 1.1 mrg const int __imm)
1759 1.1 mrg {
1760 1.1 mrg return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1761 1.1 mrg (__v2di) __B,
1762 1.1 mrg __imm,
1763 1.1 mrg (__v4di)
1764 1.1 mrg _mm256_setzero_si256 (),
1765 1.1 mrg (__mmask8)
1766 1.1 mrg __U);
1767 1.1 mrg }
1768 1.1 mrg
1769 1.1 mrg extern __inline __m256d
1770 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1771 1.1 mrg _mm256_insertf64x2 (__m256d __A, __m128d __B, const int __imm)
1772 1.1 mrg {
1773 1.1 mrg return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1774 1.1 mrg (__v2df) __B,
1775 1.1 mrg __imm,
1776 1.1 mrg (__v4df)
1777 1.1 mrg _mm256_setzero_pd (),
1778 1.1.1.2 mrg (__mmask8) -1);
1779 1.1 mrg }
1780 1.1 mrg
1781 1.1 mrg extern __inline __m256d
1782 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1783 1.1 mrg _mm256_mask_insertf64x2 (__m256d __W, __mmask8 __U, __m256d __A,
1784 1.1 mrg __m128d __B, const int __imm)
1785 1.1 mrg {
1786 1.1 mrg return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1787 1.1 mrg (__v2df) __B,
1788 1.1 mrg __imm,
1789 1.1 mrg (__v4df) __W,
1790 1.1 mrg (__mmask8)
1791 1.1 mrg __U);
1792 1.1 mrg }
1793 1.1 mrg
1794 1.1 mrg extern __inline __m256d
1795 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1796 1.1 mrg _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
1797 1.1 mrg const int __imm)
1798 1.1 mrg {
1799 1.1 mrg return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1800 1.1 mrg (__v2df) __B,
1801 1.1 mrg __imm,
1802 1.1 mrg (__v4df)
1803 1.1 mrg _mm256_setzero_pd (),
1804 1.1 mrg (__mmask8)
1805 1.1 mrg __U);
1806 1.1 mrg }
1807 1.1 mrg
1808 1.1 mrg #else
/* Macro forms of the insert intrinsics, used when the always-inline
   wrappers above cannot be (the matching #if is outside this region —
   presumably !__OPTIMIZE__, where the immediate would not fold).
   NOTE(review): semantics must mirror the inline versions exactly.  */
#define _mm256_insertf64x2(X, Y, C)					\
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
    (__v2df)(__m128d) (Y), (int) (C),					\
    (__v4df)(__m256d)_mm256_setzero_pd(),				\
    (__mmask8)-1))

#define _mm256_mask_insertf64x2(W, U, X, Y, C)				\
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
    (__v2df)(__m128d) (Y), (int) (C),					\
    (__v4df)(__m256d)(W),						\
    (__mmask8)(U)))

#define _mm256_maskz_insertf64x2(U, X, Y, C)				\
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
    (__v2df)(__m128d) (Y), (int) (C),					\
    (__v4df)(__m256d)_mm256_setzero_pd(),				\
    (__mmask8)(U)))

#define _mm256_inserti64x2(X, Y, C)					\
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
    (__v2di)(__m128i) (Y), (int) (C),					\
    (__v4di)(__m256i)_mm256_setzero_si256 (),				\
    (__mmask8)-1))

#define _mm256_mask_inserti64x2(W, U, X, Y, C)				\
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
    (__v2di)(__m128i) (Y), (int) (C),					\
    (__v4di)(__m256i)(W),						\
    (__mmask8)(U)))

#define _mm256_maskz_inserti64x2(U, X, Y, C)				\
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
    (__v2di)(__m128i) (Y), (int) (C),					\
    (__v4di)(__m256i)_mm256_setzero_si256 (),				\
    (__mmask8)(U)))
1844 1.1 mrg
/* Macro forms of the 128-bit lane extraction intrinsics
   (VEXTRACTF64X2 / VEXTRACTI64X2); bit 0 of C selects which half of X.
   Plain/mask/maskz variants merge into W or zero per mask U.  */
#define _mm256_extractf64x2_pd(X, C)					\
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8)-1))

#define _mm256_mask_extractf64x2_pd(W, U, X, C)				\
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
    (int) (C), (__v2df)(__m128d) (W), (__mmask8) (U)))

#define _mm256_maskz_extractf64x2_pd(U, X, C)				\
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8) (U)))

/* Use the standard _mm_setzero_si128 for the integer zero source:
   upstream GCC made the same substitution when the nonstandard
   _mm_setzero_di helper was removed; the produced value is identical.  */
#define _mm256_extracti64x2_epi64(X, C)					\
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
    (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8)-1))

#define _mm256_mask_extracti64x2_epi64(W, U, X, C)			\
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
    (int) (C), (__v2di)(__m128i) (W), (__mmask8) (U)))

#define _mm256_maskz_extracti64x2_epi64(U, X, C)			\
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
    (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
1868 1.1 mrg
/* Macro forms of the VREDUCEPD/VREDUCEPS intrinsics: compute the
   reduced argument of each element, controlled by immediate B;
   plain/mask/maskz variants for 256- and 128-bit pd/ps vectors.  */
#define _mm256_reduce_pd(A, B)						\
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A),	\
    (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)-1))

#define _mm256_mask_reduce_pd(W, U, A, B)				\
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A),	\
    (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm256_maskz_reduce_pd(U, A, B)					\
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A),	\
    (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))

#define _mm_reduce_pd(A, B)						\
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A),	\
    (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)-1))

#define _mm_mask_reduce_pd(W, U, A, B)					\
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A),	\
    (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_reduce_pd(U, A, B)					\
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A),	\
    (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)(U)))

#define _mm256_reduce_ps(A, B)						\
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A),	\
    (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))

#define _mm256_mask_reduce_ps(W, U, A, B)				\
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A),	\
    (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_reduce_ps(U, A, B)					\
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A),	\
    (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))

#define _mm_reduce_ps(A, B)						\
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A),	\
    (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)-1))

#define _mm_mask_reduce_ps(W, U, A, B)					\
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A),	\
    (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_reduce_ps(U, A, B)					\
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A),	\
    (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
1916 1.1 mrg
/* Macro forms of the VRANGEPD/VRANGEPS intrinsics: per-element range
   operation on A/B controlled by immediate C; plain/mask/maskz
   variants for 256- and 128-bit pd/ps vectors.  */
#define _mm256_range_pd(A, B, C)					\
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A),	\
    (__v4df)(__m256d)(B), (int)(C),					\
    (__v4df)_mm256_setzero_pd(), (__mmask8)-1))

#define _mm256_maskz_range_pd(U, A, B, C)				\
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A),	\
    (__v4df)(__m256d)(B), (int)(C),					\
    (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))

#define _mm_range_pd(A, B, C)						\
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A),	\
    (__v2df)(__m128d)(B), (int)(C),					\
    (__v2df)_mm_setzero_pd(), (__mmask8)-1))

#define _mm256_range_ps(A, B, C)					\
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A),	\
    (__v8sf)(__m256)(B), (int)(C),					\
    (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))

#define _mm256_mask_range_ps(W, U, A, B, C)				\
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A),	\
    (__v8sf)(__m256)(B), (int)(C),					\
    (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_range_ps(U, A, B, C)				\
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A),	\
    (__v8sf)(__m256)(B), (int)(C),					\
    (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))

#define _mm_range_ps(A, B, C)						\
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A),	\
    (__v4sf)(__m128)(B), (int)(C),					\
    (__v4sf)_mm_setzero_ps(), (__mmask8)-1))

#define _mm_mask_range_ps(W, U, A, B, C)				\
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A),	\
    (__v4sf)(__m128)(B), (int)(C),					\
    (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_range_ps(U, A, B, C)					\
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A),	\
    (__v4sf)(__m128)(B), (int)(C),					\
    (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))

#define _mm256_mask_range_pd(W, U, A, B, C)				\
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A),	\
    (__v4df)(__m256d)(B), (int)(C),					\
    (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm_mask_range_pd(W, U, A, B, C)				\
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A),	\
    (__v2df)(__m128d)(B), (int)(C),					\
    (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_range_pd(U, A, B, C)					\
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A),	\
    (__v2df)(__m128d)(B), (int)(C),					\
    (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
1976 1.1 mrg
/* Macro forms of the VFPCLASSPD/VFPCLASSPS intrinsics: classify each
   element of X against the FP categories selected by immediate C,
   returning one mask bit per element (ANDed with u when masked).  */
#define _mm256_mask_fpclass_pd_mask(u, X, C)				\
  ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X),	\
    (int) (C),(__mmask8)(u)))

#define _mm256_mask_fpclass_ps_mask(u, X, C)				\
  ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X),	\
    (int) (C),(__mmask8)(u)))

#define _mm_mask_fpclass_pd_mask(u, X, C)				\
  ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X),	\
    (int) (C),(__mmask8)(u)))

#define _mm_mask_fpclass_ps_mask(u, X, C)				\
  ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X),	\
    (int) (C),(__mmask8)(u)))

#define _mm256_fpclass_pd_mask(X, C)					\
  ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X),	\
    (int) (C),(__mmask8)-1))

#define _mm256_fpclass_ps_mask(X, C)					\
  ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X),	\
    (int) (C),(__mmask8)-1))

#define _mm_fpclass_pd_mask(X, C)					\
  ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X),	\
    (int) (C),(__mmask8)-1))

#define _mm_fpclass_ps_mask(X, C)					\
  ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X),	\
    (int) (C),(__mmask8)-1))
2008 1.1 mrg
2009 1.1 mrg #endif
2010 1.1 mrg
2011 1.1 mrg #ifdef __DISABLE_AVX512VLDQ__
2012 1.1 mrg #undef __DISABLE_AVX512VLDQ__
2013 1.1 mrg #pragma GCC pop_options
2014 1.1 mrg #endif /* __DISABLE_AVX512VLDQ__ */
2015 1.1 mrg
2016 1.1 mrg #endif /* _AVX512VLDQINTRIN_H_INCLUDED */
2017