/* Copyright (C) 2014-2015 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
   <http://www.gnu.org/licenses/>. */
23 1.1 mrg
/* Refuse to compile unless _IMMINTRIN_H_INCLUDED is already defined; the
   #error text tells the user to include <immintrin.h> instead.  */
24 1.1 mrg #ifndef _IMMINTRIN_H_INCLUDED
25 1.1 mrg #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
26 1.1 mrg #endif
27 1.1 mrg
/* Include guard for this header (the matching #endif is not in view).  */
28 1.1 mrg #ifndef _AVX512DQINTRIN_H_INCLUDED
29 1.1 mrg #define _AVX512DQINTRIN_H_INCLUDED
30 1.1 mrg
/* When __AVX512DQ__ is not defined, save the current GCC options, switch
   the target to "avx512dq" and define __DISABLE_AVX512DQ__.
   NOTE(review): presumably __DISABLE_AVX512DQ__ is tested later in the
   file to pop the saved options -- that part is not visible here.  */
31 1.1 mrg #ifndef __AVX512DQ__
32 1.1 mrg #pragma GCC push_options
33 1.1 mrg #pragma GCC target("avx512dq")
34 1.1 mrg #define __DISABLE_AVX512DQ__
35 1.1 mrg #endif /* __AVX512DQ__ */
36 1.1 mrg
37 1.1 mrg extern __inline __m512d
38 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
39 1.1 mrg _mm512_broadcast_f64x2 (__m128d __A)
40 1.1 mrg {
41 1.1 mrg return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
42 1.1 mrg __A,
43 1.1 mrg _mm512_undefined_pd(),
44 1.1 mrg (__mmask8) -
45 1.1 mrg 1);
46 1.1 mrg }
47 1.1 mrg
48 1.1 mrg extern __inline __m512d
49 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
50 1.1 mrg _mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A)
51 1.1 mrg {
52 1.1 mrg return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
53 1.1 mrg __A,
54 1.1 mrg (__v8df)
55 1.1 mrg __O, __M);
56 1.1 mrg }
57 1.1 mrg
58 1.1 mrg extern __inline __m512d
59 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
60 1.1 mrg _mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
61 1.1 mrg {
62 1.1 mrg return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
63 1.1 mrg __A,
64 1.1 mrg (__v8df)
65 1.1 mrg _mm512_setzero_ps (),
66 1.1 mrg __M);
67 1.1 mrg }
68 1.1 mrg
69 1.1 mrg extern __inline __m512i
70 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
71 1.1 mrg _mm512_broadcast_i64x2 (__m128i __A)
72 1.1 mrg {
73 1.1 mrg return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
74 1.1 mrg __A,
75 1.1 mrg _mm512_undefined_si512(),
76 1.1 mrg (__mmask8) -
77 1.1 mrg 1);
78 1.1 mrg }
79 1.1 mrg
80 1.1 mrg extern __inline __m512i
81 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
82 1.1 mrg _mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A)
83 1.1 mrg {
84 1.1 mrg return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
85 1.1 mrg __A,
86 1.1 mrg (__v8di)
87 1.1 mrg __O, __M);
88 1.1 mrg }
89 1.1 mrg
90 1.1 mrg extern __inline __m512i
91 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
92 1.1 mrg _mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
93 1.1 mrg {
94 1.1 mrg return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
95 1.1 mrg __A,
96 1.1 mrg (__v8di)
97 1.1 mrg _mm512_setzero_si512 (),
98 1.1 mrg __M);
99 1.1 mrg }
100 1.1 mrg
101 1.1 mrg extern __inline __m512
102 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
103 1.1 mrg _mm512_broadcast_f32x2 (__m128 __A)
104 1.1 mrg {
105 1.1 mrg return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
106 1.1 mrg (__v16sf)_mm512_undefined_ps(),
107 1.1 mrg (__mmask16) -
108 1.1 mrg 1);
109 1.1 mrg }
110 1.1 mrg
111 1.1 mrg extern __inline __m512
112 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
113 1.1 mrg _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
114 1.1 mrg {
115 1.1 mrg return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
116 1.1 mrg (__v16sf)
117 1.1 mrg __O, __M);
118 1.1 mrg }
119 1.1 mrg
120 1.1 mrg extern __inline __m512
121 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
122 1.1 mrg _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
123 1.1 mrg {
124 1.1 mrg return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
125 1.1 mrg (__v16sf)
126 1.1 mrg _mm512_setzero_ps (),
127 1.1 mrg __M);
128 1.1 mrg }
129 1.1 mrg
130 1.1 mrg extern __inline __m512i
131 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
132 1.1 mrg _mm512_broadcast_i32x2 (__m128i __A)
133 1.1 mrg {
134 1.1 mrg return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
135 1.1 mrg __A,
136 1.1 mrg (__v16si)_mm512_undefined_si512(),
137 1.1 mrg (__mmask16)
138 1.1 mrg -1);
139 1.1 mrg }
140 1.1 mrg
141 1.1 mrg extern __inline __m512i
142 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
143 1.1 mrg _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
144 1.1 mrg {
145 1.1 mrg return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
146 1.1 mrg __A,
147 1.1 mrg (__v16si)
148 1.1 mrg __O, __M);
149 1.1 mrg }
150 1.1 mrg
151 1.1 mrg extern __inline __m512i
152 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
153 1.1 mrg _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
154 1.1 mrg {
155 1.1 mrg return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
156 1.1 mrg __A,
157 1.1 mrg (__v16si)
158 1.1 mrg _mm512_setzero_si512 (),
159 1.1 mrg __M);
160 1.1 mrg }
161 1.1 mrg
162 1.1 mrg extern __inline __m512
163 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164 1.1 mrg _mm512_broadcast_f32x8 (__m256 __A)
165 1.1 mrg {
166 1.1 mrg return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
167 1.1 mrg _mm512_undefined_ps(),
168 1.1 mrg (__mmask16) -
169 1.1 mrg 1);
170 1.1 mrg }
171 1.1 mrg
172 1.1 mrg extern __inline __m512
173 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
174 1.1 mrg _mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A)
175 1.1 mrg {
176 1.1 mrg return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
177 1.1 mrg (__v16sf)__O,
178 1.1 mrg __M);
179 1.1 mrg }
180 1.1 mrg
181 1.1 mrg extern __inline __m512
182 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
183 1.1 mrg _mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A)
184 1.1 mrg {
185 1.1 mrg return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
186 1.1 mrg (__v16sf)
187 1.1 mrg _mm512_setzero_ps (),
188 1.1 mrg __M);
189 1.1 mrg }
190 1.1 mrg
191 1.1 mrg extern __inline __m512i
192 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
193 1.1 mrg _mm512_broadcast_i32x8 (__m256i __A)
194 1.1 mrg {
195 1.1 mrg return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
196 1.1 mrg __A,
197 1.1 mrg (__v16si)_mm512_undefined_si512(),
198 1.1 mrg (__mmask16)
199 1.1 mrg -1);
200 1.1 mrg }
201 1.1 mrg
202 1.1 mrg extern __inline __m512i
203 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
204 1.1 mrg _mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A)
205 1.1 mrg {
206 1.1 mrg return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
207 1.1 mrg __A,
208 1.1 mrg (__v16si)__O,
209 1.1 mrg __M);
210 1.1 mrg }
211 1.1 mrg
212 1.1 mrg extern __inline __m512i
213 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
214 1.1 mrg _mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A)
215 1.1 mrg {
216 1.1 mrg return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
217 1.1 mrg __A,
218 1.1 mrg (__v16si)
219 1.1 mrg _mm512_setzero_si512 (),
220 1.1 mrg __M);
221 1.1 mrg }
222 1.1 mrg
223 1.1 mrg extern __inline __m512i
224 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
225 1.1 mrg _mm512_mullo_epi64 (__m512i __A, __m512i __B)
226 1.1 mrg {
227 1.1 mrg return (__m512i) ((__v8du) __A * (__v8du) __B);
228 1.1 mrg }
229 1.1 mrg
230 1.1 mrg extern __inline __m512i
231 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
232 1.1 mrg _mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
233 1.1 mrg __m512i __B)
234 1.1 mrg {
235 1.1 mrg return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
236 1.1 mrg (__v8di) __B,
237 1.1 mrg (__v8di) __W,
238 1.1 mrg (__mmask8) __U);
239 1.1 mrg }
240 1.1 mrg
241 1.1 mrg extern __inline __m512i
242 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
243 1.1 mrg _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
244 1.1 mrg {
245 1.1 mrg return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
246 1.1 mrg (__v8di) __B,
247 1.1 mrg (__v8di)
248 1.1 mrg _mm512_setzero_si512 (),
249 1.1 mrg (__mmask8) __U);
250 1.1 mrg }
251 1.1 mrg
252 1.1 mrg extern __inline __m512d
253 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
254 1.1 mrg _mm512_xor_pd (__m512d __A, __m512d __B)
255 1.1 mrg {
256 1.1 mrg return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
257 1.1 mrg (__v8df) __B,
258 1.1 mrg (__v8df)
259 1.1 mrg _mm512_setzero_pd (),
260 1.1 mrg (__mmask8) -1);
261 1.1 mrg }
262 1.1 mrg
263 1.1 mrg extern __inline __m512d
264 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
265 1.1 mrg _mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A,
266 1.1 mrg __m512d __B)
267 1.1 mrg {
268 1.1 mrg return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
269 1.1 mrg (__v8df) __B,
270 1.1 mrg (__v8df) __W,
271 1.1 mrg (__mmask8) __U);
272 1.1 mrg }
273 1.1 mrg
274 1.1 mrg extern __inline __m512d
275 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
276 1.1 mrg _mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B)
277 1.1 mrg {
278 1.1 mrg return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
279 1.1 mrg (__v8df) __B,
280 1.1 mrg (__v8df)
281 1.1 mrg _mm512_setzero_pd (),
282 1.1 mrg (__mmask8) __U);
283 1.1 mrg }
284 1.1 mrg
285 1.1 mrg extern __inline __m512
286 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
287 1.1 mrg _mm512_xor_ps (__m512 __A, __m512 __B)
288 1.1 mrg {
289 1.1 mrg return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
290 1.1 mrg (__v16sf) __B,
291 1.1 mrg (__v16sf)
292 1.1 mrg _mm512_setzero_ps (),
293 1.1 mrg (__mmask16) -1);
294 1.1 mrg }
295 1.1 mrg
296 1.1 mrg extern __inline __m512
297 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
298 1.1 mrg _mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
299 1.1 mrg {
300 1.1 mrg return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
301 1.1 mrg (__v16sf) __B,
302 1.1 mrg (__v16sf) __W,
303 1.1 mrg (__mmask16) __U);
304 1.1 mrg }
305 1.1 mrg
306 1.1 mrg extern __inline __m512
307 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
308 1.1 mrg _mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B)
309 1.1 mrg {
310 1.1 mrg return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
311 1.1 mrg (__v16sf) __B,
312 1.1 mrg (__v16sf)
313 1.1 mrg _mm512_setzero_ps (),
314 1.1 mrg (__mmask16) __U);
315 1.1 mrg }
316 1.1 mrg
317 1.1 mrg extern __inline __m512d
318 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
319 1.1 mrg _mm512_or_pd (__m512d __A, __m512d __B)
320 1.1 mrg {
321 1.1 mrg return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
322 1.1 mrg (__v8df) __B,
323 1.1 mrg (__v8df)
324 1.1 mrg _mm512_setzero_pd (),
325 1.1 mrg (__mmask8) -1);
326 1.1 mrg }
327 1.1 mrg
328 1.1 mrg extern __inline __m512d
329 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
330 1.1 mrg _mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
331 1.1 mrg {
332 1.1 mrg return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
333 1.1 mrg (__v8df) __B,
334 1.1 mrg (__v8df) __W,
335 1.1 mrg (__mmask8) __U);
336 1.1 mrg }
337 1.1 mrg
338 1.1 mrg extern __inline __m512d
339 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
340 1.1 mrg _mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B)
341 1.1 mrg {
342 1.1 mrg return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
343 1.1 mrg (__v8df) __B,
344 1.1 mrg (__v8df)
345 1.1 mrg _mm512_setzero_pd (),
346 1.1 mrg (__mmask8) __U);
347 1.1 mrg }
348 1.1 mrg
349 1.1 mrg extern __inline __m512
350 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
351 1.1 mrg _mm512_or_ps (__m512 __A, __m512 __B)
352 1.1 mrg {
353 1.1 mrg return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
354 1.1 mrg (__v16sf) __B,
355 1.1 mrg (__v16sf)
356 1.1 mrg _mm512_setzero_ps (),
357 1.1 mrg (__mmask16) -1);
358 1.1 mrg }
359 1.1 mrg
360 1.1 mrg extern __inline __m512
361 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
362 1.1 mrg _mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
363 1.1 mrg {
364 1.1 mrg return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
365 1.1 mrg (__v16sf) __B,
366 1.1 mrg (__v16sf) __W,
367 1.1 mrg (__mmask16) __U);
368 1.1 mrg }
369 1.1 mrg
370 1.1 mrg extern __inline __m512
371 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
372 1.1 mrg _mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B)
373 1.1 mrg {
374 1.1 mrg return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
375 1.1 mrg (__v16sf) __B,
376 1.1 mrg (__v16sf)
377 1.1 mrg _mm512_setzero_ps (),
378 1.1 mrg (__mmask16) __U);
379 1.1 mrg }
380 1.1 mrg
381 1.1 mrg extern __inline __m512d
382 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
383 1.1 mrg _mm512_and_pd (__m512d __A, __m512d __B)
384 1.1 mrg {
385 1.1 mrg return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
386 1.1 mrg (__v8df) __B,
387 1.1 mrg (__v8df)
388 1.1 mrg _mm512_setzero_pd (),
389 1.1 mrg (__mmask8) -1);
390 1.1 mrg }
391 1.1 mrg
392 1.1 mrg extern __inline __m512d
393 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
394 1.1 mrg _mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A,
395 1.1 mrg __m512d __B)
396 1.1 mrg {
397 1.1 mrg return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
398 1.1 mrg (__v8df) __B,
399 1.1 mrg (__v8df) __W,
400 1.1 mrg (__mmask8) __U);
401 1.1 mrg }
402 1.1 mrg
403 1.1 mrg extern __inline __m512d
404 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
405 1.1 mrg _mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B)
406 1.1 mrg {
407 1.1 mrg return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
408 1.1 mrg (__v8df) __B,
409 1.1 mrg (__v8df)
410 1.1 mrg _mm512_setzero_pd (),
411 1.1 mrg (__mmask8) __U);
412 1.1 mrg }
413 1.1 mrg
414 1.1 mrg extern __inline __m512
415 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
416 1.1 mrg _mm512_and_ps (__m512 __A, __m512 __B)
417 1.1 mrg {
418 1.1 mrg return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
419 1.1 mrg (__v16sf) __B,
420 1.1 mrg (__v16sf)
421 1.1 mrg _mm512_setzero_ps (),
422 1.1 mrg (__mmask16) -1);
423 1.1 mrg }
424 1.1 mrg
425 1.1 mrg extern __inline __m512
426 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
427 1.1 mrg _mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
428 1.1 mrg {
429 1.1 mrg return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
430 1.1 mrg (__v16sf) __B,
431 1.1 mrg (__v16sf) __W,
432 1.1 mrg (__mmask16) __U);
433 1.1 mrg }
434 1.1 mrg
435 1.1 mrg extern __inline __m512
436 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
437 1.1 mrg _mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B)
438 1.1 mrg {
439 1.1 mrg return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
440 1.1 mrg (__v16sf) __B,
441 1.1 mrg (__v16sf)
442 1.1 mrg _mm512_setzero_ps (),
443 1.1 mrg (__mmask16) __U);
444 1.1 mrg }
445 1.1 mrg
446 1.1 mrg extern __inline __m512d
447 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
448 1.1 mrg _mm512_andnot_pd (__m512d __A, __m512d __B)
449 1.1 mrg {
450 1.1 mrg return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
451 1.1 mrg (__v8df) __B,
452 1.1 mrg (__v8df)
453 1.1 mrg _mm512_setzero_pd (),
454 1.1 mrg (__mmask8) -1);
455 1.1 mrg }
456 1.1 mrg
457 1.1 mrg extern __inline __m512d
458 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
459 1.1 mrg _mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A,
460 1.1 mrg __m512d __B)
461 1.1 mrg {
462 1.1 mrg return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
463 1.1 mrg (__v8df) __B,
464 1.1 mrg (__v8df) __W,
465 1.1 mrg (__mmask8) __U);
466 1.1 mrg }
467 1.1 mrg
468 1.1 mrg extern __inline __m512d
469 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
470 1.1 mrg _mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B)
471 1.1 mrg {
472 1.1 mrg return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
473 1.1 mrg (__v8df) __B,
474 1.1 mrg (__v8df)
475 1.1 mrg _mm512_setzero_pd (),
476 1.1 mrg (__mmask8) __U);
477 1.1 mrg }
478 1.1 mrg
479 1.1 mrg extern __inline __m512
480 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
481 1.1 mrg _mm512_andnot_ps (__m512 __A, __m512 __B)
482 1.1 mrg {
483 1.1 mrg return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
484 1.1 mrg (__v16sf) __B,
485 1.1 mrg (__v16sf)
486 1.1 mrg _mm512_setzero_ps (),
487 1.1 mrg (__mmask16) -1);
488 1.1 mrg }
489 1.1 mrg
490 1.1 mrg extern __inline __m512
491 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
492 1.1 mrg _mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A,
493 1.1 mrg __m512 __B)
494 1.1 mrg {
495 1.1 mrg return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
496 1.1 mrg (__v16sf) __B,
497 1.1 mrg (__v16sf) __W,
498 1.1 mrg (__mmask16) __U);
499 1.1 mrg }
500 1.1 mrg
501 1.1 mrg extern __inline __m512
502 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
503 1.1 mrg _mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B)
504 1.1 mrg {
505 1.1 mrg return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
506 1.1 mrg (__v16sf) __B,
507 1.1 mrg (__v16sf)
508 1.1 mrg _mm512_setzero_ps (),
509 1.1 mrg (__mmask16) __U);
510 1.1 mrg }
511 1.1 mrg
512 1.1 mrg extern __inline __mmask16
513 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
514 1.1 mrg _mm512_movepi32_mask (__m512i __A)
515 1.1 mrg {
516 1.1 mrg return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
517 1.1 mrg }
518 1.1 mrg
519 1.1 mrg extern __inline __mmask8
520 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521 1.1 mrg _mm512_movepi64_mask (__m512i __A)
522 1.1 mrg {
523 1.1 mrg return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
524 1.1 mrg }
525 1.1 mrg
526 1.1 mrg extern __inline __m512i
527 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
528 1.1 mrg _mm512_movm_epi32 (__mmask16 __A)
529 1.1 mrg {
530 1.1 mrg return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
531 1.1 mrg }
532 1.1 mrg
533 1.1 mrg extern __inline __m512i
534 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
535 1.1 mrg _mm512_movm_epi64 (__mmask8 __A)
536 1.1 mrg {
537 1.1 mrg return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
538 1.1 mrg }
539 1.1 mrg
540 1.1 mrg extern __inline __m512i
541 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
542 1.1 mrg _mm512_cvttpd_epi64 (__m512d __A)
543 1.1 mrg {
544 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
545 1.1 mrg (__v8di)
546 1.1 mrg _mm512_setzero_si512 (),
547 1.1 mrg (__mmask8) -1,
548 1.1 mrg _MM_FROUND_CUR_DIRECTION);
549 1.1 mrg }
550 1.1 mrg
551 1.1 mrg extern __inline __m512i
552 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
553 1.1 mrg _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
554 1.1 mrg {
555 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
556 1.1 mrg (__v8di) __W,
557 1.1 mrg (__mmask8) __U,
558 1.1 mrg _MM_FROUND_CUR_DIRECTION);
559 1.1 mrg }
560 1.1 mrg
561 1.1 mrg extern __inline __m512i
562 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
563 1.1 mrg _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A)
564 1.1 mrg {
565 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
566 1.1 mrg (__v8di)
567 1.1 mrg _mm512_setzero_si512 (),
568 1.1 mrg (__mmask8) __U,
569 1.1 mrg _MM_FROUND_CUR_DIRECTION);
570 1.1 mrg }
571 1.1 mrg
572 1.1 mrg extern __inline __m512i
573 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
574 1.1 mrg _mm512_cvttpd_epu64 (__m512d __A)
575 1.1 mrg {
576 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
577 1.1 mrg (__v8di)
578 1.1 mrg _mm512_setzero_si512 (),
579 1.1 mrg (__mmask8) -1,
580 1.1 mrg _MM_FROUND_CUR_DIRECTION);
581 1.1 mrg }
582 1.1 mrg
583 1.1 mrg extern __inline __m512i
584 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
585 1.1 mrg _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
586 1.1 mrg {
587 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
588 1.1 mrg (__v8di) __W,
589 1.1 mrg (__mmask8) __U,
590 1.1 mrg _MM_FROUND_CUR_DIRECTION);
591 1.1 mrg }
592 1.1 mrg
593 1.1 mrg extern __inline __m512i
594 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
595 1.1 mrg _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A)
596 1.1 mrg {
597 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
598 1.1 mrg (__v8di)
599 1.1 mrg _mm512_setzero_si512 (),
600 1.1 mrg (__mmask8) __U,
601 1.1 mrg _MM_FROUND_CUR_DIRECTION);
602 1.1 mrg }
603 1.1 mrg
604 1.1 mrg extern __inline __m512i
605 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
606 1.1 mrg _mm512_cvttps_epi64 (__m256 __A)
607 1.1 mrg {
608 1.1 mrg return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
609 1.1 mrg (__v8di)
610 1.1 mrg _mm512_setzero_si512 (),
611 1.1 mrg (__mmask8) -1,
612 1.1 mrg _MM_FROUND_CUR_DIRECTION);
613 1.1 mrg }
614 1.1 mrg
615 1.1 mrg extern __inline __m512i
616 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
617 1.1 mrg _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
618 1.1 mrg {
619 1.1 mrg return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
620 1.1 mrg (__v8di) __W,
621 1.1 mrg (__mmask8) __U,
622 1.1 mrg _MM_FROUND_CUR_DIRECTION);
623 1.1 mrg }
624 1.1 mrg
625 1.1 mrg extern __inline __m512i
626 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
627 1.1 mrg _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A)
628 1.1 mrg {
629 1.1 mrg return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
630 1.1 mrg (__v8di)
631 1.1 mrg _mm512_setzero_si512 (),
632 1.1 mrg (__mmask8) __U,
633 1.1 mrg _MM_FROUND_CUR_DIRECTION);
634 1.1 mrg }
635 1.1 mrg
636 1.1 mrg extern __inline __m512i
637 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
638 1.1 mrg _mm512_cvttps_epu64 (__m256 __A)
639 1.1 mrg {
640 1.1 mrg return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
641 1.1 mrg (__v8di)
642 1.1 mrg _mm512_setzero_si512 (),
643 1.1 mrg (__mmask8) -1,
644 1.1 mrg _MM_FROUND_CUR_DIRECTION);
645 1.1 mrg }
646 1.1 mrg
647 1.1 mrg extern __inline __m512i
648 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
649 1.1 mrg _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
650 1.1 mrg {
651 1.1 mrg return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
652 1.1 mrg (__v8di) __W,
653 1.1 mrg (__mmask8) __U,
654 1.1 mrg _MM_FROUND_CUR_DIRECTION);
655 1.1 mrg }
656 1.1 mrg
657 1.1 mrg extern __inline __m512i
658 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
659 1.1 mrg _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A)
660 1.1 mrg {
661 1.1 mrg return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
662 1.1 mrg (__v8di)
663 1.1 mrg _mm512_setzero_si512 (),
664 1.1 mrg (__mmask8) __U,
665 1.1 mrg _MM_FROUND_CUR_DIRECTION);
666 1.1 mrg }
667 1.1 mrg
668 1.1 mrg extern __inline __m512i
669 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
670 1.1 mrg _mm512_cvtpd_epi64 (__m512d __A)
671 1.1 mrg {
672 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
673 1.1 mrg (__v8di)
674 1.1 mrg _mm512_setzero_si512 (),
675 1.1 mrg (__mmask8) -1,
676 1.1 mrg _MM_FROUND_CUR_DIRECTION);
677 1.1 mrg }
678 1.1 mrg
679 1.1 mrg extern __inline __m512i
680 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
681 1.1 mrg _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
682 1.1 mrg {
683 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
684 1.1 mrg (__v8di) __W,
685 1.1 mrg (__mmask8) __U,
686 1.1 mrg _MM_FROUND_CUR_DIRECTION);
687 1.1 mrg }
688 1.1 mrg
689 1.1 mrg extern __inline __m512i
690 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
691 1.1 mrg _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A)
692 1.1 mrg {
693 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
694 1.1 mrg (__v8di)
695 1.1 mrg _mm512_setzero_si512 (),
696 1.1 mrg (__mmask8) __U,
697 1.1 mrg _MM_FROUND_CUR_DIRECTION);
698 1.1 mrg }
699 1.1 mrg
700 1.1 mrg extern __inline __m512i
701 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
702 1.1 mrg _mm512_cvtpd_epu64 (__m512d __A)
703 1.1 mrg {
704 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
705 1.1 mrg (__v8di)
706 1.1 mrg _mm512_setzero_si512 (),
707 1.1 mrg (__mmask8) -1,
708 1.1 mrg _MM_FROUND_CUR_DIRECTION);
709 1.1 mrg }
710 1.1 mrg
711 1.1 mrg extern __inline __m512i
712 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
713 1.1 mrg _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
714 1.1 mrg {
715 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
716 1.1 mrg (__v8di) __W,
717 1.1 mrg (__mmask8) __U,
718 1.1 mrg _MM_FROUND_CUR_DIRECTION);
719 1.1 mrg }
720 1.1 mrg
721 1.1 mrg extern __inline __m512i
722 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
723 1.1 mrg _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A)
724 1.1 mrg {
725 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
726 1.1 mrg (__v8di)
727 1.1 mrg _mm512_setzero_si512 (),
728 1.1 mrg (__mmask8) __U,
729 1.1 mrg _MM_FROUND_CUR_DIRECTION);
730 1.1 mrg }
731 1.1 mrg
732 1.1 mrg extern __inline __m512i
733 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
734 1.1 mrg _mm512_cvtps_epi64 (__m256 __A)
735 1.1 mrg {
736 1.1 mrg return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
737 1.1 mrg (__v8di)
738 1.1 mrg _mm512_setzero_si512 (),
739 1.1 mrg (__mmask8) -1,
740 1.1 mrg _MM_FROUND_CUR_DIRECTION);
741 1.1 mrg }
742 1.1 mrg
743 1.1 mrg extern __inline __m512i
744 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
745 1.1 mrg _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
746 1.1 mrg {
747 1.1 mrg return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
748 1.1 mrg (__v8di) __W,
749 1.1 mrg (__mmask8) __U,
750 1.1 mrg _MM_FROUND_CUR_DIRECTION);
751 1.1 mrg }
752 1.1 mrg
753 1.1 mrg extern __inline __m512i
754 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
755 1.1 mrg _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A)
756 1.1 mrg {
757 1.1 mrg return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
758 1.1 mrg (__v8di)
759 1.1 mrg _mm512_setzero_si512 (),
760 1.1 mrg (__mmask8) __U,
761 1.1 mrg _MM_FROUND_CUR_DIRECTION);
762 1.1 mrg }
763 1.1 mrg
764 1.1 mrg extern __inline __m512i
765 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
766 1.1 mrg _mm512_cvtps_epu64 (__m256 __A)
767 1.1 mrg {
768 1.1 mrg return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
769 1.1 mrg (__v8di)
770 1.1 mrg _mm512_setzero_si512 (),
771 1.1 mrg (__mmask8) -1,
772 1.1 mrg _MM_FROUND_CUR_DIRECTION);
773 1.1 mrg }
774 1.1 mrg
775 1.1 mrg extern __inline __m512i
776 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
777 1.1 mrg _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
778 1.1 mrg {
779 1.1 mrg return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
780 1.1 mrg (__v8di) __W,
781 1.1 mrg (__mmask8) __U,
782 1.1 mrg _MM_FROUND_CUR_DIRECTION);
783 1.1 mrg }
784 1.1 mrg
785 1.1 mrg extern __inline __m512i
786 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
787 1.1 mrg _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A)
788 1.1 mrg {
789 1.1 mrg return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
790 1.1 mrg (__v8di)
791 1.1 mrg _mm512_setzero_si512 (),
792 1.1 mrg (__mmask8) __U,
793 1.1 mrg _MM_FROUND_CUR_DIRECTION);
794 1.1 mrg }
795 1.1 mrg
796 1.1 mrg extern __inline __m256
797 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
798 1.1 mrg _mm512_cvtepi64_ps (__m512i __A)
799 1.1 mrg {
800 1.1 mrg return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
801 1.1 mrg (__v8sf)
802 1.1 mrg _mm256_setzero_ps (),
803 1.1 mrg (__mmask8) -1,
804 1.1 mrg _MM_FROUND_CUR_DIRECTION);
805 1.1 mrg }
806 1.1 mrg
807 1.1 mrg extern __inline __m256
808 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
809 1.1 mrg _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A)
810 1.1 mrg {
811 1.1 mrg return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
812 1.1 mrg (__v8sf) __W,
813 1.1 mrg (__mmask8) __U,
814 1.1 mrg _MM_FROUND_CUR_DIRECTION);
815 1.1 mrg }
816 1.1 mrg
817 1.1 mrg extern __inline __m256
818 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
819 1.1 mrg _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A)
820 1.1 mrg {
821 1.1 mrg return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
822 1.1 mrg (__v8sf)
823 1.1 mrg _mm256_setzero_ps (),
824 1.1 mrg (__mmask8) __U,
825 1.1 mrg _MM_FROUND_CUR_DIRECTION);
826 1.1 mrg }
827 1.1 mrg
828 1.1 mrg extern __inline __m256
829 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
830 1.1 mrg _mm512_cvtepu64_ps (__m512i __A)
831 1.1 mrg {
832 1.1 mrg return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
833 1.1 mrg (__v8sf)
834 1.1 mrg _mm256_setzero_ps (),
835 1.1 mrg (__mmask8) -1,
836 1.1 mrg _MM_FROUND_CUR_DIRECTION);
837 1.1 mrg }
838 1.1 mrg
839 1.1 mrg extern __inline __m256
840 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
841 1.1 mrg _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A)
842 1.1 mrg {
843 1.1 mrg return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
844 1.1 mrg (__v8sf) __W,
845 1.1 mrg (__mmask8) __U,
846 1.1 mrg _MM_FROUND_CUR_DIRECTION);
847 1.1 mrg }
848 1.1 mrg
849 1.1 mrg extern __inline __m256
850 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
851 1.1 mrg _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A)
852 1.1 mrg {
853 1.1 mrg return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
854 1.1 mrg (__v8sf)
855 1.1 mrg _mm256_setzero_ps (),
856 1.1 mrg (__mmask8) __U,
857 1.1 mrg _MM_FROUND_CUR_DIRECTION);
858 1.1 mrg }
859 1.1 mrg
860 1.1 mrg extern __inline __m512d
861 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
862 1.1 mrg _mm512_cvtepi64_pd (__m512i __A)
863 1.1 mrg {
864 1.1 mrg return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
865 1.1 mrg (__v8df)
866 1.1 mrg _mm512_setzero_pd (),
867 1.1 mrg (__mmask8) -1,
868 1.1 mrg _MM_FROUND_CUR_DIRECTION);
869 1.1 mrg }
870 1.1 mrg
871 1.1 mrg extern __inline __m512d
872 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
873 1.1 mrg _mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A)
874 1.1 mrg {
875 1.1 mrg return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
876 1.1 mrg (__v8df) __W,
877 1.1 mrg (__mmask8) __U,
878 1.1 mrg _MM_FROUND_CUR_DIRECTION);
879 1.1 mrg }
880 1.1 mrg
881 1.1 mrg extern __inline __m512d
882 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
883 1.1 mrg _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A)
884 1.1 mrg {
885 1.1 mrg return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
886 1.1 mrg (__v8df)
887 1.1 mrg _mm512_setzero_pd (),
888 1.1 mrg (__mmask8) __U,
889 1.1 mrg _MM_FROUND_CUR_DIRECTION);
890 1.1 mrg }
891 1.1 mrg
892 1.1 mrg extern __inline __m512d
893 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
894 1.1 mrg _mm512_cvtepu64_pd (__m512i __A)
895 1.1 mrg {
896 1.1 mrg return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
897 1.1 mrg (__v8df)
898 1.1 mrg _mm512_setzero_pd (),
899 1.1 mrg (__mmask8) -1,
900 1.1 mrg _MM_FROUND_CUR_DIRECTION);
901 1.1 mrg }
902 1.1 mrg
903 1.1 mrg extern __inline __m512d
904 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
905 1.1 mrg _mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A)
906 1.1 mrg {
907 1.1 mrg return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
908 1.1 mrg (__v8df) __W,
909 1.1 mrg (__mmask8) __U,
910 1.1 mrg _MM_FROUND_CUR_DIRECTION);
911 1.1 mrg }
912 1.1 mrg
913 1.1 mrg extern __inline __m512d
914 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
915 1.1 mrg _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A)
916 1.1 mrg {
917 1.1 mrg return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
918 1.1 mrg (__v8df)
919 1.1 mrg _mm512_setzero_pd (),
920 1.1 mrg (__mmask8) __U,
921 1.1 mrg _MM_FROUND_CUR_DIRECTION);
922 1.1 mrg }
923 1.1 mrg
924 1.1 mrg #ifdef __OPTIMIZE__
925 1.1 mrg extern __inline __m512d
926 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
927 1.1 mrg _mm512_range_pd (__m512d __A, __m512d __B, int __C)
928 1.1 mrg {
929 1.1 mrg return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
930 1.1 mrg (__v8df) __B, __C,
931 1.1 mrg (__v8df)
932 1.1 mrg _mm512_setzero_pd (),
933 1.1 mrg (__mmask8) -1,
934 1.1 mrg _MM_FROUND_CUR_DIRECTION);
935 1.1 mrg }
936 1.1 mrg
937 1.1 mrg extern __inline __m512d
938 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
939 1.1 mrg _mm512_mask_range_pd (__m512d __W, __mmask8 __U,
940 1.1 mrg __m512d __A, __m512d __B, int __C)
941 1.1 mrg {
942 1.1 mrg return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
943 1.1 mrg (__v8df) __B, __C,
944 1.1 mrg (__v8df) __W,
945 1.1 mrg (__mmask8) __U,
946 1.1 mrg _MM_FROUND_CUR_DIRECTION);
947 1.1 mrg }
948 1.1 mrg
949 1.1 mrg extern __inline __m512d
950 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
951 1.1 mrg _mm512_maskz_range_pd (__mmask8 __U, __m512d __A, __m512d __B, int __C)
952 1.1 mrg {
953 1.1 mrg return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
954 1.1 mrg (__v8df) __B, __C,
955 1.1 mrg (__v8df)
956 1.1 mrg _mm512_setzero_pd (),
957 1.1 mrg (__mmask8) __U,
958 1.1 mrg _MM_FROUND_CUR_DIRECTION);
959 1.1 mrg }
960 1.1 mrg
961 1.1 mrg extern __inline __m512
962 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
963 1.1 mrg _mm512_range_ps (__m512 __A, __m512 __B, int __C)
964 1.1 mrg {
965 1.1 mrg return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
966 1.1 mrg (__v16sf) __B, __C,
967 1.1 mrg (__v16sf)
968 1.1 mrg _mm512_setzero_ps (),
969 1.1 mrg (__mmask16) -1,
970 1.1 mrg _MM_FROUND_CUR_DIRECTION);
971 1.1 mrg }
972 1.1 mrg
973 1.1 mrg extern __inline __m512
974 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
975 1.1 mrg _mm512_mask_range_ps (__m512 __W, __mmask16 __U,
976 1.1 mrg __m512 __A, __m512 __B, int __C)
977 1.1 mrg {
978 1.1 mrg return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
979 1.1 mrg (__v16sf) __B, __C,
980 1.1 mrg (__v16sf) __W,
981 1.1 mrg (__mmask16) __U,
982 1.1 mrg _MM_FROUND_CUR_DIRECTION);
983 1.1 mrg }
984 1.1 mrg
985 1.1 mrg extern __inline __m512
986 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
987 1.1 mrg _mm512_maskz_range_ps (__mmask16 __U, __m512 __A, __m512 __B, int __C)
988 1.1 mrg {
989 1.1 mrg return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
990 1.1 mrg (__v16sf) __B, __C,
991 1.1 mrg (__v16sf)
992 1.1 mrg _mm512_setzero_ps (),
993 1.1 mrg (__mmask16) __U,
994 1.1 mrg _MM_FROUND_CUR_DIRECTION);
995 1.1 mrg }
996 1.1 mrg
997 1.1 mrg extern __inline __m128d
998 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
999 1.1 mrg _mm_reduce_sd (__m128d __A, __m128d __B, int __C)
1000 1.1 mrg {
1001 1.1 mrg return (__m128d) __builtin_ia32_reducesd ((__v2df) __A,
1002 1.1 mrg (__v2df) __B, __C);
1003 1.1 mrg }
1004 1.1 mrg
1005 1.1 mrg extern __inline __m128
1006 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1007 1.1 mrg _mm_reduce_ss (__m128 __A, __m128 __B, int __C)
1008 1.1 mrg {
1009 1.1 mrg return (__m128) __builtin_ia32_reducess ((__v4sf) __A,
1010 1.1 mrg (__v4sf) __B, __C);
1011 1.1 mrg }
1012 1.1 mrg
1013 1.1 mrg extern __inline __m128d
1014 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1015 1.1 mrg _mm_range_sd (__m128d __A, __m128d __B, int __C)
1016 1.1 mrg {
1017 1.1 mrg return (__m128d) __builtin_ia32_rangesd128_round ((__v2df) __A,
1018 1.1 mrg (__v2df) __B, __C,
1019 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1020 1.1 mrg }
1021 1.1 mrg
1022 1.1 mrg
1023 1.1 mrg extern __inline __m128
1024 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1025 1.1 mrg _mm_range_ss (__m128 __A, __m128 __B, int __C)
1026 1.1 mrg {
1027 1.1 mrg return (__m128) __builtin_ia32_rangess128_round ((__v4sf) __A,
1028 1.1 mrg (__v4sf) __B, __C,
1029 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1030 1.1 mrg }
1031 1.1 mrg
1032 1.1 mrg extern __inline __m128d
1033 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1034 1.1 mrg _mm_range_round_sd (__m128d __A, __m128d __B, int __C, const int __R)
1035 1.1 mrg {
1036 1.1 mrg return (__m128d) __builtin_ia32_rangesd128_round ((__v2df) __A,
1037 1.1 mrg (__v2df) __B, __C,
1038 1.1 mrg __R);
1039 1.1 mrg }
1040 1.1 mrg
1041 1.1 mrg extern __inline __m128
1042 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1043 1.1 mrg _mm_range_round_ss (__m128 __A, __m128 __B, int __C, const int __R)
1044 1.1 mrg {
1045 1.1 mrg return (__m128) __builtin_ia32_rangess128_round ((__v4sf) __A,
1046 1.1 mrg (__v4sf) __B, __C,
1047 1.1 mrg __R);
1048 1.1 mrg }
1049 1.1 mrg
1050 1.1 mrg extern __inline __mmask8
1051 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1052 1.1 mrg _mm_fpclass_ss_mask (__m128 __A, const int __imm)
1053 1.1 mrg {
1054 1.1 mrg return (__mmask8) __builtin_ia32_fpclassss ((__v4sf) __A, __imm);
1055 1.1 mrg }
1056 1.1 mrg
1057 1.1 mrg extern __inline __mmask8
1058 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1059 1.1 mrg _mm_fpclass_sd_mask (__m128d __A, const int __imm)
1060 1.1 mrg {
1061 1.1 mrg return (__mmask8) __builtin_ia32_fpclasssd ((__v2df) __A, __imm);
1062 1.1 mrg }
1063 1.1 mrg
1064 1.1 mrg extern __inline __m512i
1065 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1066 1.1 mrg _mm512_cvtt_roundpd_epi64 (__m512d __A, const int __R)
1067 1.1 mrg {
1068 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
1069 1.1 mrg (__v8di)
1070 1.1 mrg _mm512_setzero_si512 (),
1071 1.1 mrg (__mmask8) -1,
1072 1.1 mrg __R);
1073 1.1 mrg }
1074 1.1 mrg
1075 1.1 mrg extern __inline __m512i
1076 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1077 1.1 mrg _mm512_mask_cvtt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
1078 1.1 mrg const int __R)
1079 1.1 mrg {
1080 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
1081 1.1 mrg (__v8di) __W,
1082 1.1 mrg (__mmask8) __U,
1083 1.1 mrg __R);
1084 1.1 mrg }
1085 1.1 mrg
1086 1.1 mrg extern __inline __m512i
1087 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1088 1.1 mrg _mm512_maskz_cvtt_roundpd_epi64 (__mmask8 __U, __m512d __A,
1089 1.1 mrg const int __R)
1090 1.1 mrg {
1091 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
1092 1.1 mrg (__v8di)
1093 1.1 mrg _mm512_setzero_si512 (),
1094 1.1 mrg (__mmask8) __U,
1095 1.1 mrg __R);
1096 1.1 mrg }
1097 1.1 mrg
1098 1.1 mrg extern __inline __m512i
1099 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1100 1.1 mrg _mm512_cvtt_roundpd_epu64 (__m512d __A, const int __R)
1101 1.1 mrg {
1102 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
1103 1.1 mrg (__v8di)
1104 1.1 mrg _mm512_setzero_si512 (),
1105 1.1 mrg (__mmask8) -1,
1106 1.1 mrg __R);
1107 1.1 mrg }
1108 1.1 mrg
1109 1.1 mrg extern __inline __m512i
1110 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1111 1.1 mrg _mm512_mask_cvtt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
1112 1.1 mrg const int __R)
1113 1.1 mrg {
1114 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
1115 1.1 mrg (__v8di) __W,
1116 1.1 mrg (__mmask8) __U,
1117 1.1 mrg __R);
1118 1.1 mrg }
1119 1.1 mrg
1120 1.1 mrg extern __inline __m512i
1121 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1122 1.1 mrg _mm512_maskz_cvtt_roundpd_epu64 (__mmask8 __U, __m512d __A,
1123 1.1 mrg const int __R)
1124 1.1 mrg {
1125 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
1126 1.1 mrg (__v8di)
1127 1.1 mrg _mm512_setzero_si512 (),
1128 1.1 mrg (__mmask8) __U,
1129 1.1 mrg __R);
1130 1.1 mrg }
1131 1.1 mrg
1132 1.1 mrg extern __inline __m512i
1133 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1134 1.1 mrg _mm512_cvtt_roundps_epi64 (__m256 __A, const int __R)
1135 1.1 mrg {
1136 1.1 mrg return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
1137 1.1 mrg (__v8di)
1138 1.1 mrg _mm512_setzero_si512 (),
1139 1.1 mrg (__mmask8) -1,
1140 1.1 mrg __R);
1141 1.1 mrg }
1142 1.1 mrg
1143 1.1 mrg extern __inline __m512i
1144 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1145 1.1 mrg _mm512_mask_cvtt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
1146 1.1 mrg const int __R)
1147 1.1 mrg {
1148 1.1 mrg return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
1149 1.1 mrg (__v8di) __W,
1150 1.1 mrg (__mmask8) __U,
1151 1.1 mrg __R);
1152 1.1 mrg }
1153 1.1 mrg
1154 1.1 mrg extern __inline __m512i
1155 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1156 1.1 mrg _mm512_maskz_cvtt_roundps_epi64 (__mmask8 __U, __m256 __A,
1157 1.1 mrg const int __R)
1158 1.1 mrg {
1159 1.1 mrg return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
1160 1.1 mrg (__v8di)
1161 1.1 mrg _mm512_setzero_si512 (),
1162 1.1 mrg (__mmask8) __U,
1163 1.1 mrg __R);
1164 1.1 mrg }
1165 1.1 mrg
1166 1.1 mrg extern __inline __m512i
1167 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1168 1.1 mrg _mm512_cvtt_roundps_epu64 (__m256 __A, const int __R)
1169 1.1 mrg {
1170 1.1 mrg return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
1171 1.1 mrg (__v8di)
1172 1.1 mrg _mm512_setzero_si512 (),
1173 1.1 mrg (__mmask8) -1,
1174 1.1 mrg __R);
1175 1.1 mrg }
1176 1.1 mrg
1177 1.1 mrg extern __inline __m512i
1178 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1179 1.1 mrg _mm512_mask_cvtt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
1180 1.1 mrg const int __R)
1181 1.1 mrg {
1182 1.1 mrg return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
1183 1.1 mrg (__v8di) __W,
1184 1.1 mrg (__mmask8) __U,
1185 1.1 mrg __R);
1186 1.1 mrg }
1187 1.1 mrg
1188 1.1 mrg extern __inline __m512i
1189 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1190 1.1 mrg _mm512_maskz_cvtt_roundps_epu64 (__mmask8 __U, __m256 __A,
1191 1.1 mrg const int __R)
1192 1.1 mrg {
1193 1.1 mrg return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
1194 1.1 mrg (__v8di)
1195 1.1 mrg _mm512_setzero_si512 (),
1196 1.1 mrg (__mmask8) __U,
1197 1.1 mrg __R);
1198 1.1 mrg }
1199 1.1 mrg
1200 1.1 mrg extern __inline __m512i
1201 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1202 1.1 mrg _mm512_cvt_roundpd_epi64 (__m512d __A, const int __R)
1203 1.1 mrg {
1204 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
1205 1.1 mrg (__v8di)
1206 1.1 mrg _mm512_setzero_si512 (),
1207 1.1 mrg (__mmask8) -1,
1208 1.1 mrg __R);
1209 1.1 mrg }
1210 1.1 mrg
1211 1.1 mrg extern __inline __m512i
1212 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1213 1.1 mrg _mm512_mask_cvt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
1214 1.1 mrg const int __R)
1215 1.1 mrg {
1216 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
1217 1.1 mrg (__v8di) __W,
1218 1.1 mrg (__mmask8) __U,
1219 1.1 mrg __R);
1220 1.1 mrg }
1221 1.1 mrg
1222 1.1 mrg extern __inline __m512i
1223 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1224 1.1 mrg _mm512_maskz_cvt_roundpd_epi64 (__mmask8 __U, __m512d __A,
1225 1.1 mrg const int __R)
1226 1.1 mrg {
1227 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
1228 1.1 mrg (__v8di)
1229 1.1 mrg _mm512_setzero_si512 (),
1230 1.1 mrg (__mmask8) __U,
1231 1.1 mrg __R);
1232 1.1 mrg }
1233 1.1 mrg
1234 1.1 mrg extern __inline __m512i
1235 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1236 1.1 mrg _mm512_cvt_roundpd_epu64 (__m512d __A, const int __R)
1237 1.1 mrg {
1238 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
1239 1.1 mrg (__v8di)
1240 1.1 mrg _mm512_setzero_si512 (),
1241 1.1 mrg (__mmask8) -1,
1242 1.1 mrg __R);
1243 1.1 mrg }
1244 1.1 mrg
1245 1.1 mrg extern __inline __m512i
1246 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1247 1.1 mrg _mm512_mask_cvt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
1248 1.1 mrg const int __R)
1249 1.1 mrg {
1250 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
1251 1.1 mrg (__v8di) __W,
1252 1.1 mrg (__mmask8) __U,
1253 1.1 mrg __R);
1254 1.1 mrg }
1255 1.1 mrg
1256 1.1 mrg extern __inline __m512i
1257 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1258 1.1 mrg _mm512_maskz_cvt_roundpd_epu64 (__mmask8 __U, __m512d __A,
1259 1.1 mrg const int __R)
1260 1.1 mrg {
1261 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
1262 1.1 mrg (__v8di)
1263 1.1 mrg _mm512_setzero_si512 (),
1264 1.1 mrg (__mmask8) __U,
1265 1.1 mrg __R);
1266 1.1 mrg }
1267 1.1 mrg
1268 1.1 mrg extern __inline __m512i
1269 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1270 1.1 mrg _mm512_cvt_roundps_epi64 (__m256 __A, const int __R)
1271 1.1 mrg {
1272 1.1 mrg return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
1273 1.1 mrg (__v8di)
1274 1.1 mrg _mm512_setzero_si512 (),
1275 1.1 mrg (__mmask8) -1,
1276 1.1 mrg __R);
1277 1.1 mrg }
1278 1.1 mrg
1279 1.1 mrg extern __inline __m512i
1280 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1281 1.1 mrg _mm512_mask_cvt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
1282 1.1 mrg const int __R)
1283 1.1 mrg {
1284 1.1 mrg return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
1285 1.1 mrg (__v8di) __W,
1286 1.1 mrg (__mmask8) __U,
1287 1.1 mrg __R);
1288 1.1 mrg }
1289 1.1 mrg
1290 1.1 mrg extern __inline __m512i
1291 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1292 1.1 mrg _mm512_maskz_cvt_roundps_epi64 (__mmask8 __U, __m256 __A,
1293 1.1 mrg const int __R)
1294 1.1 mrg {
1295 1.1 mrg return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
1296 1.1 mrg (__v8di)
1297 1.1 mrg _mm512_setzero_si512 (),
1298 1.1 mrg (__mmask8) __U,
1299 1.1 mrg __R);
1300 1.1 mrg }
1301 1.1 mrg
1302 1.1 mrg extern __inline __m512i
1303 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1304 1.1 mrg _mm512_cvt_roundps_epu64 (__m256 __A, const int __R)
1305 1.1 mrg {
1306 1.1 mrg return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
1307 1.1 mrg (__v8di)
1308 1.1 mrg _mm512_setzero_si512 (),
1309 1.1 mrg (__mmask8) -1,
1310 1.1 mrg __R);
1311 1.1 mrg }
1312 1.1 mrg
1313 1.1 mrg extern __inline __m512i
1314 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1315 1.1 mrg _mm512_mask_cvt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
1316 1.1 mrg const int __R)
1317 1.1 mrg {
1318 1.1 mrg return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
1319 1.1 mrg (__v8di) __W,
1320 1.1 mrg (__mmask8) __U,
1321 1.1 mrg __R);
1322 1.1 mrg }
1323 1.1 mrg
1324 1.1 mrg extern __inline __m512i
1325 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1326 1.1 mrg _mm512_maskz_cvt_roundps_epu64 (__mmask8 __U, __m256 __A,
1327 1.1 mrg const int __R)
1328 1.1 mrg {
1329 1.1 mrg return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
1330 1.1 mrg (__v8di)
1331 1.1 mrg _mm512_setzero_si512 (),
1332 1.1 mrg (__mmask8) __U,
1333 1.1 mrg __R);
1334 1.1 mrg }
1335 1.1 mrg
1336 1.1 mrg extern __inline __m256
1337 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1338 1.1 mrg _mm512_cvt_roundepi64_ps (__m512i __A, const int __R)
1339 1.1 mrg {
1340 1.1 mrg return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
1341 1.1 mrg (__v8sf)
1342 1.1 mrg _mm256_setzero_ps (),
1343 1.1 mrg (__mmask8) -1,
1344 1.1 mrg __R);
1345 1.1 mrg }
1346 1.1 mrg
1347 1.1 mrg extern __inline __m256
1348 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1349 1.1 mrg _mm512_mask_cvt_roundepi64_ps (__m256 __W, __mmask8 __U, __m512i __A,
1350 1.1 mrg const int __R)
1351 1.1 mrg {
1352 1.1 mrg return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
1353 1.1 mrg (__v8sf) __W,
1354 1.1 mrg (__mmask8) __U,
1355 1.1 mrg __R);
1356 1.1 mrg }
1357 1.1 mrg
1358 1.1 mrg extern __inline __m256
1359 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1360 1.1 mrg _mm512_maskz_cvt_roundepi64_ps (__mmask8 __U, __m512i __A,
1361 1.1 mrg const int __R)
1362 1.1 mrg {
1363 1.1 mrg return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
1364 1.1 mrg (__v8sf)
1365 1.1 mrg _mm256_setzero_ps (),
1366 1.1 mrg (__mmask8) __U,
1367 1.1 mrg __R);
1368 1.1 mrg }
1369 1.1 mrg
1370 1.1 mrg extern __inline __m256
1371 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1372 1.1 mrg _mm512_cvt_roundepu64_ps (__m512i __A, const int __R)
1373 1.1 mrg {
1374 1.1 mrg return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1375 1.1 mrg (__v8sf)
1376 1.1 mrg _mm256_setzero_ps (),
1377 1.1 mrg (__mmask8) -1,
1378 1.1 mrg __R);
1379 1.1 mrg }
1380 1.1 mrg
1381 1.1 mrg extern __inline __m256
1382 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1383 1.1 mrg _mm512_mask_cvt_roundepu64_ps (__m256 __W, __mmask8 __U, __m512i __A,
1384 1.1 mrg const int __R)
1385 1.1 mrg {
1386 1.1 mrg return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1387 1.1 mrg (__v8sf) __W,
1388 1.1 mrg (__mmask8) __U,
1389 1.1 mrg __R);
1390 1.1 mrg }
1391 1.1 mrg
1392 1.1 mrg extern __inline __m256
1393 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1394 1.1 mrg _mm512_maskz_cvt_roundepu64_ps (__mmask8 __U, __m512i __A,
1395 1.1 mrg const int __R)
1396 1.1 mrg {
1397 1.1 mrg return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1398 1.1 mrg (__v8sf)
1399 1.1 mrg _mm256_setzero_ps (),
1400 1.1 mrg (__mmask8) __U,
1401 1.1 mrg __R);
1402 1.1 mrg }
1403 1.1 mrg
1404 1.1 mrg extern __inline __m512d
1405 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1406 1.1 mrg _mm512_cvt_roundepi64_pd (__m512i __A, const int __R)
1407 1.1 mrg {
1408 1.1 mrg return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1409 1.1 mrg (__v8df)
1410 1.1 mrg _mm512_setzero_pd (),
1411 1.1 mrg (__mmask8) -1,
1412 1.1 mrg __R);
1413 1.1 mrg }
1414 1.1 mrg
1415 1.1 mrg extern __inline __m512d
1416 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1417 1.1 mrg _mm512_mask_cvt_roundepi64_pd (__m512d __W, __mmask8 __U, __m512i __A,
1418 1.1 mrg const int __R)
1419 1.1 mrg {
1420 1.1 mrg return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1421 1.1 mrg (__v8df) __W,
1422 1.1 mrg (__mmask8) __U,
1423 1.1 mrg __R);
1424 1.1 mrg }
1425 1.1 mrg
1426 1.1 mrg extern __inline __m512d
1427 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1428 1.1 mrg _mm512_maskz_cvt_roundepi64_pd (__mmask8 __U, __m512i __A,
1429 1.1 mrg const int __R)
1430 1.1 mrg {
1431 1.1 mrg return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1432 1.1 mrg (__v8df)
1433 1.1 mrg _mm512_setzero_pd (),
1434 1.1 mrg (__mmask8) __U,
1435 1.1 mrg __R);
1436 1.1 mrg }
1437 1.1 mrg
1438 1.1 mrg extern __inline __m512d
1439 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1440 1.1 mrg _mm512_cvt_roundepu64_pd (__m512i __A, const int __R)
1441 1.1 mrg {
1442 1.1 mrg return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1443 1.1 mrg (__v8df)
1444 1.1 mrg _mm512_setzero_pd (),
1445 1.1 mrg (__mmask8) -1,
1446 1.1 mrg __R);
1447 1.1 mrg }
1448 1.1 mrg
1449 1.1 mrg extern __inline __m512d
1450 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1451 1.1 mrg _mm512_mask_cvt_roundepu64_pd (__m512d __W, __mmask8 __U, __m512i __A,
1452 1.1 mrg const int __R)
1453 1.1 mrg {
1454 1.1 mrg return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1455 1.1 mrg (__v8df) __W,
1456 1.1 mrg (__mmask8) __U,
1457 1.1 mrg __R);
1458 1.1 mrg }
1459 1.1 mrg
1460 1.1 mrg extern __inline __m512d
1461 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1462 1.1 mrg _mm512_maskz_cvt_roundepu64_pd (__mmask8 __U, __m512i __A,
1463 1.1 mrg const int __R)
1464 1.1 mrg {
1465 1.1 mrg return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1466 1.1 mrg (__v8df)
1467 1.1 mrg _mm512_setzero_pd (),
1468 1.1 mrg (__mmask8) __U,
1469 1.1 mrg __R);
1470 1.1 mrg }
1471 1.1 mrg
1472 1.1 mrg extern __inline __m512d
1473 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1474 1.1 mrg _mm512_reduce_pd (__m512d __A, int __B)
1475 1.1 mrg {
1476 1.1 mrg return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
1477 1.1 mrg (__v8df)
1478 1.1 mrg _mm512_setzero_pd (),
1479 1.1 mrg (__mmask8) -1);
1480 1.1 mrg }
1481 1.1 mrg
1482 1.1 mrg extern __inline __m512d
1483 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1484 1.1 mrg _mm512_mask_reduce_pd (__m512d __W, __mmask8 __U, __m512d __A, int __B)
1485 1.1 mrg {
1486 1.1 mrg return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
1487 1.1 mrg (__v8df) __W,
1488 1.1 mrg (__mmask8) __U);
1489 1.1 mrg }
1490 1.1 mrg
1491 1.1 mrg extern __inline __m512d
1492 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1493 1.1 mrg _mm512_maskz_reduce_pd (__mmask8 __U, __m512d __A, int __B)
1494 1.1 mrg {
1495 1.1 mrg return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
1496 1.1 mrg (__v8df)
1497 1.1 mrg _mm512_setzero_pd (),
1498 1.1 mrg (__mmask8) __U);
1499 1.1 mrg }
1500 1.1 mrg
1501 1.1 mrg extern __inline __m512
1502 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1503 1.1 mrg _mm512_reduce_ps (__m512 __A, int __B)
1504 1.1 mrg {
1505 1.1 mrg return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
1506 1.1 mrg (__v16sf)
1507 1.1 mrg _mm512_setzero_ps (),
1508 1.1 mrg (__mmask16) -1);
1509 1.1 mrg }
1510 1.1 mrg
1511 1.1 mrg extern __inline __m512
1512 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1513 1.1 mrg _mm512_mask_reduce_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B)
1514 1.1 mrg {
1515 1.1 mrg return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
1516 1.1 mrg (__v16sf) __W,
1517 1.1 mrg (__mmask16) __U);
1518 1.1 mrg }
1519 1.1 mrg
1520 1.1 mrg extern __inline __m512
1521 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1522 1.1 mrg _mm512_maskz_reduce_ps (__mmask16 __U, __m512 __A, int __B)
1523 1.1 mrg {
1524 1.1 mrg return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
1525 1.1 mrg (__v16sf)
1526 1.1 mrg _mm512_setzero_ps (),
1527 1.1 mrg (__mmask16) __U);
1528 1.1 mrg }
1529 1.1 mrg
1530 1.1 mrg extern __inline __m256
1531 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1532 1.1 mrg _mm512_extractf32x8_ps (__m512 __A, const int __imm)
1533 1.1 mrg {
1534 1.1 mrg return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
1535 1.1 mrg __imm,
1536 1.1 mrg (__v8sf)
1537 1.1 mrg _mm256_setzero_ps (),
1538 1.1 mrg (__mmask8) -1);
1539 1.1 mrg }
1540 1.1 mrg
1541 1.1 mrg extern __inline __m256
1542 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1543 1.1 mrg _mm512_mask_extractf32x8_ps (__m256 __W, __mmask8 __U, __m512 __A,
1544 1.1 mrg const int __imm)
1545 1.1 mrg {
1546 1.1 mrg return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
1547 1.1 mrg __imm,
1548 1.1 mrg (__v8sf) __W,
1549 1.1 mrg (__mmask8) __U);
1550 1.1 mrg }
1551 1.1 mrg
1552 1.1 mrg extern __inline __m256
1553 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1554 1.1 mrg _mm512_maskz_extractf32x8_ps (__mmask8 __U, __m512 __A,
1555 1.1 mrg const int __imm)
1556 1.1 mrg {
1557 1.1 mrg return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
1558 1.1 mrg __imm,
1559 1.1 mrg (__v8sf)
1560 1.1 mrg _mm256_setzero_ps (),
1561 1.1 mrg (__mmask8) __U);
1562 1.1 mrg }
1563 1.1 mrg
1564 1.1 mrg extern __inline __m128d
1565 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1566 1.1 mrg _mm512_extractf64x2_pd (__m512d __A, const int __imm)
1567 1.1 mrg {
1568 1.1 mrg return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
1569 1.1 mrg __imm,
1570 1.1 mrg (__v2df)
1571 1.1 mrg _mm_setzero_pd (),
1572 1.1 mrg (__mmask8) -
1573 1.1 mrg 1);
1574 1.1 mrg }
1575 1.1 mrg
1576 1.1 mrg extern __inline __m128d
1577 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1578 1.1 mrg _mm512_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m512d __A,
1579 1.1 mrg const int __imm)
1580 1.1 mrg {
1581 1.1 mrg return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
1582 1.1 mrg __imm,
1583 1.1 mrg (__v2df) __W,
1584 1.1 mrg (__mmask8)
1585 1.1 mrg __U);
1586 1.1 mrg }
1587 1.1 mrg
1588 1.1 mrg extern __inline __m128d
1589 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1590 1.1 mrg _mm512_maskz_extractf64x2_pd (__mmask8 __U, __m512d __A,
1591 1.1 mrg const int __imm)
1592 1.1 mrg {
1593 1.1 mrg return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
1594 1.1 mrg __imm,
1595 1.1 mrg (__v2df)
1596 1.1 mrg _mm_setzero_pd (),
1597 1.1 mrg (__mmask8)
1598 1.1 mrg __U);
1599 1.1 mrg }
1600 1.1 mrg
1601 1.1 mrg extern __inline __m256i
1602 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1603 1.1 mrg _mm512_extracti32x8_epi32 (__m512i __A, const int __imm)
1604 1.1 mrg {
1605 1.1 mrg return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
1606 1.1 mrg __imm,
1607 1.1 mrg (__v8si)
1608 1.1 mrg _mm256_setzero_si256 (),
1609 1.1 mrg (__mmask8) -1);
1610 1.1 mrg }
1611 1.1 mrg
1612 1.1 mrg extern __inline __m256i
1613 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1614 1.1 mrg _mm512_mask_extracti32x8_epi32 (__m256i __W, __mmask8 __U, __m512i __A,
1615 1.1 mrg const int __imm)
1616 1.1 mrg {
1617 1.1 mrg return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
1618 1.1 mrg __imm,
1619 1.1 mrg (__v8si) __W,
1620 1.1 mrg (__mmask8) __U);
1621 1.1 mrg }
1622 1.1 mrg
1623 1.1 mrg extern __inline __m256i
1624 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1625 1.1 mrg _mm512_maskz_extracti32x8_epi32 (__mmask8 __U, __m512i __A,
1626 1.1 mrg const int __imm)
1627 1.1 mrg {
1628 1.1 mrg return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
1629 1.1 mrg __imm,
1630 1.1 mrg (__v8si)
1631 1.1 mrg _mm256_setzero_si256 (),
1632 1.1 mrg (__mmask8) __U);
1633 1.1 mrg }
1634 1.1 mrg
1635 1.1 mrg extern __inline __m128i
1636 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1637 1.1 mrg _mm512_extracti64x2_epi64 (__m512i __A, const int __imm)
1638 1.1 mrg {
1639 1.1 mrg return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
1640 1.1 mrg __imm,
1641 1.1 mrg (__v2di)
1642 1.1 mrg _mm_setzero_di (),
1643 1.1 mrg (__mmask8) -
1644 1.1 mrg 1);
1645 1.1 mrg }
1646 1.1 mrg
1647 1.1 mrg extern __inline __m128i
1648 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1649 1.1 mrg _mm512_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m512i __A,
1650 1.1 mrg const int __imm)
1651 1.1 mrg {
1652 1.1 mrg return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
1653 1.1 mrg __imm,
1654 1.1 mrg (__v2di) __W,
1655 1.1 mrg (__mmask8)
1656 1.1 mrg __U);
1657 1.1 mrg }
1658 1.1 mrg
1659 1.1 mrg extern __inline __m128i
1660 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1661 1.1 mrg _mm512_maskz_extracti64x2_epi64 (__mmask8 __U, __m512i __A,
1662 1.1 mrg const int __imm)
1663 1.1 mrg {
1664 1.1 mrg return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
1665 1.1 mrg __imm,
1666 1.1 mrg (__v2di)
1667 1.1 mrg _mm_setzero_di (),
1668 1.1 mrg (__mmask8)
1669 1.1 mrg __U);
1670 1.1 mrg }
1671 1.1 mrg
1672 1.1 mrg extern __inline __m512d
1673 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1674 1.1 mrg _mm512_range_round_pd (__m512d __A, __m512d __B, int __C,
1675 1.1 mrg const int __R)
1676 1.1 mrg {
1677 1.1 mrg return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
1678 1.1 mrg (__v8df) __B, __C,
1679 1.1 mrg (__v8df)
1680 1.1 mrg _mm512_setzero_pd (),
1681 1.1 mrg (__mmask8) -1,
1682 1.1 mrg __R);
1683 1.1 mrg }
1684 1.1 mrg
1685 1.1 mrg extern __inline __m512d
1686 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1687 1.1 mrg _mm512_mask_range_round_pd (__m512d __W, __mmask8 __U,
1688 1.1 mrg __m512d __A, __m512d __B, int __C,
1689 1.1 mrg const int __R)
1690 1.1 mrg {
1691 1.1 mrg return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
1692 1.1 mrg (__v8df) __B, __C,
1693 1.1 mrg (__v8df) __W,
1694 1.1 mrg (__mmask8) __U,
1695 1.1 mrg __R);
1696 1.1 mrg }
1697 1.1 mrg
1698 1.1 mrg extern __inline __m512d
1699 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1700 1.1 mrg _mm512_maskz_range_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
1701 1.1 mrg int __C, const int __R)
1702 1.1 mrg {
1703 1.1 mrg return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
1704 1.1 mrg (__v8df) __B, __C,
1705 1.1 mrg (__v8df)
1706 1.1 mrg _mm512_setzero_pd (),
1707 1.1 mrg (__mmask8) __U,
1708 1.1 mrg __R);
1709 1.1 mrg }
1710 1.1 mrg
1711 1.1 mrg extern __inline __m512
1712 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1713 1.1 mrg _mm512_range_round_ps (__m512 __A, __m512 __B, int __C, const int __R)
1714 1.1 mrg {
1715 1.1 mrg return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
1716 1.1 mrg (__v16sf) __B, __C,
1717 1.1 mrg (__v16sf)
1718 1.1 mrg _mm512_setzero_ps (),
1719 1.1 mrg (__mmask16) -1,
1720 1.1 mrg __R);
1721 1.1 mrg }
1722 1.1 mrg
1723 1.1 mrg extern __inline __m512
1724 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1725 1.1 mrg _mm512_mask_range_round_ps (__m512 __W, __mmask16 __U,
1726 1.1 mrg __m512 __A, __m512 __B, int __C,
1727 1.1 mrg const int __R)
1728 1.1 mrg {
1729 1.1 mrg return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
1730 1.1 mrg (__v16sf) __B, __C,
1731 1.1 mrg (__v16sf) __W,
1732 1.1 mrg (__mmask16) __U,
1733 1.1 mrg __R);
1734 1.1 mrg }
1735 1.1 mrg
1736 1.1 mrg extern __inline __m512
1737 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1738 1.1 mrg _mm512_maskz_range_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
1739 1.1 mrg int __C, const int __R)
1740 1.1 mrg {
1741 1.1 mrg return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
1742 1.1 mrg (__v16sf) __B, __C,
1743 1.1 mrg (__v16sf)
1744 1.1 mrg _mm512_setzero_ps (),
1745 1.1 mrg (__mmask16) __U,
1746 1.1 mrg __R);
1747 1.1 mrg }
1748 1.1 mrg
1749 1.1 mrg extern __inline __m512i
1750 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1751 1.1 mrg _mm512_inserti32x8 (__m512i __A, __m256i __B, const int __imm)
1752 1.1 mrg {
1753 1.1 mrg return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
1754 1.1 mrg (__v8si) __B,
1755 1.1 mrg __imm,
1756 1.1 mrg (__v16si)
1757 1.1 mrg _mm512_setzero_si512 (),
1758 1.1 mrg (__mmask16) -1);
1759 1.1 mrg }
1760 1.1 mrg
1761 1.1 mrg extern __inline __m512i
1762 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1763 1.1 mrg _mm512_mask_inserti32x8 (__m512i __W, __mmask16 __U, __m512i __A,
1764 1.1 mrg __m256i __B, const int __imm)
1765 1.1 mrg {
1766 1.1 mrg return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
1767 1.1 mrg (__v8si) __B,
1768 1.1 mrg __imm,
1769 1.1 mrg (__v16si) __W,
1770 1.1 mrg (__mmask16) __U);
1771 1.1 mrg }
1772 1.1 mrg
1773 1.1 mrg extern __inline __m512i
1774 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1775 1.1 mrg _mm512_maskz_inserti32x8 (__mmask16 __U, __m512i __A, __m256i __B,
1776 1.1 mrg const int __imm)
1777 1.1 mrg {
1778 1.1 mrg return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
1779 1.1 mrg (__v8si) __B,
1780 1.1 mrg __imm,
1781 1.1 mrg (__v16si)
1782 1.1 mrg _mm512_setzero_si512 (),
1783 1.1 mrg (__mmask16) __U);
1784 1.1 mrg }
1785 1.1 mrg
1786 1.1 mrg extern __inline __m512
1787 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1788 1.1 mrg _mm512_insertf32x8 (__m512 __A, __m256 __B, const int __imm)
1789 1.1 mrg {
1790 1.1 mrg return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
1791 1.1 mrg (__v8sf) __B,
1792 1.1 mrg __imm,
1793 1.1 mrg (__v16sf)
1794 1.1 mrg _mm512_setzero_ps (),
1795 1.1 mrg (__mmask16) -1);
1796 1.1 mrg }
1797 1.1 mrg
1798 1.1 mrg extern __inline __m512
1799 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1800 1.1 mrg _mm512_mask_insertf32x8 (__m512 __W, __mmask16 __U, __m512 __A,
1801 1.1 mrg __m256 __B, const int __imm)
1802 1.1 mrg {
1803 1.1 mrg return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
1804 1.1 mrg (__v8sf) __B,
1805 1.1 mrg __imm,
1806 1.1 mrg (__v16sf) __W,
1807 1.1 mrg (__mmask16) __U);
1808 1.1 mrg }
1809 1.1 mrg
1810 1.1 mrg extern __inline __m512
1811 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1812 1.1 mrg _mm512_maskz_insertf32x8 (__mmask16 __U, __m512 __A, __m256 __B,
1813 1.1 mrg const int __imm)
1814 1.1 mrg {
1815 1.1 mrg return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
1816 1.1 mrg (__v8sf) __B,
1817 1.1 mrg __imm,
1818 1.1 mrg (__v16sf)
1819 1.1 mrg _mm512_setzero_ps (),
1820 1.1 mrg (__mmask16) __U);
1821 1.1 mrg }
1822 1.1 mrg
1823 1.1 mrg extern __inline __m512i
1824 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1825 1.1 mrg _mm512_inserti64x2 (__m512i __A, __m128i __B, const int __imm)
1826 1.1 mrg {
1827 1.1 mrg return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
1828 1.1 mrg (__v2di) __B,
1829 1.1 mrg __imm,
1830 1.1 mrg (__v8di)
1831 1.1 mrg _mm512_setzero_si512 (),
1832 1.1 mrg (__mmask8) -
1833 1.1 mrg 1);
1834 1.1 mrg }
1835 1.1 mrg
1836 1.1 mrg extern __inline __m512i
1837 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1838 1.1 mrg _mm512_mask_inserti64x2 (__m512i __W, __mmask8 __U, __m512i __A,
1839 1.1 mrg __m128i __B, const int __imm)
1840 1.1 mrg {
1841 1.1 mrg return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
1842 1.1 mrg (__v2di) __B,
1843 1.1 mrg __imm,
1844 1.1 mrg (__v8di) __W,
1845 1.1 mrg (__mmask8)
1846 1.1 mrg __U);
1847 1.1 mrg }
1848 1.1 mrg
1849 1.1 mrg extern __inline __m512i
1850 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1851 1.1 mrg _mm512_maskz_inserti64x2 (__mmask8 __U, __m512i __A, __m128i __B,
1852 1.1 mrg const int __imm)
1853 1.1 mrg {
1854 1.1 mrg return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
1855 1.1 mrg (__v2di) __B,
1856 1.1 mrg __imm,
1857 1.1 mrg (__v8di)
1858 1.1 mrg _mm512_setzero_si512 (),
1859 1.1 mrg (__mmask8)
1860 1.1 mrg __U);
1861 1.1 mrg }
1862 1.1 mrg
1863 1.1 mrg extern __inline __m512d
1864 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1865 1.1 mrg _mm512_insertf64x2 (__m512d __A, __m128d __B, const int __imm)
1866 1.1 mrg {
1867 1.1 mrg return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
1868 1.1 mrg (__v2df) __B,
1869 1.1 mrg __imm,
1870 1.1 mrg (__v8df)
1871 1.1 mrg _mm512_setzero_pd (),
1872 1.1 mrg (__mmask8) -
1873 1.1 mrg 1);
1874 1.1 mrg }
1875 1.1 mrg
1876 1.1 mrg extern __inline __m512d
1877 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1878 1.1 mrg _mm512_mask_insertf64x2 (__m512d __W, __mmask8 __U, __m512d __A,
1879 1.1 mrg __m128d __B, const int __imm)
1880 1.1 mrg {
1881 1.1 mrg return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
1882 1.1 mrg (__v2df) __B,
1883 1.1 mrg __imm,
1884 1.1 mrg (__v8df) __W,
1885 1.1 mrg (__mmask8)
1886 1.1 mrg __U);
1887 1.1 mrg }
1888 1.1 mrg
1889 1.1 mrg extern __inline __m512d
1890 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1891 1.1 mrg _mm512_maskz_insertf64x2 (__mmask8 __U, __m512d __A, __m128d __B,
1892 1.1 mrg const int __imm)
1893 1.1 mrg {
1894 1.1 mrg return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
1895 1.1 mrg (__v2df) __B,
1896 1.1 mrg __imm,
1897 1.1 mrg (__v8df)
1898 1.1 mrg _mm512_setzero_pd (),
1899 1.1 mrg (__mmask8)
1900 1.1 mrg __U);
1901 1.1 mrg }
1902 1.1 mrg
1903 1.1 mrg extern __inline __mmask8
1904 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1905 1.1 mrg _mm512_mask_fpclass_pd_mask (__mmask8 __U, __m512d __A,
1906 1.1 mrg const int __imm)
1907 1.1 mrg {
1908 1.1 mrg return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
1909 1.1 mrg __imm, __U);
1910 1.1 mrg }
1911 1.1 mrg
1912 1.1 mrg extern __inline __mmask8
1913 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1914 1.1 mrg _mm512_fpclass_pd_mask (__m512d __A, const int __imm)
1915 1.1 mrg {
1916 1.1 mrg return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
1917 1.1 mrg __imm,
1918 1.1 mrg (__mmask8) -1);
1919 1.1 mrg }
1920 1.1 mrg
1921 1.1 mrg extern __inline __mmask16
1922 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1923 1.1 mrg _mm512_mask_fpclass_ps_mask (__mmask16 __U, __m512 __A,
1924 1.1 mrg const int __imm)
1925 1.1 mrg {
1926 1.1 mrg return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
1927 1.1 mrg __imm, __U);
1928 1.1 mrg }
1929 1.1 mrg
1930 1.1 mrg extern __inline __mmask16
1931 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1932 1.1 mrg _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
1933 1.1 mrg {
1934 1.1 mrg return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
1935 1.1 mrg __imm,
1936 1.1 mrg (__mmask16) -
1937 1.1 mrg 1);
1938 1.1 mrg }
1939 1.1 mrg
1940 1.1 mrg #else
/* Scalar range macros.  All four forward to the *_round builtins: the
   plain spellings pass _MM_FROUND_CUR_DIRECTION as the last operand, the
   _round spellings pass R.  */
#define _mm_range_sd(A, B, C) \
  ((__m128d) __builtin_ia32_rangesd128_round ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     (int)(C), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm_range_ss(A, B, C) \
  ((__m128) __builtin_ia32_rangess128_round ( \
     (__v4sf)(__m128)(A), \
     (__v4sf)(__m128)(B), \
     (int)(C), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm_range_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_rangesd128_round ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     (int)(C), \
     (R)))

#define _mm_range_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_rangess128_round ( \
     (__v4sf)(__m128)(A), \
     (__v4sf)(__m128)(B), \
     (int)(C), \
     (R)))
1958 1.1 mrg
/* Macro forms built on __builtin_ia32_cvttpd2qq512_mask.  Operand order is
   (source, pass-through, mask, B): the plain form passes zero and -1, the
   mask form W and U, the maskz form zero and U.  */
#define _mm512_cvtt_roundpd_epi64(A, B) \
  ((__m512i) __builtin_ia32_cvttpd2qq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttpd2qq512_mask ( \
     (A), (__v8di)(W), (U), (B)))

#define _mm512_maskz_cvtt_roundpd_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_cvttpd2qq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
1967 1.1 mrg
/* Macro forms built on __builtin_ia32_cvttpd2uqq512_mask.  Operand order is
   (source, pass-through, mask, B): the plain form passes zero and -1, the
   mask form W and U, the maskz form zero and U.  */
#define _mm512_cvtt_roundpd_epu64(A, B) \
  ((__m512i) __builtin_ia32_cvttpd2uqq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttpd2uqq512_mask ( \
     (A), (__v8di)(W), (U), (B)))

#define _mm512_maskz_cvtt_roundpd_epu64(U, A, B) \
  ((__m512i) __builtin_ia32_cvttpd2uqq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
1976 1.1 mrg
/* Macro forms built on __builtin_ia32_cvttps2qq512_mask.  Operand order is
   (source, pass-through, mask, B): the plain form passes zero and -1, the
   mask form W and U, the maskz form zero and U.  */
#define _mm512_cvtt_roundps_epi64(A, B) \
  ((__m512i) __builtin_ia32_cvttps2qq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvtt_roundps_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2qq512_mask ( \
     (A), (__v8di)(W), (U), (B)))

#define _mm512_maskz_cvtt_roundps_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2qq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
1985 1.1 mrg
/* Macro forms built on __builtin_ia32_cvttps2uqq512_mask.  Operand order is
   (source, pass-through, mask, B): the plain form passes zero and -1, the
   mask form W and U, the maskz form zero and U.  */
#define _mm512_cvtt_roundps_epu64(A, B) \
  ((__m512i) __builtin_ia32_cvttps2uqq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvtt_roundps_epu64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2uqq512_mask ( \
     (A), (__v8di)(W), (U), (B)))

#define _mm512_maskz_cvtt_roundps_epu64(U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2uqq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
1994 1.1 mrg
/* Macro forms built on __builtin_ia32_cvtpd2qq512_mask.  Operand order is
   (source, pass-through, mask, B): the plain form passes zero and -1, the
   mask form W and U, the maskz form zero and U.  */
#define _mm512_cvt_roundpd_epi64(A, B) \
  ((__m512i) __builtin_ia32_cvtpd2qq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvt_roundpd_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtpd2qq512_mask ( \
     (A), (__v8di)(W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_cvtpd2qq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2003 1.1 mrg
/* Macro forms built on __builtin_ia32_cvtpd2uqq512_mask.  Operand order is
   (source, pass-through, mask, B): the plain form passes zero and -1, the
   mask form W and U, the maskz form zero and U.  */
#define _mm512_cvt_roundpd_epu64(A, B) \
  ((__m512i) __builtin_ia32_cvtpd2uqq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvt_roundpd_epu64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtpd2uqq512_mask ( \
     (A), (__v8di)(W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_epu64(U, A, B) \
  ((__m512i) __builtin_ia32_cvtpd2uqq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2012 1.1 mrg
/* Macro forms built on __builtin_ia32_cvtps2qq512_mask.  Operand order is
   (source, pass-through, mask, B): the plain form passes zero and -1, the
   mask form W and U, the maskz form zero and U.  */
#define _mm512_cvt_roundps_epi64(A, B) \
  ((__m512i) __builtin_ia32_cvtps2qq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvt_roundps_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2qq512_mask ( \
     (A), (__v8di)(W), (U), (B)))

#define _mm512_maskz_cvt_roundps_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2qq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2021 1.1 mrg
/* Macro forms built on __builtin_ia32_cvtps2uqq512_mask.  Operand order is
   (source, pass-through, mask, B): the plain form passes zero and -1, the
   mask form W and U, the maskz form zero and U.  */
#define _mm512_cvt_roundps_epu64(A, B) \
  ((__m512i) __builtin_ia32_cvtps2uqq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvt_roundps_epu64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2uqq512_mask ( \
     (A), (__v8di)(W), (U), (B)))

#define _mm512_maskz_cvt_roundps_epu64(U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2uqq512_mask ( \
     (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2030 1.1 mrg
/* Macro forms built on __builtin_ia32_cvtqq2ps512_mask; the result is cast
   to __m256.  Operand order is (source, pass-through, mask, B): the plain
   form passes zero and -1, the mask form W and U, the maskz form zero and
   U.  */
#define _mm512_cvt_roundepi64_ps(A, B) \
  ((__m256) __builtin_ia32_cvtqq2ps512_mask ( \
     (__v8di)(A), (__v8sf) _mm256_setzero_ps (), -1, (B)))

#define _mm512_mask_cvt_roundepi64_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtqq2ps512_mask ( \
     (__v8di)(A), (W), (U), (B)))

#define _mm512_maskz_cvt_roundepi64_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtqq2ps512_mask ( \
     (__v8di)(A), (__v8sf) _mm256_setzero_ps (), (U), (B)))
2039 1.1 mrg
/* Macro forms built on __builtin_ia32_cvtuqq2ps512_mask; the result is cast
   to __m256.  Operand order is (source, pass-through, mask, B): the plain
   form passes zero and -1, the mask form W and U, the maskz form zero and
   U.  */
#define _mm512_cvt_roundepu64_ps(A, B) \
  ((__m256) __builtin_ia32_cvtuqq2ps512_mask ( \
     (__v8di)(A), (__v8sf) _mm256_setzero_ps (), -1, (B)))

#define _mm512_mask_cvt_roundepu64_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtuqq2ps512_mask ( \
     (__v8di)(A), (W), (U), (B)))

#define _mm512_maskz_cvt_roundepu64_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtuqq2ps512_mask ( \
     (__v8di)(A), (__v8sf) _mm256_setzero_ps (), (U), (B)))
2048 1.1 mrg
/* Macro forms built on __builtin_ia32_cvtqq2pd512_mask.  Operand order is
   (source, pass-through, mask, B): the plain form passes zero and -1, the
   mask form W and U, the maskz form zero and U.  */
#define _mm512_cvt_roundepi64_pd(A, B) \
  ((__m512d) __builtin_ia32_cvtqq2pd512_mask ( \
     (__v8di)(A), (__v8df) _mm512_setzero_pd (), -1, (B)))

#define _mm512_mask_cvt_roundepi64_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_cvtqq2pd512_mask ( \
     (__v8di)(A), (W), (U), (B)))

#define _mm512_maskz_cvt_roundepi64_pd(U, A, B) \
  ((__m512d) __builtin_ia32_cvtqq2pd512_mask ( \
     (__v8di)(A), (__v8df) _mm512_setzero_pd (), (U), (B)))
2057 1.1 mrg
/* Macro forms built on __builtin_ia32_cvtuqq2pd512_mask.  Operand order is
   (source, pass-through, mask, B): the plain form passes zero and -1, the
   mask form W and U, the maskz form zero and U.  */
#define _mm512_cvt_roundepu64_pd(A, B) \
  ((__m512d) __builtin_ia32_cvtuqq2pd512_mask ( \
     (__v8di)(A), (__v8df) _mm512_setzero_pd (), -1, (B)))

#define _mm512_mask_cvt_roundepu64_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_cvtuqq2pd512_mask ( \
     (__v8di)(A), (W), (U), (B)))

#define _mm512_maskz_cvt_roundepu64_pd(U, A, B) \
  ((__m512d) __builtin_ia32_cvtuqq2pd512_mask ( \
     (__v8di)(A), (__v8df) _mm512_setzero_pd (), (U), (B)))
2066 1.1 mrg
/* Macro forms built on __builtin_ia32_reducepd512_mask.  Operand order is
   (source, B, pass-through, mask): the plain form passes zero and an
   all-ones mask, the mask form W and U, the maskz form zero and U.  */
#define _mm512_reduce_pd(A, B) \
  ((__m512d) __builtin_ia32_reducepd512_mask ( \
     (__v8df)(__m512d)(A), \
     (int)(B), \
     (__v8df) _mm512_setzero_pd (), \
     (__mmask8)-1))

#define _mm512_mask_reduce_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_reducepd512_mask ( \
     (__v8df)(__m512d)(A), \
     (int)(B), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U)))

#define _mm512_maskz_reduce_pd(U, A, B) \
  ((__m512d) __builtin_ia32_reducepd512_mask ( \
     (__v8df)(__m512d)(A), \
     (int)(B), \
     (__v8df) _mm512_setzero_pd (), \
     (__mmask8)(U)))
2078 1.1 mrg
/* Macro forms built on __builtin_ia32_reduceps512_mask.  Operand order is
   (source, B, pass-through, mask): the plain form passes zero and an
   all-ones 16-bit mask, the mask form W and U, the maskz form zero and
   U.  */
#define _mm512_reduce_ps(A, B) \
  ((__m512) __builtin_ia32_reduceps512_mask ( \
     (__v16sf)(__m512)(A), \
     (int)(B), \
     (__v16sf) _mm512_setzero_ps (), \
     (__mmask16)-1))

#define _mm512_mask_reduce_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_reduceps512_mask ( \
     (__v16sf)(__m512)(A), \
     (int)(B), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U)))

#define _mm512_maskz_reduce_ps(U, A, B) \
  ((__m512) __builtin_ia32_reduceps512_mask ( \
     (__v16sf)(__m512)(A), \
     (int)(B), \
     (__v16sf) _mm512_setzero_ps (), \
     (__mmask16)(U)))
2090 1.1 mrg
/* Macro forms built on __builtin_ia32_extractf32x8_mask.  Operand order is
   (source, C, pass-through, mask): the plain form passes zero and an
   all-ones 8-bit mask, the mask form W and U, the maskz form zero and
   U.  */
#define _mm512_extractf32x8_ps(X, C) \
  ((__m256) __builtin_ia32_extractf32x8_mask ( \
     (__v16sf)(__m512) (X), \
     (int) (C), \
     (__v8sf)(__m256) _mm256_setzero_ps (), \
     (__mmask8)-1))

#define _mm512_mask_extractf32x8_ps(W, U, X, C) \
  ((__m256) __builtin_ia32_extractf32x8_mask ( \
     (__v16sf)(__m512) (X), \
     (int) (C), \
     (__v8sf)(__m256) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_extractf32x8_ps(U, X, C) \
  ((__m256) __builtin_ia32_extractf32x8_mask ( \
     (__v16sf)(__m512) (X), \
     (int) (C), \
     (__v8sf)(__m256) _mm256_setzero_ps (), \
     (__mmask8) (U)))
2102 1.1 mrg
/* Macro forms built on __builtin_ia32_extractf64x2_512_mask.  Operand order
   is (source, C, pass-through, mask): the plain form passes zero and an
   all-ones 8-bit mask, the mask form W and U, the maskz form zero and
   U.  */
#define _mm512_extractf64x2_pd(X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_512_mask ( \
     (__v8df)(__m512d) (X), \
     (int) (C), \
     (__v2df)(__m128d) _mm_setzero_pd (), \
     (__mmask8)-1))

#define _mm512_mask_extractf64x2_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_512_mask ( \
     (__v8df)(__m512d) (X), \
     (int) (C), \
     (__v2df)(__m128d) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_extractf64x2_pd(U, X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_512_mask ( \
     (__v8df)(__m512d) (X), \
     (int) (C), \
     (__v2df)(__m128d) _mm_setzero_pd (), \
     (__mmask8) (U)))
2114 1.1 mrg
/* Macro forms built on __builtin_ia32_extracti32x8_mask.  Operand order is
   (source, C, pass-through, mask): the plain form passes zero and an
   all-ones 8-bit mask, the mask form W and U, the maskz form zero and
   U.  */
#define _mm512_extracti32x8_epi32(X, C) \
  ((__m256i) __builtin_ia32_extracti32x8_mask ( \
     (__v16si)(__m512i) (X), \
     (int) (C), \
     (__v8si)(__m256i) _mm256_setzero_si256 (), \
     (__mmask8)-1))

#define _mm512_mask_extracti32x8_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_extracti32x8_mask ( \
     (__v16si)(__m512i) (X), \
     (int) (C), \
     (__v8si)(__m256i) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_extracti32x8_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_extracti32x8_mask ( \
     (__v16si)(__m512i) (X), \
     (int) (C), \
     (__v8si)(__m256i) _mm256_setzero_si256 (), \
     (__mmask8) (U)))
2126 1.1 mrg
/* Macro forms built on __builtin_ia32_extracti64x2_512_mask.  Operand order
   is (source, C, pass-through, mask): the plain form passes zero and an
   all-ones 8-bit mask, the mask form W and U, the maskz form zero and
   U.  */
#define _mm512_extracti64x2_epi64(X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_512_mask ( \
     (__v8di)(__m512i) (X), \
     (int) (C), \
     (__v2di)(__m128i) _mm_setzero_di (), \
     (__mmask8)-1))

#define _mm512_mask_extracti64x2_epi64(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_512_mask ( \
     (__v8di)(__m512i) (X), \
     (int) (C), \
     (__v2di)(__m128i) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_extracti64x2_epi64(U, X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_512_mask ( \
     (__v8di)(__m512i) (X), \
     (int) (C), \
     (__v2di)(__m128i) _mm_setzero_di (), \
     (__mmask8) (U)))
2138 1.1 mrg
/* Macro forms built on __builtin_ia32_rangepd512_mask with
   _MM_FROUND_CUR_DIRECTION as the last operand.  The plain form passes a
   zero pass-through and an all-ones mask, the mask form W and U, the maskz
   form zero and U.  */
#define _mm512_range_pd(A, B, C) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(B), \
     (int)(C), \
     (__v8df) _mm512_setzero_pd (), \
     (__mmask8)-1, \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_range_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(B), \
     (int)(C), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_range_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(B), \
     (int)(C), \
     (__v8df) _mm512_setzero_pd (), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
2153 1.1 mrg
/* Macro forms built on __builtin_ia32_rangeps512_mask with
   _MM_FROUND_CUR_DIRECTION as the last operand.  The plain form passes a
   zero pass-through and an all-ones 16-bit mask, the mask form W and U, the
   maskz form zero and U.  */
#define _mm512_range_ps(A, B, C) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(B), \
     (int)(C), \
     (__v16sf) _mm512_setzero_ps (), \
     (__mmask16)-1, \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_range_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(B), \
     (int)(C), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U), \
     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_range_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(B), \
     (int)(C), \
     (__v16sf) _mm512_setzero_ps (), \
     (__mmask16)(U), \
     _MM_FROUND_CUR_DIRECTION))
2168 1.1 mrg
/* Macro forms built on __builtin_ia32_rangepd512_mask with the caller's R
   as the last operand.  The plain form passes a zero pass-through and an
   all-ones mask, the mask form W and U, the maskz form zero and U.  */
#define _mm512_range_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(B), \
     (int)(C), \
     (__v8df) _mm512_setzero_pd (), \
     (__mmask8)-1, \
     (R)))

#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(B), \
     (int)(C), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U), \
     (R)))

#define _mm512_maskz_range_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(B), \
     (int)(C), \
     (__v8df) _mm512_setzero_pd (), \
     (__mmask8)(U), \
     (R)))
2183 1.1 mrg
/* Macro forms built on __builtin_ia32_rangeps512_mask with the caller's R
   as the last operand.  The plain form passes a zero pass-through and an
   all-ones 16-bit mask, the mask form W and U, the maskz form zero and
   U.  */
#define _mm512_range_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(B), \
     (int)(C), \
     (__v16sf) _mm512_setzero_ps (), \
     (__mmask16)-1, \
     (R)))

#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(B), \
     (int)(C), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U), \
     (R)))

#define _mm512_maskz_range_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)(__m512)(B), \
     (int)(C), \
     (__v16sf) _mm512_setzero_ps (), \
     (__mmask16)(U), \
     (R)))
2198 1.1 mrg
/* Unmasked macro form built on __builtin_ia32_insertf64x2_512_mask.
   The pass-through operand is a zero vector, matching the inline
   definition of this intrinsic and the other unmasked insert macros in
   this file.  The previous expansion passed (X) a second time there, so an
   argument with side effects was evaluated twice.  The mask is all-ones,
   so the pass-through value is not expected to reach the result.  */
#define _mm512_insertf64x2(X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x2_512_mask ( \
     (__v8df)(__m512d) (X), \
     (__v2df)(__m128d) (Y), \
     (int) (C), \
     (__v8df)(__m512d) _mm512_setzero_pd (), \
     (__mmask8)-1))
2203 1.1 mrg
/* Masked and zero-masked macro forms built on
   __builtin_ia32_insertf64x2_512_mask: the mask form passes W as the
   pass-through operand, the maskz form a zero vector; both pass U as the
   mask.  */
#define _mm512_mask_insertf64x2(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x2_512_mask ( \
     (__v8df)(__m512d) (X), \
     (__v2df)(__m128d) (Y), \
     (int) (C), \
     (__v8df)(__m512d) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_insertf64x2(U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x2_512_mask ( \
     (__v8df)(__m512d) (X), \
     (__v2df)(__m128d) (Y), \
     (int) (C), \
     (__v8df)(__m512d) _mm512_setzero_pd (), \
     (__mmask8) (U)))
2213 1.1 mrg
/* Unmasked macro form built on __builtin_ia32_inserti64x2_512_mask.
   The pass-through operand is a zero vector, matching the inline
   definition of this intrinsic and the other unmasked insert macros in
   this file.  The previous expansion passed (X) a second time there, so an
   argument with side effects was evaluated twice.  The mask is all-ones,
   so the pass-through value is not expected to reach the result.  */
#define _mm512_inserti64x2(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x2_512_mask ( \
     (__v8di)(__m512i) (X), \
     (__v2di)(__m128i) (Y), \
     (int) (C), \
     (__v8di)(__m512i) _mm512_setzero_si512 (), \
     (__mmask8)-1))
2217 1.1 mrg
/* Masked and zero-masked macro forms built on
   __builtin_ia32_inserti64x2_512_mask: the mask form passes W as the
   pass-through operand, the maskz form a zero vector; both pass U as the
   mask.  */
#define _mm512_mask_inserti64x2(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x2_512_mask ( \
     (__v8di)(__m512i) (X), \
     (__v2di)(__m128i) (Y), \
     (int) (C), \
     (__v8di)(__m512i) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_inserti64x2(U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x2_512_mask ( \
     (__v8di)(__m512i) (X), \
     (__v2di)(__m128i) (Y), \
     (int) (C), \
     (__v8di)(__m512i) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
2227 1.1 mrg
/* Macro forms built on __builtin_ia32_insertf32x8_mask.  Operand order is
   (X, Y, C, pass-through, mask): the plain form passes zero and an
   all-ones 16-bit mask, the mask form W and U, the maskz form zero and
   U.  */
#define _mm512_insertf32x8(X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x8_mask ( \
     (__v16sf)(__m512) (X), \
     (__v8sf)(__m256) (Y), \
     (int) (C), \
     (__v16sf)(__m512) _mm512_setzero_ps (), \
     (__mmask16)-1))

#define _mm512_mask_insertf32x8(W, U, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x8_mask ( \
     (__v16sf)(__m512) (X), \
     (__v8sf)(__m256) (Y), \
     (int) (C), \
     (__v16sf)(__m512)(W), \
     (__mmask16)(U)))

#define _mm512_maskz_insertf32x8(U, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x8_mask ( \
     (__v16sf)(__m512) (X), \
     (__v8sf)(__m256) (Y), \
     (int) (C), \
     (__v16sf)(__m512) _mm512_setzero_ps (), \
     (__mmask16)(U)))
2245 1.1 mrg
/* Macro forms built on __builtin_ia32_inserti32x8_mask.  Operand order is
   (X, Y, C, pass-through, mask): the plain form passes zero and an
   all-ones 16-bit mask, the mask form W and U, the maskz form zero and
   U.  */
#define _mm512_inserti32x8(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x8_mask ( \
     (__v16si)(__m512i) (X), \
     (__v8si)(__m256i) (Y), \
     (int) (C), \
     (__v16si)(__m512i) _mm512_setzero_si512 (), \
     (__mmask16)-1))

#define _mm512_mask_inserti32x8(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x8_mask ( \
     (__v16si)(__m512i) (X), \
     (__v8si)(__m256i) (Y), \
     (int) (C), \
     (__v16si)(__m512i)(W), \
     (__mmask16)(U)))

#define _mm512_maskz_inserti32x8(U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x8_mask ( \
     (__v16si)(__m512i) (X), \
     (__v8si)(__m256i) (Y), \
     (int) (C), \
     (__v16si)(__m512i) _mm512_setzero_si512 (), \
     (__mmask16)(U)))
2263 1.1 mrg
/* Scalar fpclass macros: forward X and C to __builtin_ia32_fpclassss /
   __builtin_ia32_fpclasssd and cast the result to __mmask8.  The
   definitions no longer end in a dangling line continuation, which used
   to splice whatever followed into the macro body.  */
#define _mm_fpclass_ss_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclassss ((__v4sf) (__m128) (X), (int) (C)))

#define _mm_fpclass_sd_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclasssd ((__v2df) (__m128d) (X), (int) (C)))
2269 1.1 mrg
/* Masked macro form: forwards X, C and the 8-bit mask u to
   __builtin_ia32_fpclasspd512_mask.  */
#define _mm512_mask_fpclass_pd_mask(u, X, C) \
  ((__mmask8) __builtin_ia32_fpclasspd512_mask ( \
     (__v8df) (__m512d) (X), \
     (int) (C), \
     (__mmask8)(u)))
2273 1.1 mrg
/* Masked macro form: forwards x, c and the mask u to
   __builtin_ia32_fpclassps512_mask.  The mask is cast to __mmask16, the
   type the inline definition of this intrinsic takes; the previous
   __mmask8 cast discarded the upper eight mask bits.  */
#define _mm512_mask_fpclass_ps_mask(u, x, c) \
  ((__mmask16) __builtin_ia32_fpclassps512_mask ( \
     (__v16sf) (__m512) (x), \
     (int) (c), \
     (__mmask16)(u)))
2277 1.1 mrg
/* Unmasked macro form: forwards X and C to
   __builtin_ia32_fpclasspd512_mask with an all-ones 8-bit mask.  */
#define _mm512_fpclass_pd_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclasspd512_mask ( \
     (__v8df) (__m512d) (X), \
     (int) (C), \
     (__mmask8)-1))
2281 1.1 mrg
/* Unmasked macro form: forwards x and c to
   __builtin_ia32_fpclassps512_mask with an all-ones 16-bit mask, as the
   inline definition of this intrinsic does.  The previous (__mmask8)-1
   set only the low eight mask bits.  */
#define _mm512_fpclass_ps_mask(x, c) \
  ((__mmask16) __builtin_ia32_fpclassps512_mask ( \
     (__v16sf) (__m512) (x), \
     (int) (c), \
     (__mmask16)-1))
2285 1.1 mrg
/* Forwards A, B and C to __builtin_ia32_reducesd.  The definition no
   longer ends in a dangling line continuation, which used to splice
   whatever followed into the macro body.  */
#define _mm_reduce_sd(A, B, C) \
  ((__m128d) __builtin_ia32_reducesd ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     (int)(C)))
2289 1.1 mrg
/* Forwards A, B and C to __builtin_ia32_reducess.  The second operand is
   B, as in _mm_reduce_sd; the previous expansion passed A twice and
   silently ignored B.  The dangling trailing line continuation is removed
   as well.  */
#define _mm_reduce_ss(A, B, C) \
  ((__m128) __builtin_ia32_reducess ( \
     (__v4sf)(__m128)(A), \
     (__v4sf)(__m128)(B), \
     (int)(C)))
2293 1.1 mrg
2294 1.1 mrg #endif
2295 1.1 mrg
2296 1.1 mrg #ifdef __DISABLE_AVX512DQ__
2297 1.1 mrg #undef __DISABLE_AVX512DQ__
2298 1.1 mrg #pragma GCC pop_options
2299 1.1 mrg #endif /* __DISABLE_AVX512DQ__ */
2300 1.1 mrg
2301 1.1 mrg #endif /* _AVX512DQINTRIN_H_INCLUDED */
2302