avx512bwintrin.h revision 1.1 1 /*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------===
2 *
3 *
4 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 * See https://llvm.org/LICENSE.txt for license information.
6 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *
8 *===-----------------------------------------------------------------------===
9 */
10 #ifndef __IMMINTRIN_H
11 #error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
12 #endif
13
14 #ifndef __AVX512BWINTRIN_H
15 #define __AVX512BWINTRIN_H
16
/* Integer carrier types for the 32-bit and 64-bit mask values that the
 * intrinsics in this file take and return. */
typedef unsigned int __mmask32;
typedef unsigned long long __mmask64;
19
/* Attribute sets shared by the intrinsics below.  Both request
 * always_inline and nodebug and enable the "avx512bw" target feature; the
 * 512 variant additionally declares a minimum vector width of 512. */
#define __DEFAULT_FN_ATTRS512                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"),       \
                 __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
23
24 static __inline __mmask32 __DEFAULT_FN_ATTRS
25 _knot_mask32(__mmask32 __M)
26 {
27 return __builtin_ia32_knotsi(__M);
28 }
29
30 static __inline __mmask64 __DEFAULT_FN_ATTRS
31 _knot_mask64(__mmask64 __M)
32 {
33 return __builtin_ia32_knotdi(__M);
34 }
35
36 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
37 _kand_mask32(__mmask32 __A, __mmask32 __B)
38 {
39 return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
40 }
41
42 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
43 _kand_mask64(__mmask64 __A, __mmask64 __B)
44 {
45 return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
46 }
47
48 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
49 _kandn_mask32(__mmask32 __A, __mmask32 __B)
50 {
51 return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
52 }
53
54 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
55 _kandn_mask64(__mmask64 __A, __mmask64 __B)
56 {
57 return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
58 }
59
60 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
61 _kor_mask32(__mmask32 __A, __mmask32 __B)
62 {
63 return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
64 }
65
66 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
67 _kor_mask64(__mmask64 __A, __mmask64 __B)
68 {
69 return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
70 }
71
72 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
73 _kxnor_mask32(__mmask32 __A, __mmask32 __B)
74 {
75 return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
76 }
77
78 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
79 _kxnor_mask64(__mmask64 __A, __mmask64 __B)
80 {
81 return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
82 }
83
84 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
85 _kxor_mask32(__mmask32 __A, __mmask32 __B)
86 {
87 return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
88 }
89
90 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
91 _kxor_mask64(__mmask64 __A, __mmask64 __B)
92 {
93 return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
94 }
95
96 static __inline__ unsigned char __DEFAULT_FN_ATTRS
97 _kortestc_mask32_u8(__mmask32 __A, __mmask32 __B)
98 {
99 return (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
100 }
101
102 static __inline__ unsigned char __DEFAULT_FN_ATTRS
103 _kortestz_mask32_u8(__mmask32 __A, __mmask32 __B)
104 {
105 return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
106 }
107
108 static __inline__ unsigned char __DEFAULT_FN_ATTRS
109 _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
110 *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
111 return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
112 }
113
114 static __inline__ unsigned char __DEFAULT_FN_ATTRS
115 _kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
116 {
117 return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
118 }
119
120 static __inline__ unsigned char __DEFAULT_FN_ATTRS
121 _kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
122 {
123 return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
124 }
125
126 static __inline__ unsigned char __DEFAULT_FN_ATTRS
127 _kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
128 *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
129 return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
130 }
131
132 static __inline__ unsigned char __DEFAULT_FN_ATTRS
133 _ktestc_mask32_u8(__mmask32 __A, __mmask32 __B)
134 {
135 return (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
136 }
137
138 static __inline__ unsigned char __DEFAULT_FN_ATTRS
139 _ktestz_mask32_u8(__mmask32 __A, __mmask32 __B)
140 {
141 return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
142 }
143
144 static __inline__ unsigned char __DEFAULT_FN_ATTRS
145 _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
146 *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
147 return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
148 }
149
150 static __inline__ unsigned char __DEFAULT_FN_ATTRS
151 _ktestc_mask64_u8(__mmask64 __A, __mmask64 __B)
152 {
153 return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
154 }
155
156 static __inline__ unsigned char __DEFAULT_FN_ATTRS
157 _ktestz_mask64_u8(__mmask64 __A, __mmask64 __B)
158 {
159 return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
160 }
161
162 static __inline__ unsigned char __DEFAULT_FN_ATTRS
163 _ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
164 *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
165 return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
166 }
167
168 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
169 _kadd_mask32(__mmask32 __A, __mmask32 __B)
170 {
171 return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
172 }
173
174 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
175 _kadd_mask64(__mmask64 __A, __mmask64 __B)
176 {
177 return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
178 }
179
/* Left-shifts the 32-bit mask A by I using the kshiftlisi builtin.  The whole
 * expansion is parenthesized so the leading cast cannot be mis-parsed when the
 * macro is used as an operand of sizeof or a postfix operator. */
#define _kshiftli_mask32(A, I)                                                 \
  ((__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I)))
182
/* Right-shifts the 32-bit mask A by I using the kshiftrisi builtin.  The whole
 * expansion is parenthesized so the leading cast cannot be mis-parsed when the
 * macro is used as an operand of sizeof or a postfix operator. */
#define _kshiftri_mask32(A, I)                                                 \
  ((__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I)))
185
/* Left-shifts the 64-bit mask A by I using the kshiftlidi builtin.  The whole
 * expansion is parenthesized so the leading cast cannot be mis-parsed when the
 * macro is used as an operand of sizeof or a postfix operator. */
#define _kshiftli_mask64(A, I)                                                 \
  ((__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I)))
188
/* Right-shifts the 64-bit mask A by I using the kshiftridi builtin.  The whole
 * expansion is parenthesized so the leading cast cannot be mis-parsed when the
 * macro is used as an operand of sizeof or a postfix operator. */
#define _kshiftri_mask64(A, I)                                                 \
  ((__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)))
191
192 static __inline__ unsigned int __DEFAULT_FN_ATTRS
193 _cvtmask32_u32(__mmask32 __A) {
194 return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A);
195 }
196
197 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
198 _cvtmask64_u64(__mmask64 __A) {
199 return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A);
200 }
201
202 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
203 _cvtu32_mask32(unsigned int __A) {
204 return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A);
205 }
206
207 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
208 _cvtu64_mask64(unsigned long long __A) {
209 return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A);
210 }
211
212 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
213 _load_mask32(__mmask32 *__A) {
214 return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
215 }
216
217 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
218 _load_mask64(__mmask64 *__A) {
219 return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
220 }
221
222 static __inline__ void __DEFAULT_FN_ATTRS
223 _store_mask32(__mmask32 *__A, __mmask32 __B) {
224 *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
225 }
226
227 static __inline__ void __DEFAULT_FN_ATTRS
228 _store_mask64(__mmask64 *__A, __mmask64 __B) {
229 *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B);
230 }
231
232 /* Integer compare */
233
/* Compares the __v64qi views of a and b with predicate p through the cmpb512
 * builtin, using an all-ones input mask, and yields a __mmask64.  The whole
 * expansion is parenthesized so the leading cast cannot be mis-parsed next to
 * sizeof or a postfix operator. */
#define _mm512_cmp_epi8_mask(a, b, p)                                          \
  ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a),               \
                                          (__v64qi)(__m512i)(b), (int)(p),     \
                                          (__mmask64)-1))
238
/* Compares the __v64qi views of a and b with predicate p through the cmpb512
 * builtin, passing m as the input mask, and yields a __mmask64.  The whole
 * expansion is parenthesized so the leading cast cannot be mis-parsed next to
 * sizeof or a postfix operator. */
#define _mm512_mask_cmp_epi8_mask(m, a, b, p)                                  \
  ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a),               \
                                          (__v64qi)(__m512i)(b), (int)(p),     \
                                          (__mmask64)(m)))
243
/* Compares the __v64qi views of a and b with predicate p through the ucmpb512
 * builtin, using an all-ones input mask, and yields a __mmask64.  The whole
 * expansion is parenthesized so the leading cast cannot be mis-parsed next to
 * sizeof or a postfix operator. */
#define _mm512_cmp_epu8_mask(a, b, p)                                          \
  ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a),              \
                                           (__v64qi)(__m512i)(b), (int)(p),    \
                                           (__mmask64)-1))
248
/* Compares the __v64qi views of a and b with predicate p through the ucmpb512
 * builtin, passing m as the input mask, and yields a __mmask64.  The whole
 * expansion is parenthesized so the leading cast cannot be mis-parsed next to
 * sizeof or a postfix operator. */
#define _mm512_mask_cmp_epu8_mask(m, a, b, p)                                  \
  ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a),              \
                                           (__v64qi)(__m512i)(b), (int)(p),    \
                                           (__mmask64)(m)))
253
/* Compares the __v32hi views of a and b with predicate p through the cmpw512
 * builtin, using an all-ones input mask, and yields a __mmask32.  The whole
 * expansion is parenthesized so the leading cast cannot be mis-parsed next to
 * sizeof or a postfix operator. */
#define _mm512_cmp_epi16_mask(a, b, p)                                         \
  ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a),               \
                                          (__v32hi)(__m512i)(b), (int)(p),     \
                                          (__mmask32)-1))
258
/* Compares the __v32hi views of a and b with predicate p through the cmpw512
 * builtin, passing m as the input mask, and yields a __mmask32.  The whole
 * expansion is parenthesized so the leading cast cannot be mis-parsed next to
 * sizeof or a postfix operator. */
#define _mm512_mask_cmp_epi16_mask(m, a, b, p)                                 \
  ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a),               \
                                          (__v32hi)(__m512i)(b), (int)(p),     \
                                          (__mmask32)(m)))
263
/* Compares the __v32hi views of a and b with predicate p through the ucmpw512
 * builtin, using an all-ones input mask, and yields a __mmask32.  The whole
 * expansion is parenthesized so the leading cast cannot be mis-parsed next to
 * sizeof or a postfix operator. */
#define _mm512_cmp_epu16_mask(a, b, p)                                         \
  ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a),              \
                                           (__v32hi)(__m512i)(b), (int)(p),    \
                                           (__mmask32)-1))
268
/* Compares the __v32hi views of a and b with predicate p through the ucmpw512
 * builtin, passing m as the input mask, and yields a __mmask32.  The whole
 * expansion is parenthesized so the leading cast cannot be mis-parsed next to
 * sizeof or a postfix operator. */
#define _mm512_mask_cmp_epu16_mask(m, a, b, p)                                 \
  ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a),              \
                                           (__v32hi)(__m512i)(b), (int)(p),    \
                                           (__mmask32)(m)))
273
/* Fixed-predicate shorthands for _mm512_cmp_epi8_mask and its masked form:
 * each one supplies the matching _MM_CMPINT_* constant (EQ, GE, GT, LE, LT,
 * NE).  Every expansion is wrapped in parentheses so it is a primary
 * expression regardless of how the underlying compare macro expands. */
#define _mm512_cmpeq_epi8_mask(A, B)                                           \
  (_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ))
#define _mm512_mask_cmpeq_epi8_mask(k, A, B)                                   \
  (_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ))
#define _mm512_cmpge_epi8_mask(A, B)                                           \
  (_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GE))
#define _mm512_mask_cmpge_epi8_mask(k, A, B)                                   \
  (_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE))
#define _mm512_cmpgt_epi8_mask(A, B)                                           \
  (_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GT))
#define _mm512_mask_cmpgt_epi8_mask(k, A, B)                                   \
  (_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT))
#define _mm512_cmple_epi8_mask(A, B)                                           \
  (_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LE))
#define _mm512_mask_cmple_epi8_mask(k, A, B)                                   \
  (_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE))
#define _mm512_cmplt_epi8_mask(A, B)                                           \
  (_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LT))
#define _mm512_mask_cmplt_epi8_mask(k, A, B)                                   \
  (_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT))
#define _mm512_cmpneq_epi8_mask(A, B)                                          \
  (_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_NE))
#define _mm512_mask_cmpneq_epi8_mask(k, A, B)                                  \
  (_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE))
298
/* Fixed-predicate shorthands for _mm512_cmp_epu8_mask and its masked form:
 * each one supplies the matching _MM_CMPINT_* constant (EQ, GE, GT, LE, LT,
 * NE).  Every expansion is wrapped in parentheses so it is a primary
 * expression regardless of how the underlying compare macro expands. */
#define _mm512_cmpeq_epu8_mask(A, B)                                           \
  (_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ))
#define _mm512_mask_cmpeq_epu8_mask(k, A, B)                                   \
  (_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ))
#define _mm512_cmpge_epu8_mask(A, B)                                           \
  (_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GE))
#define _mm512_mask_cmpge_epu8_mask(k, A, B)                                   \
  (_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE))
#define _mm512_cmpgt_epu8_mask(A, B)                                           \
  (_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GT))
#define _mm512_mask_cmpgt_epu8_mask(k, A, B)                                   \
  (_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT))
#define _mm512_cmple_epu8_mask(A, B)                                           \
  (_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LE))
#define _mm512_mask_cmple_epu8_mask(k, A, B)                                   \
  (_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE))
#define _mm512_cmplt_epu8_mask(A, B)                                           \
  (_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LT))
#define _mm512_mask_cmplt_epu8_mask(k, A, B)                                   \
  (_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT))
#define _mm512_cmpneq_epu8_mask(A, B)                                          \
  (_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_NE))
#define _mm512_mask_cmpneq_epu8_mask(k, A, B)                                  \
  (_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE))
323
/* Fixed-predicate shorthands for _mm512_cmp_epi16_mask and its masked form:
 * each one supplies the matching _MM_CMPINT_* constant (EQ, GE, GT, LE, LT,
 * NE).  Every expansion is wrapped in parentheses so it is a primary
 * expression regardless of how the underlying compare macro expands. */
#define _mm512_cmpeq_epi16_mask(A, B)                                          \
  (_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ))
#define _mm512_mask_cmpeq_epi16_mask(k, A, B)                                  \
  (_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ))
#define _mm512_cmpge_epi16_mask(A, B)                                          \
  (_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GE))
#define _mm512_mask_cmpge_epi16_mask(k, A, B)                                  \
  (_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE))
#define _mm512_cmpgt_epi16_mask(A, B)                                          \
  (_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GT))
#define _mm512_mask_cmpgt_epi16_mask(k, A, B)                                  \
  (_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT))
#define _mm512_cmple_epi16_mask(A, B)                                          \
  (_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LE))
#define _mm512_mask_cmple_epi16_mask(k, A, B)                                  \
  (_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE))
#define _mm512_cmplt_epi16_mask(A, B)                                          \
  (_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LT))
#define _mm512_mask_cmplt_epi16_mask(k, A, B)                                  \
  (_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT))
#define _mm512_cmpneq_epi16_mask(A, B)                                         \
  (_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_NE))
#define _mm512_mask_cmpneq_epi16_mask(k, A, B)                                 \
  (_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE))
348
/* Fixed-predicate shorthands for _mm512_cmp_epu16_mask and its masked form:
 * each one supplies the matching _MM_CMPINT_* constant (EQ, GE, GT, LE, LT,
 * NE).  Every expansion is wrapped in parentheses so it is a primary
 * expression regardless of how the underlying compare macro expands. */
#define _mm512_cmpeq_epu16_mask(A, B)                                          \
  (_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ))
#define _mm512_mask_cmpeq_epu16_mask(k, A, B)                                  \
  (_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ))
#define _mm512_cmpge_epu16_mask(A, B)                                          \
  (_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GE))
#define _mm512_mask_cmpge_epu16_mask(k, A, B)                                  \
  (_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE))
#define _mm512_cmpgt_epu16_mask(A, B)                                          \
  (_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GT))
#define _mm512_mask_cmpgt_epu16_mask(k, A, B)                                  \
  (_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT))
#define _mm512_cmple_epu16_mask(A, B)                                          \
  (_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LE))
#define _mm512_mask_cmple_epu16_mask(k, A, B)                                  \
  (_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE))
#define _mm512_cmplt_epu16_mask(A, B)                                          \
  (_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT))
#define _mm512_mask_cmplt_epu16_mask(k, A, B)                                  \
  (_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT))
#define _mm512_cmpneq_epu16_mask(A, B)                                         \
  (_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_NE))
#define _mm512_mask_cmpneq_epu16_mask(k, A, B)                                 \
  (_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE))
373
374 static __inline__ __m512i __DEFAULT_FN_ATTRS512
375 _mm512_add_epi8 (__m512i __A, __m512i __B) {
376 return (__m512i) ((__v64qu) __A + (__v64qu) __B);
377 }
378
379 static __inline__ __m512i __DEFAULT_FN_ATTRS512
380 _mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
381 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
382 (__v64qi)_mm512_add_epi8(__A, __B),
383 (__v64qi)__W);
384 }
385
386 static __inline__ __m512i __DEFAULT_FN_ATTRS512
387 _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
388 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
389 (__v64qi)_mm512_add_epi8(__A, __B),
390 (__v64qi)_mm512_setzero_si512());
391 }
392
393 static __inline__ __m512i __DEFAULT_FN_ATTRS512
394 _mm512_sub_epi8 (__m512i __A, __m512i __B) {
395 return (__m512i) ((__v64qu) __A - (__v64qu) __B);
396 }
397
398 static __inline__ __m512i __DEFAULT_FN_ATTRS512
399 _mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
400 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
401 (__v64qi)_mm512_sub_epi8(__A, __B),
402 (__v64qi)__W);
403 }
404
405 static __inline__ __m512i __DEFAULT_FN_ATTRS512
406 _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
407 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
408 (__v64qi)_mm512_sub_epi8(__A, __B),
409 (__v64qi)_mm512_setzero_si512());
410 }
411
412 static __inline__ __m512i __DEFAULT_FN_ATTRS512
413 _mm512_add_epi16 (__m512i __A, __m512i __B) {
414 return (__m512i) ((__v32hu) __A + (__v32hu) __B);
415 }
416
417 static __inline__ __m512i __DEFAULT_FN_ATTRS512
418 _mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
419 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
420 (__v32hi)_mm512_add_epi16(__A, __B),
421 (__v32hi)__W);
422 }
423
424 static __inline__ __m512i __DEFAULT_FN_ATTRS512
425 _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
426 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
427 (__v32hi)_mm512_add_epi16(__A, __B),
428 (__v32hi)_mm512_setzero_si512());
429 }
430
431 static __inline__ __m512i __DEFAULT_FN_ATTRS512
432 _mm512_sub_epi16 (__m512i __A, __m512i __B) {
433 return (__m512i) ((__v32hu) __A - (__v32hu) __B);
434 }
435
436 static __inline__ __m512i __DEFAULT_FN_ATTRS512
437 _mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
438 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
439 (__v32hi)_mm512_sub_epi16(__A, __B),
440 (__v32hi)__W);
441 }
442
443 static __inline__ __m512i __DEFAULT_FN_ATTRS512
444 _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
445 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
446 (__v32hi)_mm512_sub_epi16(__A, __B),
447 (__v32hi)_mm512_setzero_si512());
448 }
449
450 static __inline__ __m512i __DEFAULT_FN_ATTRS512
451 _mm512_mullo_epi16 (__m512i __A, __m512i __B) {
452 return (__m512i) ((__v32hu) __A * (__v32hu) __B);
453 }
454
455 static __inline__ __m512i __DEFAULT_FN_ATTRS512
456 _mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
457 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
458 (__v32hi)_mm512_mullo_epi16(__A, __B),
459 (__v32hi)__W);
460 }
461
462 static __inline__ __m512i __DEFAULT_FN_ATTRS512
463 _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
464 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
465 (__v32hi)_mm512_mullo_epi16(__A, __B),
466 (__v32hi)_mm512_setzero_si512());
467 }
468
469 static __inline__ __m512i __DEFAULT_FN_ATTRS512
470 _mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
471 {
472 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
473 (__v64qi) __W,
474 (__v64qi) __A);
475 }
476
477 static __inline__ __m512i __DEFAULT_FN_ATTRS512
478 _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
479 {
480 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
481 (__v32hi) __W,
482 (__v32hi) __A);
483 }
484
485 static __inline__ __m512i __DEFAULT_FN_ATTRS512
486 _mm512_abs_epi8 (__m512i __A)
487 {
488 return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A);
489 }
490
491 static __inline__ __m512i __DEFAULT_FN_ATTRS512
492 _mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
493 {
494 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
495 (__v64qi)_mm512_abs_epi8(__A),
496 (__v64qi)__W);
497 }
498
499 static __inline__ __m512i __DEFAULT_FN_ATTRS512
500 _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
501 {
502 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
503 (__v64qi)_mm512_abs_epi8(__A),
504 (__v64qi)_mm512_setzero_si512());
505 }
506
507 static __inline__ __m512i __DEFAULT_FN_ATTRS512
508 _mm512_abs_epi16 (__m512i __A)
509 {
510 return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A);
511 }
512
513 static __inline__ __m512i __DEFAULT_FN_ATTRS512
514 _mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
515 {
516 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
517 (__v32hi)_mm512_abs_epi16(__A),
518 (__v32hi)__W);
519 }
520
521 static __inline__ __m512i __DEFAULT_FN_ATTRS512
522 _mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A)
523 {
524 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
525 (__v32hi)_mm512_abs_epi16(__A),
526 (__v32hi)_mm512_setzero_si512());
527 }
528
529 static __inline__ __m512i __DEFAULT_FN_ATTRS512
530 _mm512_packs_epi32(__m512i __A, __m512i __B)
531 {
532 return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B);
533 }
534
535 static __inline__ __m512i __DEFAULT_FN_ATTRS512
536 _mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B)
537 {
538 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
539 (__v32hi)_mm512_packs_epi32(__A, __B),
540 (__v32hi)_mm512_setzero_si512());
541 }
542
543 static __inline__ __m512i __DEFAULT_FN_ATTRS512
544 _mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
545 {
546 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
547 (__v32hi)_mm512_packs_epi32(__A, __B),
548 (__v32hi)__W);
549 }
550
551 static __inline__ __m512i __DEFAULT_FN_ATTRS512
552 _mm512_packs_epi16(__m512i __A, __m512i __B)
553 {
554 return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B);
555 }
556
557 static __inline__ __m512i __DEFAULT_FN_ATTRS512
558 _mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
559 {
560 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
561 (__v64qi)_mm512_packs_epi16(__A, __B),
562 (__v64qi)__W);
563 }
564
565 static __inline__ __m512i __DEFAULT_FN_ATTRS512
566 _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B)
567 {
568 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
569 (__v64qi)_mm512_packs_epi16(__A, __B),
570 (__v64qi)_mm512_setzero_si512());
571 }
572
573 static __inline__ __m512i __DEFAULT_FN_ATTRS512
574 _mm512_packus_epi32(__m512i __A, __m512i __B)
575 {
576 return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B);
577 }
578
579 static __inline__ __m512i __DEFAULT_FN_ATTRS512
580 _mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B)
581 {
582 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
583 (__v32hi)_mm512_packus_epi32(__A, __B),
584 (__v32hi)_mm512_setzero_si512());
585 }
586
587 static __inline__ __m512i __DEFAULT_FN_ATTRS512
588 _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
589 {
590 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
591 (__v32hi)_mm512_packus_epi32(__A, __B),
592 (__v32hi)__W);
593 }
594
595 static __inline__ __m512i __DEFAULT_FN_ATTRS512
596 _mm512_packus_epi16(__m512i __A, __m512i __B)
597 {
598 return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B);
599 }
600
601 static __inline__ __m512i __DEFAULT_FN_ATTRS512
602 _mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
603 {
604 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
605 (__v64qi)_mm512_packus_epi16(__A, __B),
606 (__v64qi)__W);
607 }
608
609 static __inline__ __m512i __DEFAULT_FN_ATTRS512
610 _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B)
611 {
612 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
613 (__v64qi)_mm512_packus_epi16(__A, __B),
614 (__v64qi)_mm512_setzero_si512());
615 }
616
617 static __inline__ __m512i __DEFAULT_FN_ATTRS512
618 _mm512_adds_epi8 (__m512i __A, __m512i __B)
619 {
620 return (__m512i)__builtin_ia32_paddsb512((__v64qi)__A, (__v64qi)__B);
621 }
622
623 static __inline__ __m512i __DEFAULT_FN_ATTRS512
624 _mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
625 {
626 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
627 (__v64qi)_mm512_adds_epi8(__A, __B),
628 (__v64qi)__W);
629 }
630
631 static __inline__ __m512i __DEFAULT_FN_ATTRS512
632 _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
633 {
634 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
635 (__v64qi)_mm512_adds_epi8(__A, __B),
636 (__v64qi)_mm512_setzero_si512());
637 }
638
639 static __inline__ __m512i __DEFAULT_FN_ATTRS512
640 _mm512_adds_epi16 (__m512i __A, __m512i __B)
641 {
642 return (__m512i)__builtin_ia32_paddsw512((__v32hi)__A, (__v32hi)__B);
643 }
644
645 static __inline__ __m512i __DEFAULT_FN_ATTRS512
646 _mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
647 {
648 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
649 (__v32hi)_mm512_adds_epi16(__A, __B),
650 (__v32hi)__W);
651 }
652
653 static __inline__ __m512i __DEFAULT_FN_ATTRS512
654 _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
655 {
656 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
657 (__v32hi)_mm512_adds_epi16(__A, __B),
658 (__v32hi)_mm512_setzero_si512());
659 }
660
661 static __inline__ __m512i __DEFAULT_FN_ATTRS512
662 _mm512_adds_epu8 (__m512i __A, __m512i __B)
663 {
664 return (__m512i)__builtin_ia32_paddusb512((__v64qi) __A, (__v64qi) __B);
665 }
666
667 static __inline__ __m512i __DEFAULT_FN_ATTRS512
668 _mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
669 {
670 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
671 (__v64qi)_mm512_adds_epu8(__A, __B),
672 (__v64qi)__W);
673 }
674
675 static __inline__ __m512i __DEFAULT_FN_ATTRS512
676 _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
677 {
678 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
679 (__v64qi)_mm512_adds_epu8(__A, __B),
680 (__v64qi)_mm512_setzero_si512());
681 }
682
683 static __inline__ __m512i __DEFAULT_FN_ATTRS512
684 _mm512_adds_epu16 (__m512i __A, __m512i __B)
685 {
686 return (__m512i)__builtin_ia32_paddusw512((__v32hi) __A, (__v32hi) __B);
687 }
688
689 static __inline__ __m512i __DEFAULT_FN_ATTRS512
690 _mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
691 {
692 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
693 (__v32hi)_mm512_adds_epu16(__A, __B),
694 (__v32hi)__W);
695 }
696
697 static __inline__ __m512i __DEFAULT_FN_ATTRS512
698 _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
699 {
700 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
701 (__v32hi)_mm512_adds_epu16(__A, __B),
702 (__v32hi)_mm512_setzero_si512());
703 }
704
705 static __inline__ __m512i __DEFAULT_FN_ATTRS512
706 _mm512_avg_epu8 (__m512i __A, __m512i __B)
707 {
708 return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B);
709 }
710
711 static __inline__ __m512i __DEFAULT_FN_ATTRS512
712 _mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
713 __m512i __B)
714 {
715 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
716 (__v64qi)_mm512_avg_epu8(__A, __B),
717 (__v64qi)__W);
718 }
719
720 static __inline__ __m512i __DEFAULT_FN_ATTRS512
721 _mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
722 {
723 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
724 (__v64qi)_mm512_avg_epu8(__A, __B),
725 (__v64qi)_mm512_setzero_si512());
726 }
727
728 static __inline__ __m512i __DEFAULT_FN_ATTRS512
729 _mm512_avg_epu16 (__m512i __A, __m512i __B)
730 {
731 return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B);
732 }
733
734 static __inline__ __m512i __DEFAULT_FN_ATTRS512
735 _mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
736 __m512i __B)
737 {
738 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
739 (__v32hi)_mm512_avg_epu16(__A, __B),
740 (__v32hi)__W);
741 }
742
743 static __inline__ __m512i __DEFAULT_FN_ATTRS512
744 _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
745 {
746 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
747 (__v32hi)_mm512_avg_epu16(__A, __B),
748 (__v32hi) _mm512_setzero_si512());
749 }
750
751 static __inline__ __m512i __DEFAULT_FN_ATTRS512
752 _mm512_max_epi8 (__m512i __A, __m512i __B)
753 {
754 return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B);
755 }
756
757 static __inline__ __m512i __DEFAULT_FN_ATTRS512
758 _mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
759 {
760 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
761 (__v64qi)_mm512_max_epi8(__A, __B),
762 (__v64qi)_mm512_setzero_si512());
763 }
764
765 static __inline__ __m512i __DEFAULT_FN_ATTRS512
766 _mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
767 {
768 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
769 (__v64qi)_mm512_max_epi8(__A, __B),
770 (__v64qi)__W);
771 }
772
773 static __inline__ __m512i __DEFAULT_FN_ATTRS512
774 _mm512_max_epi16 (__m512i __A, __m512i __B)
775 {
776 return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B);
777 }
778
779 static __inline__ __m512i __DEFAULT_FN_ATTRS512
780 _mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
781 {
782 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
783 (__v32hi)_mm512_max_epi16(__A, __B),
784 (__v32hi)_mm512_setzero_si512());
785 }
786
787 static __inline__ __m512i __DEFAULT_FN_ATTRS512
788 _mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
789 __m512i __B)
790 {
791 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
792 (__v32hi)_mm512_max_epi16(__A, __B),
793 (__v32hi)__W);
794 }
795
796 static __inline__ __m512i __DEFAULT_FN_ATTRS512
797 _mm512_max_epu8 (__m512i __A, __m512i __B)
798 {
799 return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B);
800 }
801
802 static __inline__ __m512i __DEFAULT_FN_ATTRS512
803 _mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
804 {
805 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
806 (__v64qi)_mm512_max_epu8(__A, __B),
807 (__v64qi)_mm512_setzero_si512());
808 }
809
810 static __inline__ __m512i __DEFAULT_FN_ATTRS512
811 _mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
812 {
813 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
814 (__v64qi)_mm512_max_epu8(__A, __B),
815 (__v64qi)__W);
816 }
817
818 static __inline__ __m512i __DEFAULT_FN_ATTRS512
819 _mm512_max_epu16 (__m512i __A, __m512i __B)
820 {
821 return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B);
822 }
823
824 static __inline__ __m512i __DEFAULT_FN_ATTRS512
825 _mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
826 {
827 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
828 (__v32hi)_mm512_max_epu16(__A, __B),
829 (__v32hi)_mm512_setzero_si512());
830 }
831
832 static __inline__ __m512i __DEFAULT_FN_ATTRS512
833 _mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
834 {
835 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
836 (__v32hi)_mm512_max_epu16(__A, __B),
837 (__v32hi)__W);
838 }
839
840 static __inline__ __m512i __DEFAULT_FN_ATTRS512
841 _mm512_min_epi8 (__m512i __A, __m512i __B)
842 {
843 return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B);
844 }
845
846 static __inline__ __m512i __DEFAULT_FN_ATTRS512
847 _mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
848 {
849 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
850 (__v64qi)_mm512_min_epi8(__A, __B),
851 (__v64qi)_mm512_setzero_si512());
852 }
853
854 static __inline__ __m512i __DEFAULT_FN_ATTRS512
855 _mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
856 {
857 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
858 (__v64qi)_mm512_min_epi8(__A, __B),
859 (__v64qi)__W);
860 }
861
862 static __inline__ __m512i __DEFAULT_FN_ATTRS512
863 _mm512_min_epi16 (__m512i __A, __m512i __B)
864 {
865 return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B);
866 }
867
868 static __inline__ __m512i __DEFAULT_FN_ATTRS512
869 _mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
870 {
871 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
872 (__v32hi)_mm512_min_epi16(__A, __B),
873 (__v32hi)_mm512_setzero_si512());
874 }
875
876 static __inline__ __m512i __DEFAULT_FN_ATTRS512
877 _mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
878 {
879 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
880 (__v32hi)_mm512_min_epi16(__A, __B),
881 (__v32hi)__W);
882 }
883
884 static __inline__ __m512i __DEFAULT_FN_ATTRS512
885 _mm512_min_epu8 (__m512i __A, __m512i __B)
886 {
887 return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B);
888 }
889
890 static __inline__ __m512i __DEFAULT_FN_ATTRS512
891 _mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
892 {
893 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
894 (__v64qi)_mm512_min_epu8(__A, __B),
895 (__v64qi)_mm512_setzero_si512());
896 }
897
898 static __inline__ __m512i __DEFAULT_FN_ATTRS512
899 _mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
900 {
901 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
902 (__v64qi)_mm512_min_epu8(__A, __B),
903 (__v64qi)__W);
904 }
905
906 static __inline__ __m512i __DEFAULT_FN_ATTRS512
907 _mm512_min_epu16 (__m512i __A, __m512i __B)
908 {
909 return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B);
910 }
911
912 static __inline__ __m512i __DEFAULT_FN_ATTRS512
913 _mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
914 {
915 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
916 (__v32hi)_mm512_min_epu16(__A, __B),
917 (__v32hi)_mm512_setzero_si512());
918 }
919
920 static __inline__ __m512i __DEFAULT_FN_ATTRS512
921 _mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
922 {
923 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
924 (__v32hi)_mm512_min_epu16(__A, __B),
925 (__v32hi)__W);
926 }
927
928 static __inline__ __m512i __DEFAULT_FN_ATTRS512
929 _mm512_shuffle_epi8(__m512i __A, __m512i __B)
930 {
931 return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B);
932 }
933
934 static __inline__ __m512i __DEFAULT_FN_ATTRS512
935 _mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
936 {
937 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
938 (__v64qi)_mm512_shuffle_epi8(__A, __B),
939 (__v64qi)__W);
940 }
941
942 static __inline__ __m512i __DEFAULT_FN_ATTRS512
943 _mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B)
944 {
945 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
946 (__v64qi)_mm512_shuffle_epi8(__A, __B),
947 (__v64qi)_mm512_setzero_si512());
948 }
949
950 static __inline__ __m512i __DEFAULT_FN_ATTRS512
951 _mm512_subs_epi8 (__m512i __A, __m512i __B)
952 {
953 return (__m512i)__builtin_ia32_psubsb512((__v64qi)__A, (__v64qi)__B);
954 }
955
956 static __inline__ __m512i __DEFAULT_FN_ATTRS512
957 _mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
958 {
959 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
960 (__v64qi)_mm512_subs_epi8(__A, __B),
961 (__v64qi)__W);
962 }
963
964 static __inline__ __m512i __DEFAULT_FN_ATTRS512
965 _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
966 {
967 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
968 (__v64qi)_mm512_subs_epi8(__A, __B),
969 (__v64qi)_mm512_setzero_si512());
970 }
971
972 static __inline__ __m512i __DEFAULT_FN_ATTRS512
973 _mm512_subs_epi16 (__m512i __A, __m512i __B)
974 {
975 return (__m512i)__builtin_ia32_psubsw512((__v32hi)__A, (__v32hi)__B);
976 }
977
978 static __inline__ __m512i __DEFAULT_FN_ATTRS512
979 _mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
980 {
981 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
982 (__v32hi)_mm512_subs_epi16(__A, __B),
983 (__v32hi)__W);
984 }
985
986 static __inline__ __m512i __DEFAULT_FN_ATTRS512
987 _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
988 {
989 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
990 (__v32hi)_mm512_subs_epi16(__A, __B),
991 (__v32hi)_mm512_setzero_si512());
992 }
993
994 static __inline__ __m512i __DEFAULT_FN_ATTRS512
995 _mm512_subs_epu8 (__m512i __A, __m512i __B)
996 {
997 return (__m512i)__builtin_ia32_psubusb512((__v64qi) __A, (__v64qi) __B);
998 }
999
1000 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1001 _mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
1002 {
1003 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1004 (__v64qi)_mm512_subs_epu8(__A, __B),
1005 (__v64qi)__W);
1006 }
1007
1008 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1009 _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
1010 {
1011 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1012 (__v64qi)_mm512_subs_epu8(__A, __B),
1013 (__v64qi)_mm512_setzero_si512());
1014 }
1015
1016 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1017 _mm512_subs_epu16 (__m512i __A, __m512i __B)
1018 {
1019 return (__m512i)__builtin_ia32_psubusw512((__v32hi) __A, (__v32hi) __B);
1020 }
1021
1022 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1023 _mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1024 {
1025 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1026 (__v32hi)_mm512_subs_epu16(__A, __B),
1027 (__v32hi)__W);
1028 }
1029
1030 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1031 _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
1032 {
1033 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1034 (__v32hi)_mm512_subs_epu16(__A, __B),
1035 (__v32hi)_mm512_setzero_si512());
1036 }
1037
1038 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1039 _mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B)
1040 {
1041 return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
1042 (__v32hi)__B);
1043 }
1044
1045 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1046 _mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I,
1047 __m512i __B)
1048 {
1049 return (__m512i)__builtin_ia32_selectw_512(__U,
1050 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
1051 (__v32hi)__A);
1052 }
1053
1054 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1055 _mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U,
1056 __m512i __B)
1057 {
1058 return (__m512i)__builtin_ia32_selectw_512(__U,
1059 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
1060 (__v32hi)__I);
1061 }
1062
1063 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1064 _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I,
1065 __m512i __B)
1066 {
1067 return (__m512i)__builtin_ia32_selectw_512(__U,
1068 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
1069 (__v32hi)_mm512_setzero_si512());
1070 }
1071
1072 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1073 _mm512_mulhrs_epi16(__m512i __A, __m512i __B)
1074 {
1075 return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B);
1076 }
1077
1078 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1079 _mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1080 {
1081 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1082 (__v32hi)_mm512_mulhrs_epi16(__A, __B),
1083 (__v32hi)__W);
1084 }
1085
1086 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1087 _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1088 {
1089 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1090 (__v32hi)_mm512_mulhrs_epi16(__A, __B),
1091 (__v32hi)_mm512_setzero_si512());
1092 }
1093
1094 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1095 _mm512_mulhi_epi16(__m512i __A, __m512i __B)
1096 {
1097 return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B);
1098 }
1099
1100 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1101 _mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A,
1102 __m512i __B)
1103 {
1104 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1105 (__v32hi)_mm512_mulhi_epi16(__A, __B),
1106 (__v32hi)__W);
1107 }
1108
1109 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1110 _mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1111 {
1112 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1113 (__v32hi)_mm512_mulhi_epi16(__A, __B),
1114 (__v32hi)_mm512_setzero_si512());
1115 }
1116
1117 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1118 _mm512_mulhi_epu16(__m512i __A, __m512i __B)
1119 {
1120 return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B);
1121 }
1122
1123 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1124 _mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1125 {
1126 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1127 (__v32hi)_mm512_mulhi_epu16(__A, __B),
1128 (__v32hi)__W);
1129 }
1130
1131 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1132 _mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
1133 {
1134 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1135 (__v32hi)_mm512_mulhi_epu16(__A, __B),
1136 (__v32hi)_mm512_setzero_si512());
1137 }
1138
1139 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1140 _mm512_maddubs_epi16(__m512i __X, __m512i __Y) {
1141 return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y);
1142 }
1143
1144 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1145 _mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X,
1146 __m512i __Y) {
1147 return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
1148 (__v32hi)_mm512_maddubs_epi16(__X, __Y),
1149 (__v32hi)__W);
1150 }
1151
1152 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1153 _mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) {
1154 return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
1155 (__v32hi)_mm512_maddubs_epi16(__X, __Y),
1156 (__v32hi)_mm512_setzero_si512());
1157 }
1158
1159 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1160 _mm512_madd_epi16(__m512i __A, __m512i __B) {
1161 return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B);
1162 }
1163
1164 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1165 _mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
1166 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
1167 (__v16si)_mm512_madd_epi16(__A, __B),
1168 (__v16si)__W);
1169 }
1170
1171 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1172 _mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) {
1173 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
1174 (__v16si)_mm512_madd_epi16(__A, __B),
1175 (__v16si)_mm512_setzero_si512());
1176 }
1177
1178 static __inline__ __m256i __DEFAULT_FN_ATTRS512
1179 _mm512_cvtsepi16_epi8 (__m512i __A) {
1180 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1181 (__v32qi)_mm256_setzero_si256(),
1182 (__mmask32) -1);
1183 }
1184
1185 static __inline__ __m256i __DEFAULT_FN_ATTRS512
1186 _mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1187 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1188 (__v32qi)__O,
1189 __M);
1190 }
1191
1192 static __inline__ __m256i __DEFAULT_FN_ATTRS512
1193 _mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) {
1194 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1195 (__v32qi) _mm256_setzero_si256(),
1196 __M);
1197 }
1198
1199 static __inline__ __m256i __DEFAULT_FN_ATTRS512
1200 _mm512_cvtusepi16_epi8 (__m512i __A) {
1201 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1202 (__v32qi) _mm256_setzero_si256(),
1203 (__mmask32) -1);
1204 }
1205
1206 static __inline__ __m256i __DEFAULT_FN_ATTRS512
1207 _mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1208 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1209 (__v32qi) __O,
1210 __M);
1211 }
1212
1213 static __inline__ __m256i __DEFAULT_FN_ATTRS512
1214 _mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) {
1215 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1216 (__v32qi) _mm256_setzero_si256(),
1217 __M);
1218 }
1219
1220 static __inline__ __m256i __DEFAULT_FN_ATTRS512
1221 _mm512_cvtepi16_epi8 (__m512i __A) {
1222 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1223 (__v32qi) _mm256_undefined_si256(),
1224 (__mmask32) -1);
1225 }
1226
1227 static __inline__ __m256i __DEFAULT_FN_ATTRS512
1228 _mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1229 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1230 (__v32qi) __O,
1231 __M);
1232 }
1233
1234 static __inline__ __m256i __DEFAULT_FN_ATTRS512
1235 _mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) {
1236 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1237 (__v32qi) _mm256_setzero_si256(),
1238 __M);
1239 }
1240
1241 static __inline__ void __DEFAULT_FN_ATTRS512
1242 _mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1243 {
1244 __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1245 }
1246
1247 static __inline__ void __DEFAULT_FN_ATTRS512
1248 _mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1249 {
1250 __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1251 }
1252
1253 static __inline__ void __DEFAULT_FN_ATTRS512
1254 _mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1255 {
1256 __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1257 }
1258
1259 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1260 _mm512_unpackhi_epi8(__m512i __A, __m512i __B) {
1261 return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
1262 8, 64+8, 9, 64+9,
1263 10, 64+10, 11, 64+11,
1264 12, 64+12, 13, 64+13,
1265 14, 64+14, 15, 64+15,
1266 24, 64+24, 25, 64+25,
1267 26, 64+26, 27, 64+27,
1268 28, 64+28, 29, 64+29,
1269 30, 64+30, 31, 64+31,
1270 40, 64+40, 41, 64+41,
1271 42, 64+42, 43, 64+43,
1272 44, 64+44, 45, 64+45,
1273 46, 64+46, 47, 64+47,
1274 56, 64+56, 57, 64+57,
1275 58, 64+58, 59, 64+59,
1276 60, 64+60, 61, 64+61,
1277 62, 64+62, 63, 64+63);
1278 }
1279
1280 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1281 _mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
1282 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1283 (__v64qi)_mm512_unpackhi_epi8(__A, __B),
1284 (__v64qi)__W);
1285 }
1286
1287 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1288 _mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
1289 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1290 (__v64qi)_mm512_unpackhi_epi8(__A, __B),
1291 (__v64qi)_mm512_setzero_si512());
1292 }
1293
1294 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1295 _mm512_unpackhi_epi16(__m512i __A, __m512i __B) {
1296 return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
1297 4, 32+4, 5, 32+5,
1298 6, 32+6, 7, 32+7,
1299 12, 32+12, 13, 32+13,
1300 14, 32+14, 15, 32+15,
1301 20, 32+20, 21, 32+21,
1302 22, 32+22, 23, 32+23,
1303 28, 32+28, 29, 32+29,
1304 30, 32+30, 31, 32+31);
1305 }
1306
1307 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1308 _mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
1309 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1310 (__v32hi)_mm512_unpackhi_epi16(__A, __B),
1311 (__v32hi)__W);
1312 }
1313
1314 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1315 _mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
1316 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1317 (__v32hi)_mm512_unpackhi_epi16(__A, __B),
1318 (__v32hi)_mm512_setzero_si512());
1319 }
1320
1321 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1322 _mm512_unpacklo_epi8(__m512i __A, __m512i __B) {
1323 return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
1324 0, 64+0, 1, 64+1,
1325 2, 64+2, 3, 64+3,
1326 4, 64+4, 5, 64+5,
1327 6, 64+6, 7, 64+7,
1328 16, 64+16, 17, 64+17,
1329 18, 64+18, 19, 64+19,
1330 20, 64+20, 21, 64+21,
1331 22, 64+22, 23, 64+23,
1332 32, 64+32, 33, 64+33,
1333 34, 64+34, 35, 64+35,
1334 36, 64+36, 37, 64+37,
1335 38, 64+38, 39, 64+39,
1336 48, 64+48, 49, 64+49,
1337 50, 64+50, 51, 64+51,
1338 52, 64+52, 53, 64+53,
1339 54, 64+54, 55, 64+55);
1340 }
1341
1342 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1343 _mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
1344 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1345 (__v64qi)_mm512_unpacklo_epi8(__A, __B),
1346 (__v64qi)__W);
1347 }
1348
1349 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1350 _mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
1351 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1352 (__v64qi)_mm512_unpacklo_epi8(__A, __B),
1353 (__v64qi)_mm512_setzero_si512());
1354 }
1355
1356 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1357 _mm512_unpacklo_epi16(__m512i __A, __m512i __B) {
1358 return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
1359 0, 32+0, 1, 32+1,
1360 2, 32+2, 3, 32+3,
1361 8, 32+8, 9, 32+9,
1362 10, 32+10, 11, 32+11,
1363 16, 32+16, 17, 32+17,
1364 18, 32+18, 19, 32+19,
1365 24, 32+24, 25, 32+25,
1366 26, 32+26, 27, 32+27);
1367 }
1368
1369 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1370 _mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
1371 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1372 (__v32hi)_mm512_unpacklo_epi16(__A, __B),
1373 (__v32hi)__W);
1374 }
1375
1376 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1377 _mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
1378 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1379 (__v32hi)_mm512_unpacklo_epi16(__A, __B),
1380 (__v32hi)_mm512_setzero_si512());
1381 }
1382
1383 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1384 _mm512_cvtepi8_epi16(__m256i __A)
1385 {
1386 /* This function always performs a signed extension, but __v32qi is a char
1387 which may be signed or unsigned, so use __v32qs. */
1388 return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi);
1389 }
1390
1391 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1392 _mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
1393 {
1394 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1395 (__v32hi)_mm512_cvtepi8_epi16(__A),
1396 (__v32hi)__W);
1397 }
1398
1399 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1400 _mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A)
1401 {
1402 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1403 (__v32hi)_mm512_cvtepi8_epi16(__A),
1404 (__v32hi)_mm512_setzero_si512());
1405 }
1406
1407 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1408 _mm512_cvtepu8_epi16(__m256i __A)
1409 {
1410 return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi);
1411 }
1412
1413 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1414 _mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
1415 {
1416 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1417 (__v32hi)_mm512_cvtepu8_epi16(__A),
1418 (__v32hi)__W);
1419 }
1420
1421 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1422 _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A)
1423 {
1424 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1425 (__v32hi)_mm512_cvtepu8_epi16(__A),
1426 (__v32hi)_mm512_setzero_si512());
1427 }
1428
1429
/* Passes A (as __v32hi) and imm (as int) to __builtin_ia32_pshufhw512.
   Kept as a macro so imm reaches the builtin as written. The whole
   expansion is parenthesized so that postfix operators applied to the
   macro result bind to the cast value, not to the builtin call. */
#define _mm512_shufflehi_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm)))
1432
/* Merge-masking form: feeds the _mm512_shufflehi_epi16(A, imm) result and
   W to __builtin_ia32_selectw_512 under mask U. The whole expansion is
   parenthesized so it behaves as a single primary expression. */
#define _mm512_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflehi_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)(__m512i)(W)))
1438
/* Zero-masking form: feeds the _mm512_shufflehi_epi16(A, imm) result and
   an all-zero vector to __builtin_ia32_selectw_512 under mask U. The whole
   expansion is parenthesized so it behaves as a single primary
   expression. */
#define _mm512_maskz_shufflehi_epi16(U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflehi_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)_mm512_setzero_si512()))
1444
/* Passes A (as __v32hi) and imm (as int) to __builtin_ia32_pshuflw512.
   Kept as a macro so imm reaches the builtin as written. The whole
   expansion is parenthesized so that postfix operators applied to the
   macro result bind to the cast value, not to the builtin call. */
#define _mm512_shufflelo_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm)))
1447
1448
/* Merge-masking form: feeds the _mm512_shufflelo_epi16(A, imm) result and
   W to __builtin_ia32_selectw_512 under mask U. The whole expansion is
   parenthesized so it behaves as a single primary expression. */
#define _mm512_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflelo_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)(__m512i)(W)))
1454
1455
/* Zero-masking form: feeds the _mm512_shufflelo_epi16(A, imm) result and
   an all-zero vector to __builtin_ia32_selectw_512 under mask U. The whole
   expansion is parenthesized so it behaves as a single primary
   expression. */
#define _mm512_maskz_shufflelo_epi16(U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflelo_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)_mm512_setzero_si512()))
1461
1462 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1463 _mm512_sllv_epi16(__m512i __A, __m512i __B)
1464 {
1465 return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B);
1466 }
1467
1468 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1469 _mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1470 {
1471 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1472 (__v32hi)_mm512_sllv_epi16(__A, __B),
1473 (__v32hi)__W);
1474 }
1475
1476 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1477 _mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1478 {
1479 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1480 (__v32hi)_mm512_sllv_epi16(__A, __B),
1481 (__v32hi)_mm512_setzero_si512());
1482 }
1483
1484 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1485 _mm512_sll_epi16(__m512i __A, __m128i __B)
1486 {
1487 return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B);
1488 }
1489
1490 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1491 _mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1492 {
1493 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1494 (__v32hi)_mm512_sll_epi16(__A, __B),
1495 (__v32hi)__W);
1496 }
1497
1498 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1499 _mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1500 {
1501 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1502 (__v32hi)_mm512_sll_epi16(__A, __B),
1503 (__v32hi)_mm512_setzero_si512());
1504 }
1505
1506 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1507 _mm512_slli_epi16(__m512i __A, int __B)
1508 {
1509 return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B);
1510 }
1511
1512 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1513 _mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1514 {
1515 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1516 (__v32hi)_mm512_slli_epi16(__A, __B),
1517 (__v32hi)__W);
1518 }
1519
1520 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1521 _mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, int __B)
1522 {
1523 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1524 (__v32hi)_mm512_slli_epi16(__A, __B),
1525 (__v32hi)_mm512_setzero_si512());
1526 }
1527
/* Passes a (as __v8di) and imm (as int) to
   __builtin_ia32_pslldqi512_byteshift. Kept as a macro so imm reaches the
   builtin as written. The whole expansion is parenthesized so that postfix
   operators applied to the macro result bind to the cast value. */
#define _mm512_bslli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)))
1530
1531 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1532 _mm512_srlv_epi16(__m512i __A, __m512i __B)
1533 {
1534 return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B);
1535 }
1536
1537 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1538 _mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1539 {
1540 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1541 (__v32hi)_mm512_srlv_epi16(__A, __B),
1542 (__v32hi)__W);
1543 }
1544
1545 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1546 _mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1547 {
1548 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1549 (__v32hi)_mm512_srlv_epi16(__A, __B),
1550 (__v32hi)_mm512_setzero_si512());
1551 }
1552
1553 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1554 _mm512_srav_epi16(__m512i __A, __m512i __B)
1555 {
1556 return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B);
1557 }
1558
1559 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1560 _mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1561 {
1562 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1563 (__v32hi)_mm512_srav_epi16(__A, __B),
1564 (__v32hi)__W);
1565 }
1566
1567 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1568 _mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1569 {
1570 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1571 (__v32hi)_mm512_srav_epi16(__A, __B),
1572 (__v32hi)_mm512_setzero_si512());
1573 }
1574
1575 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1576 _mm512_sra_epi16(__m512i __A, __m128i __B)
1577 {
1578 return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B);
1579 }
1580
1581 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1582 _mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1583 {
1584 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1585 (__v32hi)_mm512_sra_epi16(__A, __B),
1586 (__v32hi)__W);
1587 }
1588
1589 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1590 _mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1591 {
1592 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1593 (__v32hi)_mm512_sra_epi16(__A, __B),
1594 (__v32hi)_mm512_setzero_si512());
1595 }
1596
1597 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1598 _mm512_srai_epi16(__m512i __A, int __B)
1599 {
1600 return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B);
1601 }
1602
1603 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1604 _mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1605 {
1606 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1607 (__v32hi)_mm512_srai_epi16(__A, __B),
1608 (__v32hi)__W);
1609 }
1610
1611 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1612 _mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, int __B)
1613 {
1614 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1615 (__v32hi)_mm512_srai_epi16(__A, __B),
1616 (__v32hi)_mm512_setzero_si512());
1617 }
1618
1619 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1620 _mm512_srl_epi16(__m512i __A, __m128i __B)
1621 {
1622 return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B);
1623 }
1624
1625 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1626 _mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1627 {
1628 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1629 (__v32hi)_mm512_srl_epi16(__A, __B),
1630 (__v32hi)__W);
1631 }
1632
1633 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1634 _mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1635 {
1636 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1637 (__v32hi)_mm512_srl_epi16(__A, __B),
1638 (__v32hi)_mm512_setzero_si512());
1639 }
1640
1641 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1642 _mm512_srli_epi16(__m512i __A, int __B)
1643 {
1644 return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, __B);
1645 }
1646
1647 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1648 _mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1649 {
1650 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1651 (__v32hi)_mm512_srli_epi16(__A, __B),
1652 (__v32hi)__W);
1653 }
1654
1655 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1656 _mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B)
1657 {
1658 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1659 (__v32hi)_mm512_srli_epi16(__A, __B),
1660 (__v32hi)_mm512_setzero_si512());
1661 }
1662
/* Passes a (as __v8di) and imm (as int) to
   __builtin_ia32_psrldqi512_byteshift. Kept as a macro so imm reaches the
   builtin as written. The whole expansion is parenthesized so that postfix
   operators applied to the macro result bind to the cast value. */
#define _mm512_bsrli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)))
1665
1666 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1667 _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
1668 {
1669 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
1670 (__v32hi) __A,
1671 (__v32hi) __W);
1672 }
1673
1674 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1675 _mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A)
1676 {
1677 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
1678 (__v32hi) __A,
1679 (__v32hi) _mm512_setzero_si512 ());
1680 }
1681
1682 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1683 _mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
1684 {
1685 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
1686 (__v64qi) __A,
1687 (__v64qi) __W);
1688 }
1689
1690 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1691 _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
1692 {
1693 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
1694 (__v64qi) __A,
1695 (__v64qi) _mm512_setzero_si512 ());
1696 }
1697
1698 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1699 _mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
1700 {
1701 return (__m512i) __builtin_ia32_selectb_512(__M,
1702 (__v64qi)_mm512_set1_epi8(__A),
1703 (__v64qi) __O);
1704 }
1705
1706 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1707 _mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
1708 {
1709 return (__m512i) __builtin_ia32_selectb_512(__M,
1710 (__v64qi) _mm512_set1_epi8(__A),
1711 (__v64qi) _mm512_setzero_si512());
1712 }
1713
1714 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
1715 _mm512_kunpackd (__mmask64 __A, __mmask64 __B)
1716 {
1717 return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
1718 (__mmask64) __B);
1719 }
1720
1721 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
1722 _mm512_kunpackw (__mmask32 __A, __mmask32 __B)
1723 {
1724 return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
1725 (__mmask32) __B);
1726 }
1727
1728 static __inline __m512i __DEFAULT_FN_ATTRS512
1729 _mm512_loadu_epi16 (void const *__P)
1730 {
1731 struct __loadu_epi16 {
1732 __m512i_u __v;
1733 } __attribute__((__packed__, __may_alias__));
1734 return ((struct __loadu_epi16*)__P)->__v;
1735 }
1736
1737 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1738 _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
1739 {
1740 return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
1741 (__v32hi) __W,
1742 (__mmask32) __U);
1743 }
1744
1745 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1746 _mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P)
1747 {
1748 return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
1749 (__v32hi)
1750 _mm512_setzero_si512 (),
1751 (__mmask32) __U);
1752 }
1753
1754 static __inline __m512i __DEFAULT_FN_ATTRS512
1755 _mm512_loadu_epi8 (void const *__P)
1756 {
1757 struct __loadu_epi8 {
1758 __m512i_u __v;
1759 } __attribute__((__packed__, __may_alias__));
1760 return ((struct __loadu_epi8*)__P)->__v;
1761 }
1762
1763 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1764 _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
1765 {
1766 return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
1767 (__v64qi) __W,
1768 (__mmask64) __U);
1769 }
1770
1771 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1772 _mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P)
1773 {
1774 return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
1775 (__v64qi)
1776 _mm512_setzero_si512 (),
1777 (__mmask64) __U);
1778 }
1779
1780 static __inline void __DEFAULT_FN_ATTRS512
1781 _mm512_storeu_epi16 (void *__P, __m512i __A)
1782 {
1783 struct __storeu_epi16 {
1784 __m512i_u __v;
1785 } __attribute__((__packed__, __may_alias__));
1786 ((struct __storeu_epi16*)__P)->__v = __A;
1787 }
1788
1789 static __inline__ void __DEFAULT_FN_ATTRS512
1790 _mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A)
1791 {
1792 __builtin_ia32_storedquhi512_mask ((__v32hi *) __P,
1793 (__v32hi) __A,
1794 (__mmask32) __U);
1795 }
1796
1797 static __inline void __DEFAULT_FN_ATTRS512
1798 _mm512_storeu_epi8 (void *__P, __m512i __A)
1799 {
1800 struct __storeu_epi8 {
1801 __m512i_u __v;
1802 } __attribute__((__packed__, __may_alias__));
1803 ((struct __storeu_epi8*)__P)->__v = __A;
1804 }
1805
1806 static __inline__ void __DEFAULT_FN_ATTRS512
1807 _mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A)
1808 {
1809 __builtin_ia32_storedquqi512_mask ((__v64qi *) __P,
1810 (__v64qi) __A,
1811 (__mmask64) __U);
1812 }
1813
1814 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1815 _mm512_test_epi8_mask (__m512i __A, __m512i __B)
1816 {
1817 return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B),
1818 _mm512_setzero_si512());
1819 }
1820
1821 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1822 _mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
1823 {
1824 return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
1825 _mm512_setzero_si512());
1826 }
1827
1828 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1829 _mm512_test_epi16_mask (__m512i __A, __m512i __B)
1830 {
1831 return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B),
1832 _mm512_setzero_si512());
1833 }
1834
1835 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1836 _mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
1837 {
1838 return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
1839 _mm512_setzero_si512());
1840 }
1841
1842 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1843 _mm512_testn_epi8_mask (__m512i __A, __m512i __B)
1844 {
1845 return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512());
1846 }
1847
1848 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1849 _mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
1850 {
1851 return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
1852 _mm512_setzero_si512());
1853 }
1854
1855 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1856 _mm512_testn_epi16_mask (__m512i __A, __m512i __B)
1857 {
1858 return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B),
1859 _mm512_setzero_si512());
1860 }
1861
1862 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1863 _mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
1864 {
1865 return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
1866 _mm512_setzero_si512());
1867 }
1868
1869 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1870 _mm512_movepi8_mask (__m512i __A)
1871 {
1872 return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A);
1873 }
1874
1875 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1876 _mm512_movepi16_mask (__m512i __A)
1877 {
1878 return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A);
1879 }
1880
1881 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1882 _mm512_movm_epi8 (__mmask64 __A)
1883 {
1884 return (__m512i) __builtin_ia32_cvtmask2b512 (__A);
1885 }
1886
1887 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1888 _mm512_movm_epi16 (__mmask32 __A)
1889 {
1890 return (__m512i) __builtin_ia32_cvtmask2w512 (__A);
1891 }
1892
1893 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1894 _mm512_broadcastb_epi8 (__m128i __A)
1895 {
1896 return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A,
1897 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1898 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1899 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1900 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1901 }
1902
1903 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1904 _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A)
1905 {
1906 return (__m512i)__builtin_ia32_selectb_512(__M,
1907 (__v64qi) _mm512_broadcastb_epi8(__A),
1908 (__v64qi) __O);
1909 }
1910
1911 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1912 _mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A)
1913 {
1914 return (__m512i)__builtin_ia32_selectb_512(__M,
1915 (__v64qi) _mm512_broadcastb_epi8(__A),
1916 (__v64qi) _mm512_setzero_si512());
1917 }
1918
1919 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1920 _mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
1921 {
1922 return (__m512i) __builtin_ia32_selectw_512(__M,
1923 (__v32hi) _mm512_set1_epi16(__A),
1924 (__v32hi) __O);
1925 }
1926
1927 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1928 _mm512_maskz_set1_epi16 (__mmask32 __M, short __A)
1929 {
1930 return (__m512i) __builtin_ia32_selectw_512(__M,
1931 (__v32hi) _mm512_set1_epi16(__A),
1932 (__v32hi) _mm512_setzero_si512());
1933 }
1934
1935 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1936 _mm512_broadcastw_epi16 (__m128i __A)
1937 {
1938 return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A,
1939 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1940 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1941 }
1942
1943 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1944 _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A)
1945 {
1946 return (__m512i)__builtin_ia32_selectw_512(__M,
1947 (__v32hi) _mm512_broadcastw_epi16(__A),
1948 (__v32hi) __O);
1949 }
1950
1951 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1952 _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
1953 {
1954 return (__m512i)__builtin_ia32_selectw_512(__M,
1955 (__v32hi) _mm512_broadcastw_epi16(__A),
1956 (__v32hi) _mm512_setzero_si512());
1957 }
1958
1959 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1960 _mm512_permutexvar_epi16 (__m512i __A, __m512i __B)
1961 {
1962 return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
1963 }
1964
1965 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1966 _mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A,
1967 __m512i __B)
1968 {
1969 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
1970 (__v32hi)_mm512_permutexvar_epi16(__A, __B),
1971 (__v32hi)_mm512_setzero_si512());
1972 }
1973
1974 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1975 _mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
1976 __m512i __B)
1977 {
1978 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
1979 (__v32hi)_mm512_permutexvar_epi16(__A, __B),
1980 (__v32hi)__W);
1981 }
1982
/* Byte-wise align-right of A and B through the palignr512 builtin.
 *
 * A and B are converted to __m512i and then viewed as 64 x 8-bit vectors;
 * N is passed as the builtin's int operand.  This is a macro rather than a
 * function so that N reaches the builtin as written by the caller.
 *
 * The whole expansion is parenthesized so that it behaves as a single
 * primary expression: without the outer parentheses a postfix operator
 * written after the macro would bind to the builtin call before the
 * (__m512i) cast is applied.
 */
#define _mm512_alignr_epi8(A, B, N) \
  ((__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \
                                      (__v64qi)(__m512i)(B), (int)(N)))
1986
/* Merge-masked byte align-right: byte lanes selected by U receive
 * _mm512_alignr_epi8(A, B, N); the remaining lanes are taken from W.
 *
 * The whole expansion is parenthesized so that it behaves as a single
 * primary expression: without the outer parentheses a postfix operator
 * written after the macro would bind to the builtin call before the
 * (__m512i) cast is applied.
 */
#define _mm512_mask_alignr_epi8(W, U, A, B, N) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
              (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
              (__v64qi)(__m512i)(W)))
1991
/* Zero-masked byte align-right: byte lanes selected by U receive
 * _mm512_alignr_epi8(A, B, N); the remaining lanes are taken from an
 * all-zero vector.
 *
 * The whole expansion is parenthesized so that it behaves as a single
 * primary expression: without the outer parentheses a postfix operator
 * written after the macro would bind to the builtin call before the
 * (__m512i) cast is applied.
 */
#define _mm512_maskz_alignr_epi8(U, A, B, N) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
              (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
              (__v64qi)(__m512i)_mm512_setzero_si512()))
1996
/* Double-block sum-of-absolute-differences on bytes through the
 * dbpsadbw512 builtin.
 *
 * A and B are converted to __m512i and then viewed as 64 x 8-bit vectors;
 * imm is passed as the builtin's int operand.  This is a macro rather than
 * a function so that imm reaches the builtin as written by the caller.
 *
 * The whole expansion is parenthesized so that it behaves as a single
 * primary expression: without the outer parentheses a postfix operator
 * written after the macro would bind to the builtin call before the
 * (__m512i) cast is applied.
 */
#define _mm512_dbsad_epu8(A, B, imm) \
  ((__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \
                                       (__v64qi)(__m512i)(B), (int)(imm)))
2000
/* Merge-masked dbsad: 16-bit lanes selected by U receive
 * _mm512_dbsad_epu8(A, B, imm); the remaining lanes are taken from W.
 *
 * The whole expansion is parenthesized so that it behaves as a single
 * primary expression: without the outer parentheses a postfix operator
 * written after the macro would bind to the builtin call before the
 * (__m512i) cast is applied.
 */
#define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                    (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
                    (__v32hi)(__m512i)(W)))
2005
/* Zero-masked dbsad: 16-bit lanes selected by U receive
 * _mm512_dbsad_epu8(A, B, imm); the remaining lanes are taken from an
 * all-zero vector.
 *
 * The whole expansion is parenthesized so that it behaves as a single
 * primary expression: without the outer parentheses a postfix operator
 * written after the macro would bind to the builtin call before the
 * (__m512i) cast is applied.
 */
#define _mm512_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                    (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
                    (__v32hi)_mm512_setzero_si512()))
2010
2011 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2012 _mm512_sad_epu8 (__m512i __A, __m512i __B)
2013 {
2014 return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A,
2015 (__v64qi) __B);
2016 }
2017
2018 #undef __DEFAULT_FN_ATTRS512
2019 #undef __DEFAULT_FN_ATTRS
2020
2021 #endif
2022