/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
10 1.1 joerg #ifndef __IMMINTRIN_H
11 1.1 joerg #error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
12 1.1 joerg #endif
13 1.1 joerg
14 1.1 joerg #ifndef __AVX512BWINTRIN_H
15 1.1 joerg #define __AVX512BWINTRIN_H
16 1.1 joerg
/* Mask typedefs: __mmask32 is unsigned int, __mmask64 is unsigned long long. */
typedef unsigned int __mmask32;
typedef unsigned long long __mmask64;

/* Default attribute sets for the functions in this file.  The 512 variant
 * carries the same attributes plus __min_vector_width__(512). */
#define __DEFAULT_FN_ATTRS512                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"),      \
                 __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
23 1.1 joerg
24 1.1 joerg static __inline __mmask32 __DEFAULT_FN_ATTRS
25 1.1 joerg _knot_mask32(__mmask32 __M)
26 1.1 joerg {
27 1.1 joerg return __builtin_ia32_knotsi(__M);
28 1.1 joerg }
29 1.1 joerg
30 1.1 joerg static __inline __mmask64 __DEFAULT_FN_ATTRS
31 1.1 joerg _knot_mask64(__mmask64 __M)
32 1.1 joerg {
33 1.1 joerg return __builtin_ia32_knotdi(__M);
34 1.1 joerg }
35 1.1 joerg
36 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS
37 1.1 joerg _kand_mask32(__mmask32 __A, __mmask32 __B)
38 1.1 joerg {
39 1.1 joerg return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
40 1.1 joerg }
41 1.1 joerg
42 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS
43 1.1 joerg _kand_mask64(__mmask64 __A, __mmask64 __B)
44 1.1 joerg {
45 1.1 joerg return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
46 1.1 joerg }
47 1.1 joerg
48 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS
49 1.1 joerg _kandn_mask32(__mmask32 __A, __mmask32 __B)
50 1.1 joerg {
51 1.1 joerg return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
52 1.1 joerg }
53 1.1 joerg
54 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS
55 1.1 joerg _kandn_mask64(__mmask64 __A, __mmask64 __B)
56 1.1 joerg {
57 1.1 joerg return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
58 1.1 joerg }
59 1.1 joerg
60 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS
61 1.1 joerg _kor_mask32(__mmask32 __A, __mmask32 __B)
62 1.1 joerg {
63 1.1 joerg return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
64 1.1 joerg }
65 1.1 joerg
66 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS
67 1.1 joerg _kor_mask64(__mmask64 __A, __mmask64 __B)
68 1.1 joerg {
69 1.1 joerg return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
70 1.1 joerg }
71 1.1 joerg
72 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS
73 1.1 joerg _kxnor_mask32(__mmask32 __A, __mmask32 __B)
74 1.1 joerg {
75 1.1 joerg return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
76 1.1 joerg }
77 1.1 joerg
78 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS
79 1.1 joerg _kxnor_mask64(__mmask64 __A, __mmask64 __B)
80 1.1 joerg {
81 1.1 joerg return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
82 1.1 joerg }
83 1.1 joerg
84 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS
85 1.1 joerg _kxor_mask32(__mmask32 __A, __mmask32 __B)
86 1.1 joerg {
87 1.1 joerg return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
88 1.1 joerg }
89 1.1 joerg
90 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS
91 1.1 joerg _kxor_mask64(__mmask64 __A, __mmask64 __B)
92 1.1 joerg {
93 1.1 joerg return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
94 1.1 joerg }
95 1.1 joerg
96 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS
97 1.1 joerg _kortestc_mask32_u8(__mmask32 __A, __mmask32 __B)
98 1.1 joerg {
99 1.1 joerg return (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
100 1.1 joerg }
101 1.1 joerg
102 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS
103 1.1 joerg _kortestz_mask32_u8(__mmask32 __A, __mmask32 __B)
104 1.1 joerg {
105 1.1 joerg return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
106 1.1 joerg }
107 1.1 joerg
108 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS
109 1.1 joerg _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
110 1.1 joerg *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
111 1.1 joerg return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
112 1.1 joerg }
113 1.1 joerg
114 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS
115 1.1 joerg _kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
116 1.1 joerg {
117 1.1 joerg return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
118 1.1 joerg }
119 1.1 joerg
120 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS
121 1.1 joerg _kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
122 1.1 joerg {
123 1.1 joerg return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
124 1.1 joerg }
125 1.1 joerg
126 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS
127 1.1 joerg _kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
128 1.1 joerg *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
129 1.1 joerg return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
130 1.1 joerg }
131 1.1 joerg
132 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS
133 1.1 joerg _ktestc_mask32_u8(__mmask32 __A, __mmask32 __B)
134 1.1 joerg {
135 1.1 joerg return (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
136 1.1 joerg }
137 1.1 joerg
138 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS
139 1.1 joerg _ktestz_mask32_u8(__mmask32 __A, __mmask32 __B)
140 1.1 joerg {
141 1.1 joerg return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
142 1.1 joerg }
143 1.1 joerg
144 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS
145 1.1 joerg _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
146 1.1 joerg *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
147 1.1 joerg return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
148 1.1 joerg }
149 1.1 joerg
150 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS
151 1.1 joerg _ktestc_mask64_u8(__mmask64 __A, __mmask64 __B)
152 1.1 joerg {
153 1.1 joerg return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
154 1.1 joerg }
155 1.1 joerg
156 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS
157 1.1 joerg _ktestz_mask64_u8(__mmask64 __A, __mmask64 __B)
158 1.1 joerg {
159 1.1 joerg return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
160 1.1 joerg }
161 1.1 joerg
162 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS
163 1.1 joerg _ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
164 1.1 joerg *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
165 1.1 joerg return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
166 1.1 joerg }
167 1.1 joerg
168 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS
169 1.1 joerg _kadd_mask32(__mmask32 __A, __mmask32 __B)
170 1.1 joerg {
171 1.1 joerg return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
172 1.1 joerg }
173 1.1 joerg
174 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS
175 1.1 joerg _kadd_mask64(__mmask64 __A, __mmask64 __B)
176 1.1 joerg {
177 1.1 joerg return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
178 1.1 joerg }
179 1.1 joerg
/* Mask shift macros.  Each one casts A to the mask type and I to unsigned int
 * and forwards them to the matching __builtin_ia32_kshift* builtin.  They are
 * macros rather than functions, presumably because the builtin needs I to be
 * a compile-time constant (confirm against the builtin definitions).
 *
 * The whole expansion is wrapped in parentheses so that the leading cast can
 * never bind to only part of a larger expression built around the macro. */
#define _kshiftli_mask32(A, I) \
  ((__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I)))

#define _kshiftri_mask32(A, I) \
  ((__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I)))

#define _kshiftli_mask64(A, I) \
  ((__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I)))

#define _kshiftri_mask64(A, I) \
  ((__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)))
191 1.1 joerg
192 1.1 joerg static __inline__ unsigned int __DEFAULT_FN_ATTRS
193 1.1 joerg _cvtmask32_u32(__mmask32 __A) {
194 1.1 joerg return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A);
195 1.1 joerg }
196 1.1 joerg
197 1.1 joerg static __inline__ unsigned long long __DEFAULT_FN_ATTRS
198 1.1 joerg _cvtmask64_u64(__mmask64 __A) {
199 1.1 joerg return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A);
200 1.1 joerg }
201 1.1 joerg
202 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS
203 1.1 joerg _cvtu32_mask32(unsigned int __A) {
204 1.1 joerg return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A);
205 1.1 joerg }
206 1.1 joerg
207 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS
208 1.1 joerg _cvtu64_mask64(unsigned long long __A) {
209 1.1 joerg return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A);
210 1.1 joerg }
211 1.1 joerg
212 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS
213 1.1 joerg _load_mask32(__mmask32 *__A) {
214 1.1 joerg return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
215 1.1 joerg }
216 1.1 joerg
217 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS
218 1.1 joerg _load_mask64(__mmask64 *__A) {
219 1.1 joerg return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
220 1.1 joerg }
221 1.1 joerg
222 1.1 joerg static __inline__ void __DEFAULT_FN_ATTRS
223 1.1 joerg _store_mask32(__mmask32 *__A, __mmask32 __B) {
224 1.1 joerg *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
225 1.1 joerg }
226 1.1 joerg
227 1.1 joerg static __inline__ void __DEFAULT_FN_ATTRS
228 1.1 joerg _store_mask64(__mmask64 *__A, __mmask64 __B) {
229 1.1 joerg *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B);
230 1.1 joerg }
231 1.1 joerg
232 1.1 joerg /* Integer compare */
233 1.1 joerg
/* Generic integer compare macros.
 *
 *   a, b  operands, converted to __m512i and then to the element vector type
 *   p     comparison selector, converted to int (the wrappers in this header
 *         pass _MM_CMPINT_* constants)
 *   m     mask handed to the builtin as its last argument; the unmasked
 *         forms pass an all-ones mask instead
 *
 * The epi* forms use the cmp{b,w}512 builtins and the epu* forms the
 * ucmp{b,w}512 builtins.  Every expansion is wrapped in parentheses so that
 * the leading result cast can never bind to only part of a larger expression
 * built around the macro. */
#define _mm512_cmp_epi8_mask(a, b, p) \
  ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), (int)(p), \
                                          (__mmask64)-1))

#define _mm512_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), (int)(p), \
                                          (__mmask64)(m)))

#define _mm512_cmp_epu8_mask(a, b, p) \
  ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                           (__v64qi)(__m512i)(b), (int)(p), \
                                           (__mmask64)-1))

#define _mm512_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                           (__v64qi)(__m512i)(b), (int)(p), \
                                           (__mmask64)(m)))

#define _mm512_cmp_epi16_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), (int)(p), \
                                          (__mmask32)-1))

#define _mm512_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), (int)(p), \
                                          (__mmask32)(m)))

#define _mm512_cmp_epu16_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                           (__v32hi)(__m512i)(b), (int)(p), \
                                           (__mmask32)-1))

#define _mm512_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                           (__v32hi)(__m512i)(b), (int)(p), \
                                           (__mmask32)(m)))
273 1.1 joerg
/* Named comparisons built on _mm512_cmp_epi8_mask / _mm512_mask_cmp_epi8_mask:
 * each macro fixes the selector to one _MM_CMPINT_* constant (EQ, GE, GT, LE,
 * LT, NE).  The mask_ forms forward their first argument as the mask. */
#define _mm512_cmpeq_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi8_mask(a, b) \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi8_mask(m, a, b) \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_NE)
298 1.1 joerg
/* Named comparisons built on _mm512_cmp_epu8_mask / _mm512_mask_cmp_epu8_mask:
 * each macro fixes the selector to one _MM_CMPINT_* constant (EQ, GE, GT, LE,
 * LT, NE).  The mask_ forms forward their first argument as the mask. */
#define _mm512_cmpeq_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu8_mask(a, b) \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu8_mask(m, a, b) \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_NE)
323 1.1 joerg
/* Named comparisons built on _mm512_cmp_epi16_mask /
 * _mm512_mask_cmp_epi16_mask: each macro fixes the selector to one
 * _MM_CMPINT_* constant (EQ, GE, GT, LE, LT, NE).  The mask_ forms forward
 * their first argument as the mask. */
#define _mm512_cmpeq_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi16_mask(a, b) \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi16_mask(m, a, b) \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_NE)
348 1.1 joerg
/* Named comparisons built on _mm512_cmp_epu16_mask /
 * _mm512_mask_cmp_epu16_mask: each macro fixes the selector to one
 * _MM_CMPINT_* constant (EQ, GE, GT, LE, LT, NE).  The mask_ forms forward
 * their first argument as the mask. */
#define _mm512_cmpeq_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu16_mask(a, b) \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu16_mask(m, a, b) \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_NE)
373 1.1 joerg
374 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
375 1.1 joerg _mm512_add_epi8 (__m512i __A, __m512i __B) {
376 1.1 joerg return (__m512i) ((__v64qu) __A + (__v64qu) __B);
377 1.1 joerg }
378 1.1 joerg
379 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
380 1.1 joerg _mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
381 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
382 1.1 joerg (__v64qi)_mm512_add_epi8(__A, __B),
383 1.1 joerg (__v64qi)__W);
384 1.1 joerg }
385 1.1 joerg
386 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
387 1.1 joerg _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
388 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
389 1.1 joerg (__v64qi)_mm512_add_epi8(__A, __B),
390 1.1 joerg (__v64qi)_mm512_setzero_si512());
391 1.1 joerg }
392 1.1 joerg
393 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
394 1.1 joerg _mm512_sub_epi8 (__m512i __A, __m512i __B) {
395 1.1 joerg return (__m512i) ((__v64qu) __A - (__v64qu) __B);
396 1.1 joerg }
397 1.1 joerg
398 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
399 1.1 joerg _mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
400 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
401 1.1 joerg (__v64qi)_mm512_sub_epi8(__A, __B),
402 1.1 joerg (__v64qi)__W);
403 1.1 joerg }
404 1.1 joerg
405 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
406 1.1 joerg _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
407 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
408 1.1 joerg (__v64qi)_mm512_sub_epi8(__A, __B),
409 1.1 joerg (__v64qi)_mm512_setzero_si512());
410 1.1 joerg }
411 1.1 joerg
412 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
413 1.1 joerg _mm512_add_epi16 (__m512i __A, __m512i __B) {
414 1.1 joerg return (__m512i) ((__v32hu) __A + (__v32hu) __B);
415 1.1 joerg }
416 1.1 joerg
417 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
418 1.1 joerg _mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
419 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
420 1.1 joerg (__v32hi)_mm512_add_epi16(__A, __B),
421 1.1 joerg (__v32hi)__W);
422 1.1 joerg }
423 1.1 joerg
424 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
425 1.1 joerg _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
426 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
427 1.1 joerg (__v32hi)_mm512_add_epi16(__A, __B),
428 1.1 joerg (__v32hi)_mm512_setzero_si512());
429 1.1 joerg }
430 1.1 joerg
431 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
432 1.1 joerg _mm512_sub_epi16 (__m512i __A, __m512i __B) {
433 1.1 joerg return (__m512i) ((__v32hu) __A - (__v32hu) __B);
434 1.1 joerg }
435 1.1 joerg
436 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
437 1.1 joerg _mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
438 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
439 1.1 joerg (__v32hi)_mm512_sub_epi16(__A, __B),
440 1.1 joerg (__v32hi)__W);
441 1.1 joerg }
442 1.1 joerg
443 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
444 1.1 joerg _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
445 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
446 1.1 joerg (__v32hi)_mm512_sub_epi16(__A, __B),
447 1.1 joerg (__v32hi)_mm512_setzero_si512());
448 1.1 joerg }
449 1.1 joerg
450 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
451 1.1 joerg _mm512_mullo_epi16 (__m512i __A, __m512i __B) {
452 1.1 joerg return (__m512i) ((__v32hu) __A * (__v32hu) __B);
453 1.1 joerg }
454 1.1 joerg
455 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
456 1.1 joerg _mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
457 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
458 1.1 joerg (__v32hi)_mm512_mullo_epi16(__A, __B),
459 1.1 joerg (__v32hi)__W);
460 1.1 joerg }
461 1.1 joerg
462 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
463 1.1 joerg _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
464 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
465 1.1 joerg (__v32hi)_mm512_mullo_epi16(__A, __B),
466 1.1 joerg (__v32hi)_mm512_setzero_si512());
467 1.1 joerg }
468 1.1 joerg
469 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
470 1.1 joerg _mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
471 1.1 joerg {
472 1.1 joerg return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
473 1.1 joerg (__v64qi) __W,
474 1.1 joerg (__v64qi) __A);
475 1.1 joerg }
476 1.1 joerg
477 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
478 1.1 joerg _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
479 1.1 joerg {
480 1.1 joerg return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
481 1.1 joerg (__v32hi) __W,
482 1.1 joerg (__v32hi) __A);
483 1.1 joerg }
484 1.1 joerg
485 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
486 1.1 joerg _mm512_abs_epi8 (__m512i __A)
487 1.1 joerg {
488 1.1 joerg return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A);
489 1.1 joerg }
490 1.1 joerg
491 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
492 1.1 joerg _mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
493 1.1 joerg {
494 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
495 1.1 joerg (__v64qi)_mm512_abs_epi8(__A),
496 1.1 joerg (__v64qi)__W);
497 1.1 joerg }
498 1.1 joerg
499 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
500 1.1 joerg _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
501 1.1 joerg {
502 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
503 1.1 joerg (__v64qi)_mm512_abs_epi8(__A),
504 1.1 joerg (__v64qi)_mm512_setzero_si512());
505 1.1 joerg }
506 1.1 joerg
507 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
508 1.1 joerg _mm512_abs_epi16 (__m512i __A)
509 1.1 joerg {
510 1.1 joerg return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A);
511 1.1 joerg }
512 1.1 joerg
513 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
514 1.1 joerg _mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
515 1.1 joerg {
516 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
517 1.1 joerg (__v32hi)_mm512_abs_epi16(__A),
518 1.1 joerg (__v32hi)__W);
519 1.1 joerg }
520 1.1 joerg
521 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
522 1.1 joerg _mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A)
523 1.1 joerg {
524 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
525 1.1 joerg (__v32hi)_mm512_abs_epi16(__A),
526 1.1 joerg (__v32hi)_mm512_setzero_si512());
527 1.1 joerg }
528 1.1 joerg
529 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
530 1.1 joerg _mm512_packs_epi32(__m512i __A, __m512i __B)
531 1.1 joerg {
532 1.1 joerg return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B);
533 1.1 joerg }
534 1.1 joerg
535 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
536 1.1 joerg _mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B)
537 1.1 joerg {
538 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
539 1.1 joerg (__v32hi)_mm512_packs_epi32(__A, __B),
540 1.1 joerg (__v32hi)_mm512_setzero_si512());
541 1.1 joerg }
542 1.1 joerg
543 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
544 1.1 joerg _mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
545 1.1 joerg {
546 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
547 1.1 joerg (__v32hi)_mm512_packs_epi32(__A, __B),
548 1.1 joerg (__v32hi)__W);
549 1.1 joerg }
550 1.1 joerg
551 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
552 1.1 joerg _mm512_packs_epi16(__m512i __A, __m512i __B)
553 1.1 joerg {
554 1.1 joerg return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B);
555 1.1 joerg }
556 1.1 joerg
557 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
558 1.1 joerg _mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
559 1.1 joerg {
560 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
561 1.1 joerg (__v64qi)_mm512_packs_epi16(__A, __B),
562 1.1 joerg (__v64qi)__W);
563 1.1 joerg }
564 1.1 joerg
565 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
566 1.1 joerg _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B)
567 1.1 joerg {
568 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
569 1.1 joerg (__v64qi)_mm512_packs_epi16(__A, __B),
570 1.1 joerg (__v64qi)_mm512_setzero_si512());
571 1.1 joerg }
572 1.1 joerg
573 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
574 1.1 joerg _mm512_packus_epi32(__m512i __A, __m512i __B)
575 1.1 joerg {
576 1.1 joerg return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B);
577 1.1 joerg }
578 1.1 joerg
579 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
580 1.1 joerg _mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B)
581 1.1 joerg {
582 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
583 1.1 joerg (__v32hi)_mm512_packus_epi32(__A, __B),
584 1.1 joerg (__v32hi)_mm512_setzero_si512());
585 1.1 joerg }
586 1.1 joerg
587 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
588 1.1 joerg _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
589 1.1 joerg {
590 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
591 1.1 joerg (__v32hi)_mm512_packus_epi32(__A, __B),
592 1.1 joerg (__v32hi)__W);
593 1.1 joerg }
594 1.1 joerg
595 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
596 1.1 joerg _mm512_packus_epi16(__m512i __A, __m512i __B)
597 1.1 joerg {
598 1.1 joerg return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B);
599 1.1 joerg }
600 1.1 joerg
601 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
602 1.1 joerg _mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
603 1.1 joerg {
604 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
605 1.1 joerg (__v64qi)_mm512_packus_epi16(__A, __B),
606 1.1 joerg (__v64qi)__W);
607 1.1 joerg }
608 1.1 joerg
609 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
610 1.1 joerg _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B)
611 1.1 joerg {
612 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
613 1.1 joerg (__v64qi)_mm512_packus_epi16(__A, __B),
614 1.1 joerg (__v64qi)_mm512_setzero_si512());
615 1.1 joerg }
616 1.1 joerg
617 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
618 1.1 joerg _mm512_adds_epi8 (__m512i __A, __m512i __B)
619 1.1 joerg {
620 1.1 joerg return (__m512i)__builtin_ia32_paddsb512((__v64qi)__A, (__v64qi)__B);
621 1.1 joerg }
622 1.1 joerg
623 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
624 1.1 joerg _mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
625 1.1 joerg {
626 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
627 1.1 joerg (__v64qi)_mm512_adds_epi8(__A, __B),
628 1.1 joerg (__v64qi)__W);
629 1.1 joerg }
630 1.1 joerg
631 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
632 1.1 joerg _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
633 1.1 joerg {
634 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
635 1.1 joerg (__v64qi)_mm512_adds_epi8(__A, __B),
636 1.1 joerg (__v64qi)_mm512_setzero_si512());
637 1.1 joerg }
638 1.1 joerg
639 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
640 1.1 joerg _mm512_adds_epi16 (__m512i __A, __m512i __B)
641 1.1 joerg {
642 1.1 joerg return (__m512i)__builtin_ia32_paddsw512((__v32hi)__A, (__v32hi)__B);
643 1.1 joerg }
644 1.1 joerg
645 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
646 1.1 joerg _mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
647 1.1 joerg {
648 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
649 1.1 joerg (__v32hi)_mm512_adds_epi16(__A, __B),
650 1.1 joerg (__v32hi)__W);
651 1.1 joerg }
652 1.1 joerg
653 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
654 1.1 joerg _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
655 1.1 joerg {
656 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
657 1.1 joerg (__v32hi)_mm512_adds_epi16(__A, __B),
658 1.1 joerg (__v32hi)_mm512_setzero_si512());
659 1.1 joerg }
660 1.1 joerg
661 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
662 1.1 joerg _mm512_adds_epu8 (__m512i __A, __m512i __B)
663 1.1 joerg {
664 1.1 joerg return (__m512i)__builtin_ia32_paddusb512((__v64qi) __A, (__v64qi) __B);
665 1.1 joerg }
666 1.1 joerg
667 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
668 1.1 joerg _mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
669 1.1 joerg {
670 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
671 1.1 joerg (__v64qi)_mm512_adds_epu8(__A, __B),
672 1.1 joerg (__v64qi)__W);
673 1.1 joerg }
674 1.1 joerg
675 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
676 1.1 joerg _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
677 1.1 joerg {
678 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
679 1.1 joerg (__v64qi)_mm512_adds_epu8(__A, __B),
680 1.1 joerg (__v64qi)_mm512_setzero_si512());
681 1.1 joerg }
682 1.1 joerg
683 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
684 1.1 joerg _mm512_adds_epu16 (__m512i __A, __m512i __B)
685 1.1 joerg {
686 1.1 joerg return (__m512i)__builtin_ia32_paddusw512((__v32hi) __A, (__v32hi) __B);
687 1.1 joerg }
688 1.1 joerg
/* Merge-masking variant: passes _mm512_adds_epu16(__A, __B) and __W to
   __builtin_ia32_selectw_512 under mask __U (presumably elements whose bit in
   __U is clear are taken from __W). */
689 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
690 1.1 joerg _mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
691 1.1 joerg {
692 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
693 1.1 joerg (__v32hi)_mm512_adds_epu16(__A, __B),
694 1.1 joerg (__v32hi)__W);
695 1.1 joerg }
696 1.1 joerg
/* Zero-masking variant: passes _mm512_adds_epu16(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectw_512 under mask __U
   (presumably elements whose bit in __U is clear come from the zero vector). */
697 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
698 1.1 joerg _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
699 1.1 joerg {
700 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
701 1.1 joerg (__v32hi)_mm512_adds_epu16(__A, __B),
702 1.1 joerg (__v32hi)_mm512_setzero_si512());
703 1.1 joerg }
704 1.1 joerg
/* Views __A and __B as __v64qi, forwards them to __builtin_ia32_pavgb512 and
   returns the result as __m512i (presumably a per-element unsigned average,
   going by the intrinsic name). */
705 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
706 1.1 joerg _mm512_avg_epu8 (__m512i __A, __m512i __B)
707 1.1 joerg {
708 1.1 joerg return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B);
709 1.1 joerg }
710 1.1 joerg
/* Merge-masking variant: passes _mm512_avg_epu8(__A, __B) and __W to
   __builtin_ia32_selectb_512 under mask __U (presumably elements whose bit in
   __U is clear are taken from __W). */
711 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
712 1.1 joerg _mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
713 1.1 joerg __m512i __B)
714 1.1 joerg {
715 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
716 1.1 joerg (__v64qi)_mm512_avg_epu8(__A, __B),
717 1.1 joerg (__v64qi)__W);
718 1.1 joerg }
719 1.1 joerg
/* Zero-masking variant: passes _mm512_avg_epu8(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectb_512 under mask __U
   (presumably elements whose bit in __U is clear come from the zero vector). */
720 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
721 1.1 joerg _mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
722 1.1 joerg {
723 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
724 1.1 joerg (__v64qi)_mm512_avg_epu8(__A, __B),
725 1.1 joerg (__v64qi)_mm512_setzero_si512());
726 1.1 joerg }
727 1.1 joerg
/* Views __A and __B as __v32hi, forwards them to __builtin_ia32_pavgw512 and
   returns the result as __m512i (presumably a per-element unsigned average,
   going by the intrinsic name). */
728 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
729 1.1 joerg _mm512_avg_epu16 (__m512i __A, __m512i __B)
730 1.1 joerg {
731 1.1 joerg return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B);
732 1.1 joerg }
733 1.1 joerg
/* Merge-masking variant: passes _mm512_avg_epu16(__A, __B) and __W to
   __builtin_ia32_selectw_512 under mask __U (presumably elements whose bit in
   __U is clear are taken from __W). */
734 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
735 1.1 joerg _mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
736 1.1 joerg __m512i __B)
737 1.1 joerg {
738 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
739 1.1 joerg (__v32hi)_mm512_avg_epu16(__A, __B),
740 1.1 joerg (__v32hi)__W);
741 1.1 joerg }
742 1.1 joerg
/* Zero-masking variant: passes _mm512_avg_epu16(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectw_512 under mask __U
   (presumably elements whose bit in __U is clear come from the zero vector). */
743 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
744 1.1 joerg _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
745 1.1 joerg {
746 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
747 1.1 joerg (__v32hi)_mm512_avg_epu16(__A, __B),
748 1.1 joerg (__v32hi) _mm512_setzero_si512());
749 1.1 joerg }
750 1.1 joerg
/* Views __A and __B as __v64qi, forwards them to __builtin_ia32_pmaxsb512 and
   returns the result as __m512i (presumably a per-element signed maximum,
   going by the intrinsic name). */
751 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
752 1.1 joerg _mm512_max_epi8 (__m512i __A, __m512i __B)
753 1.1 joerg {
754 1.1 joerg return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B);
755 1.1 joerg }
756 1.1 joerg
/* Zero-masking variant: passes _mm512_max_epi8(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectb_512 under mask __M
   (presumably elements whose bit in __M is clear come from the zero vector). */
757 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
758 1.1 joerg _mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
759 1.1 joerg {
760 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
761 1.1 joerg (__v64qi)_mm512_max_epi8(__A, __B),
762 1.1 joerg (__v64qi)_mm512_setzero_si512());
763 1.1 joerg }
764 1.1 joerg
/* Merge-masking variant: passes _mm512_max_epi8(__A, __B) and __W to
   __builtin_ia32_selectb_512 under mask __M (presumably elements whose bit in
   __M is clear are taken from __W). */
765 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
766 1.1 joerg _mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
767 1.1 joerg {
768 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
769 1.1 joerg (__v64qi)_mm512_max_epi8(__A, __B),
770 1.1 joerg (__v64qi)__W);
771 1.1 joerg }
772 1.1 joerg
/* Views __A and __B as __v32hi, forwards them to __builtin_ia32_pmaxsw512 and
   returns the result as __m512i (presumably a per-element signed maximum,
   going by the intrinsic name). */
773 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
774 1.1 joerg _mm512_max_epi16 (__m512i __A, __m512i __B)
775 1.1 joerg {
776 1.1 joerg return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B);
777 1.1 joerg }
778 1.1 joerg
/* Zero-masking variant: passes _mm512_max_epi16(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectw_512 under mask __M
   (presumably elements whose bit in __M is clear come from the zero vector). */
779 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
780 1.1 joerg _mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
781 1.1 joerg {
782 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
783 1.1 joerg (__v32hi)_mm512_max_epi16(__A, __B),
784 1.1 joerg (__v32hi)_mm512_setzero_si512());
785 1.1 joerg }
786 1.1 joerg
/* Merge-masking variant: passes _mm512_max_epi16(__A, __B) and __W to
   __builtin_ia32_selectw_512 under mask __M (presumably elements whose bit in
   __M is clear are taken from __W). */
787 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
788 1.1 joerg _mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
789 1.1 joerg __m512i __B)
790 1.1 joerg {
791 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
792 1.1 joerg (__v32hi)_mm512_max_epi16(__A, __B),
793 1.1 joerg (__v32hi)__W);
794 1.1 joerg }
795 1.1 joerg
/* Views __A and __B as __v64qi, forwards them to __builtin_ia32_pmaxub512 and
   returns the result as __m512i (presumably a per-element unsigned maximum,
   going by the intrinsic name). */
796 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
797 1.1 joerg _mm512_max_epu8 (__m512i __A, __m512i __B)
798 1.1 joerg {
799 1.1 joerg return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B);
800 1.1 joerg }
801 1.1 joerg
/* Zero-masking variant: passes _mm512_max_epu8(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectb_512 under mask __M
   (presumably elements whose bit in __M is clear come from the zero vector). */
802 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
803 1.1 joerg _mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
804 1.1 joerg {
805 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
806 1.1 joerg (__v64qi)_mm512_max_epu8(__A, __B),
807 1.1 joerg (__v64qi)_mm512_setzero_si512());
808 1.1 joerg }
809 1.1 joerg
/* Merge-masking variant: passes _mm512_max_epu8(__A, __B) and __W to
   __builtin_ia32_selectb_512 under mask __M (presumably elements whose bit in
   __M is clear are taken from __W). */
810 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
811 1.1 joerg _mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
812 1.1 joerg {
813 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
814 1.1 joerg (__v64qi)_mm512_max_epu8(__A, __B),
815 1.1 joerg (__v64qi)__W);
816 1.1 joerg }
817 1.1 joerg
/* Views __A and __B as __v32hi, forwards them to __builtin_ia32_pmaxuw512 and
   returns the result as __m512i (presumably a per-element unsigned maximum,
   going by the intrinsic name). */
818 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
819 1.1 joerg _mm512_max_epu16 (__m512i __A, __m512i __B)
820 1.1 joerg {
821 1.1 joerg return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B);
822 1.1 joerg }
823 1.1 joerg
/* Zero-masking variant: passes _mm512_max_epu16(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectw_512 under mask __M
   (presumably elements whose bit in __M is clear come from the zero vector). */
824 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
825 1.1 joerg _mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
826 1.1 joerg {
827 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
828 1.1 joerg (__v32hi)_mm512_max_epu16(__A, __B),
829 1.1 joerg (__v32hi)_mm512_setzero_si512());
830 1.1 joerg }
831 1.1 joerg
/* Merge-masking variant: passes _mm512_max_epu16(__A, __B) and __W to
   __builtin_ia32_selectw_512 under mask __M (presumably elements whose bit in
   __M is clear are taken from __W). */
832 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
833 1.1 joerg _mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
834 1.1 joerg {
835 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
836 1.1 joerg (__v32hi)_mm512_max_epu16(__A, __B),
837 1.1 joerg (__v32hi)__W);
838 1.1 joerg }
839 1.1 joerg
/* Views __A and __B as __v64qi, forwards them to __builtin_ia32_pminsb512 and
   returns the result as __m512i (presumably a per-element signed minimum,
   going by the intrinsic name). */
840 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
841 1.1 joerg _mm512_min_epi8 (__m512i __A, __m512i __B)
842 1.1 joerg {
843 1.1 joerg return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B);
844 1.1 joerg }
845 1.1 joerg
/* Zero-masking variant: passes _mm512_min_epi8(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectb_512 under mask __M
   (presumably elements whose bit in __M is clear come from the zero vector). */
846 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
847 1.1 joerg _mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
848 1.1 joerg {
849 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
850 1.1 joerg (__v64qi)_mm512_min_epi8(__A, __B),
851 1.1 joerg (__v64qi)_mm512_setzero_si512());
852 1.1 joerg }
853 1.1 joerg
/* Merge-masking variant: passes _mm512_min_epi8(__A, __B) and __W to
   __builtin_ia32_selectb_512 under mask __M (presumably elements whose bit in
   __M is clear are taken from __W). */
854 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
855 1.1 joerg _mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
856 1.1 joerg {
857 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
858 1.1 joerg (__v64qi)_mm512_min_epi8(__A, __B),
859 1.1 joerg (__v64qi)__W);
860 1.1 joerg }
861 1.1 joerg
/* Views __A and __B as __v32hi, forwards them to __builtin_ia32_pminsw512 and
   returns the result as __m512i (presumably a per-element signed minimum,
   going by the intrinsic name). */
862 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
863 1.1 joerg _mm512_min_epi16 (__m512i __A, __m512i __B)
864 1.1 joerg {
865 1.1 joerg return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B);
866 1.1 joerg }
867 1.1 joerg
/* Zero-masking variant: passes _mm512_min_epi16(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectw_512 under mask __M
   (presumably elements whose bit in __M is clear come from the zero vector). */
868 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
869 1.1 joerg _mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
870 1.1 joerg {
871 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
872 1.1 joerg (__v32hi)_mm512_min_epi16(__A, __B),
873 1.1 joerg (__v32hi)_mm512_setzero_si512());
874 1.1 joerg }
875 1.1 joerg
/* Merge-masking variant: passes _mm512_min_epi16(__A, __B) and __W to
   __builtin_ia32_selectw_512 under mask __M (presumably elements whose bit in
   __M is clear are taken from __W). */
876 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
877 1.1 joerg _mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
878 1.1 joerg {
879 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
880 1.1 joerg (__v32hi)_mm512_min_epi16(__A, __B),
881 1.1 joerg (__v32hi)__W);
882 1.1 joerg }
883 1.1 joerg
/* Views __A and __B as __v64qi, forwards them to __builtin_ia32_pminub512 and
   returns the result as __m512i (presumably a per-element unsigned minimum,
   going by the intrinsic name). */
884 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
885 1.1 joerg _mm512_min_epu8 (__m512i __A, __m512i __B)
886 1.1 joerg {
887 1.1 joerg return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B);
888 1.1 joerg }
889 1.1 joerg
/* Zero-masking variant: passes _mm512_min_epu8(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectb_512 under mask __M
   (presumably elements whose bit in __M is clear come from the zero vector). */
890 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
891 1.1 joerg _mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
892 1.1 joerg {
893 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
894 1.1 joerg (__v64qi)_mm512_min_epu8(__A, __B),
895 1.1 joerg (__v64qi)_mm512_setzero_si512());
896 1.1 joerg }
897 1.1 joerg
/* Merge-masking variant: passes _mm512_min_epu8(__A, __B) and __W to
   __builtin_ia32_selectb_512 under mask __M (presumably elements whose bit in
   __M is clear are taken from __W). */
898 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
899 1.1 joerg _mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
900 1.1 joerg {
901 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
902 1.1 joerg (__v64qi)_mm512_min_epu8(__A, __B),
903 1.1 joerg (__v64qi)__W);
904 1.1 joerg }
905 1.1 joerg
/* Views __A and __B as __v32hi, forwards them to __builtin_ia32_pminuw512 and
   returns the result as __m512i (presumably a per-element unsigned minimum,
   going by the intrinsic name). */
906 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
907 1.1 joerg _mm512_min_epu16 (__m512i __A, __m512i __B)
908 1.1 joerg {
909 1.1 joerg return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B);
910 1.1 joerg }
911 1.1 joerg
/* Zero-masking variant: passes _mm512_min_epu16(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectw_512 under mask __M
   (presumably elements whose bit in __M is clear come from the zero vector). */
912 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
913 1.1 joerg _mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
914 1.1 joerg {
915 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
916 1.1 joerg (__v32hi)_mm512_min_epu16(__A, __B),
917 1.1 joerg (__v32hi)_mm512_setzero_si512());
918 1.1 joerg }
919 1.1 joerg
/* Merge-masking variant: passes _mm512_min_epu16(__A, __B) and __W to
   __builtin_ia32_selectw_512 under mask __M (presumably elements whose bit in
   __M is clear are taken from __W). */
920 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
921 1.1 joerg _mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
922 1.1 joerg {
923 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
924 1.1 joerg (__v32hi)_mm512_min_epu16(__A, __B),
925 1.1 joerg (__v32hi)__W);
926 1.1 joerg }
927 1.1 joerg
/* Views __A and __B as __v64qi, forwards them to __builtin_ia32_pshufb512 and
   returns the result as __m512i (presumably __B supplies the per-element
   selectors applied to __A -- confirm against the instruction reference). */
928 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
929 1.1 joerg _mm512_shuffle_epi8(__m512i __A, __m512i __B)
930 1.1 joerg {
931 1.1 joerg return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B);
932 1.1 joerg }
933 1.1 joerg
/* Merge-masking variant: passes _mm512_shuffle_epi8(__A, __B) and __W to
   __builtin_ia32_selectb_512 under mask __U (presumably elements whose bit in
   __U is clear are taken from __W). */
934 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
935 1.1 joerg _mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
936 1.1 joerg {
937 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
938 1.1 joerg (__v64qi)_mm512_shuffle_epi8(__A, __B),
939 1.1 joerg (__v64qi)__W);
940 1.1 joerg }
941 1.1 joerg
/* Zero-masking variant: passes _mm512_shuffle_epi8(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectb_512 under mask __U
   (presumably elements whose bit in __U is clear come from the zero vector). */
942 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
943 1.1 joerg _mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B)
944 1.1 joerg {
945 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
946 1.1 joerg (__v64qi)_mm512_shuffle_epi8(__A, __B),
947 1.1 joerg (__v64qi)_mm512_setzero_si512());
948 1.1 joerg }
949 1.1 joerg
/* Views __A and __B as __v64qi, forwards them to __builtin_ia32_psubsb512 and
   returns the result as __m512i (presumably a signed saturating subtract,
   going by the builtin name). */
950 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
951 1.1 joerg _mm512_subs_epi8 (__m512i __A, __m512i __B)
952 1.1 joerg {
953 1.1 joerg return (__m512i)__builtin_ia32_psubsb512((__v64qi)__A, (__v64qi)__B);
954 1.1 joerg }
955 1.1 joerg
/* Merge-masking variant: passes _mm512_subs_epi8(__A, __B) and __W to
   __builtin_ia32_selectb_512 under mask __U (presumably elements whose bit in
   __U is clear are taken from __W). */
956 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
957 1.1 joerg _mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
958 1.1 joerg {
959 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
960 1.1 joerg (__v64qi)_mm512_subs_epi8(__A, __B),
961 1.1 joerg (__v64qi)__W);
962 1.1 joerg }
963 1.1 joerg
/* Zero-masking variant: passes _mm512_subs_epi8(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectb_512 under mask __U
   (presumably elements whose bit in __U is clear come from the zero vector). */
964 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
965 1.1 joerg _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
966 1.1 joerg {
967 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
968 1.1 joerg (__v64qi)_mm512_subs_epi8(__A, __B),
969 1.1 joerg (__v64qi)_mm512_setzero_si512());
970 1.1 joerg }
971 1.1 joerg
/* Views __A and __B as __v32hi, forwards them to __builtin_ia32_psubsw512 and
   returns the result as __m512i (presumably a signed saturating subtract,
   going by the builtin name). */
972 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
973 1.1 joerg _mm512_subs_epi16 (__m512i __A, __m512i __B)
974 1.1 joerg {
975 1.1 joerg return (__m512i)__builtin_ia32_psubsw512((__v32hi)__A, (__v32hi)__B);
976 1.1 joerg }
977 1.1 joerg
/* Merge-masking variant: passes _mm512_subs_epi16(__A, __B) and __W to
   __builtin_ia32_selectw_512 under mask __U (presumably elements whose bit in
   __U is clear are taken from __W). */
978 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
979 1.1 joerg _mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
980 1.1 joerg {
981 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
982 1.1 joerg (__v32hi)_mm512_subs_epi16(__A, __B),
983 1.1 joerg (__v32hi)__W);
984 1.1 joerg }
985 1.1 joerg
/* Zero-masking variant: passes _mm512_subs_epi16(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectw_512 under mask __U
   (presumably elements whose bit in __U is clear come from the zero vector). */
986 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
987 1.1 joerg _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
988 1.1 joerg {
989 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
990 1.1 joerg (__v32hi)_mm512_subs_epi16(__A, __B),
991 1.1 joerg (__v32hi)_mm512_setzero_si512());
992 1.1 joerg }
993 1.1 joerg
/* Views __A and __B as __v64qi, forwards them to __builtin_ia32_psubusb512 and
   returns the result as __m512i (presumably an unsigned saturating subtract,
   going by the builtin name). */
994 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
995 1.1 joerg _mm512_subs_epu8 (__m512i __A, __m512i __B)
996 1.1 joerg {
997 1.1 joerg return (__m512i)__builtin_ia32_psubusb512((__v64qi) __A, (__v64qi) __B);
998 1.1 joerg }
999 1.1 joerg
/* Merge-masking variant: passes _mm512_subs_epu8(__A, __B) and __W to
   __builtin_ia32_selectb_512 under mask __U (presumably elements whose bit in
   __U is clear are taken from __W). */
1000 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1001 1.1 joerg _mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
1002 1.1 joerg {
1003 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1004 1.1 joerg (__v64qi)_mm512_subs_epu8(__A, __B),
1005 1.1 joerg (__v64qi)__W);
1006 1.1 joerg }
1007 1.1 joerg
/* Zero-masking variant: passes _mm512_subs_epu8(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectb_512 under mask __U
   (presumably elements whose bit in __U is clear come from the zero vector). */
1008 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1009 1.1 joerg _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
1010 1.1 joerg {
1011 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1012 1.1 joerg (__v64qi)_mm512_subs_epu8(__A, __B),
1013 1.1 joerg (__v64qi)_mm512_setzero_si512());
1014 1.1 joerg }
1015 1.1 joerg
/* Views __A and __B as __v32hi, forwards them to __builtin_ia32_psubusw512 and
   returns the result as __m512i (presumably an unsigned saturating subtract,
   going by the builtin name). */
1016 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1017 1.1 joerg _mm512_subs_epu16 (__m512i __A, __m512i __B)
1018 1.1 joerg {
1019 1.1 joerg return (__m512i)__builtin_ia32_psubusw512((__v32hi) __A, (__v32hi) __B);
1020 1.1 joerg }
1021 1.1 joerg
/* Merge-masking variant: passes _mm512_subs_epu16(__A, __B) and __W to
   __builtin_ia32_selectw_512 under mask __U (presumably elements whose bit in
   __U is clear are taken from __W). */
1022 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1023 1.1 joerg _mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1024 1.1 joerg {
1025 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1026 1.1 joerg (__v32hi)_mm512_subs_epu16(__A, __B),
1027 1.1 joerg (__v32hi)__W);
1028 1.1 joerg }
1029 1.1 joerg
/* Zero-masking variant: passes _mm512_subs_epu16(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectw_512 under mask __U
   (presumably elements whose bit in __U is clear come from the zero vector). */
1030 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1031 1.1 joerg _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
1032 1.1 joerg {
1033 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1034 1.1 joerg (__v32hi)_mm512_subs_epu16(__A, __B),
1035 1.1 joerg (__v32hi)_mm512_setzero_si512());
1036 1.1 joerg }
1037 1.1 joerg
/* Views __A, __I and __B as __v32hi and forwards them, in that order, to
   __builtin_ia32_vpermi2varhi512; the result is returned as __m512i
   (presumably __I holds indices that pick elements out of __A and __B --
   confirm against the instruction reference). */
1038 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1039 1.1 joerg _mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B)
1040 1.1 joerg {
1041 1.1 joerg return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
1042 1.1 joerg (__v32hi)__B);
1043 1.1 joerg }
1044 1.1 joerg
/* Masked variant whose alternative operand is __A: passes
   _mm512_permutex2var_epi16(__A, __I, __B) and __A to
   __builtin_ia32_selectw_512 under mask __U (presumably elements whose bit in
   __U is clear keep the value from __A). */
1045 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1046 1.1 joerg _mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I,
1047 1.1 joerg __m512i __B)
1048 1.1 joerg {
1049 1.1 joerg return (__m512i)__builtin_ia32_selectw_512(__U,
1050 1.1 joerg (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
1051 1.1 joerg (__v32hi)__A);
1052 1.1 joerg }
1053 1.1 joerg
/* Masked variant whose alternative operand is __I (note the mask is the third
   parameter here): passes _mm512_permutex2var_epi16(__A, __I, __B) and __I to
   __builtin_ia32_selectw_512 under mask __U (presumably elements whose bit in
   __U is clear keep the value from __I). */
1054 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1055 1.1 joerg _mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U,
1056 1.1 joerg __m512i __B)
1057 1.1 joerg {
1058 1.1 joerg return (__m512i)__builtin_ia32_selectw_512(__U,
1059 1.1 joerg (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
1060 1.1 joerg (__v32hi)__I);
1061 1.1 joerg }
1062 1.1 joerg
/* Zero-masking variant: passes _mm512_permutex2var_epi16(__A, __I, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectw_512 under mask __U
   (presumably elements whose bit in __U is clear come from the zero vector). */
1063 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1064 1.1 joerg _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I,
1065 1.1 joerg __m512i __B)
1066 1.1 joerg {
1067 1.1 joerg return (__m512i)__builtin_ia32_selectw_512(__U,
1068 1.1 joerg (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
1069 1.1 joerg (__v32hi)_mm512_setzero_si512());
1070 1.1 joerg }
1071 1.1 joerg
/* Views __A and __B as __v32hi, forwards them to __builtin_ia32_pmulhrsw512
   and returns the result as __m512i (presumably the rounded, scaled high-half
   multiply of the PMULHRSW instruction -- confirm against the reference). */
1072 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1073 1.1 joerg _mm512_mulhrs_epi16(__m512i __A, __m512i __B)
1074 1.1 joerg {
1075 1.1 joerg return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B);
1076 1.1 joerg }
1077 1.1 joerg
/* Merge-masking variant: passes _mm512_mulhrs_epi16(__A, __B) and __W to
   __builtin_ia32_selectw_512 under mask __U (presumably elements whose bit in
   __U is clear are taken from __W). */
1078 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1079 1.1 joerg _mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1080 1.1 joerg {
1081 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1082 1.1 joerg (__v32hi)_mm512_mulhrs_epi16(__A, __B),
1083 1.1 joerg (__v32hi)__W);
1084 1.1 joerg }
1085 1.1 joerg
/* Zero-masking variant: passes _mm512_mulhrs_epi16(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectw_512 under mask __U
   (presumably elements whose bit in __U is clear come from the zero vector). */
1086 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1087 1.1 joerg _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1088 1.1 joerg {
1089 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1090 1.1 joerg (__v32hi)_mm512_mulhrs_epi16(__A, __B),
1091 1.1 joerg (__v32hi)_mm512_setzero_si512());
1092 1.1 joerg }
1093 1.1 joerg
/* Views __A and __B as __v32hi, forwards them to __builtin_ia32_pmulhw512 and
   returns the result as __m512i (presumably the high half of each signed
   product, going by the intrinsic name). */
1094 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1095 1.1 joerg _mm512_mulhi_epi16(__m512i __A, __m512i __B)
1096 1.1 joerg {
1097 1.1 joerg return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B);
1098 1.1 joerg }
1099 1.1 joerg
/* Merge-masking variant: passes _mm512_mulhi_epi16(__A, __B) and __W to
   __builtin_ia32_selectw_512 under mask __U (presumably elements whose bit in
   __U is clear are taken from __W). */
1100 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1101 1.1 joerg _mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A,
1102 1.1 joerg __m512i __B)
1103 1.1 joerg {
1104 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1105 1.1 joerg (__v32hi)_mm512_mulhi_epi16(__A, __B),
1106 1.1 joerg (__v32hi)__W);
1107 1.1 joerg }
1108 1.1 joerg
/* Zero-masking variant: passes _mm512_mulhi_epi16(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectw_512 under mask __U
   (presumably elements whose bit in __U is clear come from the zero vector). */
1109 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1110 1.1 joerg _mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1111 1.1 joerg {
1112 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1113 1.1 joerg (__v32hi)_mm512_mulhi_epi16(__A, __B),
1114 1.1 joerg (__v32hi)_mm512_setzero_si512());
1115 1.1 joerg }
1116 1.1 joerg
/* Views __A and __B as __v32hi, forwards them to __builtin_ia32_pmulhuw512 and
   returns the result as __m512i (presumably the high half of each unsigned
   product, going by the intrinsic name). */
1117 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1118 1.1 joerg _mm512_mulhi_epu16(__m512i __A, __m512i __B)
1119 1.1 joerg {
1120 1.1 joerg return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B);
1121 1.1 joerg }
1122 1.1 joerg
/* Merge-masking variant: passes _mm512_mulhi_epu16(__A, __B) and __W to
   __builtin_ia32_selectw_512 under mask __U (presumably elements whose bit in
   __U is clear are taken from __W). */
1123 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1124 1.1 joerg _mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1125 1.1 joerg {
1126 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1127 1.1 joerg (__v32hi)_mm512_mulhi_epu16(__A, __B),
1128 1.1 joerg (__v32hi)__W);
1129 1.1 joerg }
1130 1.1 joerg
/* Zero-masking variant: passes _mm512_mulhi_epu16(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectw_512 under mask __U
   (presumably elements whose bit in __U is clear come from the zero vector). */
1131 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1132 1.1 joerg _mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
1133 1.1 joerg {
1134 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1135 1.1 joerg (__v32hi)_mm512_mulhi_epu16(__A, __B),
1136 1.1 joerg (__v32hi)_mm512_setzero_si512());
1137 1.1 joerg }
1138 1.1 joerg
/* Views __X and __Y as __v64qi, forwards them to __builtin_ia32_pmaddubsw512
   and returns the result as __m512i.  The masked variants treat that result
   as __v32hi.  (Presumably the PMADDUBSW multiply-and-add-adjacent-pairs
   operation -- confirm against the instruction reference.) */
1139 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1140 1.1 joerg _mm512_maddubs_epi16(__m512i __X, __m512i __Y) {
1141 1.1 joerg return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y);
1142 1.1 joerg }
1143 1.1 joerg
/* Merge-masking variant: passes _mm512_maddubs_epi16(__X, __Y) and __W, both
   viewed as __v32hi, to __builtin_ia32_selectw_512 under mask __U (presumably
   elements whose bit in __U is clear are taken from __W). */
1144 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1145 1.1 joerg _mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X,
1146 1.1 joerg __m512i __Y) {
1147 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
1148 1.1 joerg (__v32hi)_mm512_maddubs_epi16(__X, __Y),
1149 1.1 joerg (__v32hi)__W);
1150 1.1 joerg }
1151 1.1 joerg
/* Zero-masking variant: passes _mm512_maddubs_epi16(__X, __Y) and
   _mm512_setzero_si512() to __builtin_ia32_selectw_512 under mask __U
   (presumably elements whose bit in __U is clear come from the zero vector). */
1152 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1153 1.1 joerg _mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) {
1154 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
1155 1.1 joerg (__v32hi)_mm512_maddubs_epi16(__X, __Y),
1156 1.1 joerg (__v32hi)_mm512_setzero_si512());
1157 1.1 joerg }
1158 1.1 joerg
/* Views __A and __B as __v32hi, forwards them to __builtin_ia32_pmaddwd512 and
   returns the result as __m512i.  The masked variants treat that result as
   __v16si.  (Presumably the PMADDWD multiply-and-add-adjacent-pairs operation
   -- confirm against the instruction reference.) */
1159 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1160 1.1 joerg _mm512_madd_epi16(__m512i __A, __m512i __B) {
1161 1.1 joerg return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B);
1162 1.1 joerg }
1163 1.1 joerg
/* Merge-masking variant.  The result and __W are viewed as __v16si, so the
   mask is a __mmask16 and the select builtin is __builtin_ia32_selectd_512
   rather than the word-sized one (presumably elements whose bit in __U is
   clear are taken from __W). */
1164 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1165 1.1 joerg _mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
1166 1.1 joerg return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
1167 1.1 joerg (__v16si)_mm512_madd_epi16(__A, __B),
1168 1.1 joerg (__v16si)__W);
1169 1.1 joerg }
1170 1.1 joerg
/* Zero-masking variant.  The result and _mm512_setzero_si512() are viewed as
   __v16si and passed to __builtin_ia32_selectd_512 under the __mmask16 __U
   (presumably elements whose bit in __U is clear come from the zero vector). */
1171 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1172 1.1 joerg _mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) {
1173 1.1 joerg return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
1174 1.1 joerg (__v16si)_mm512_madd_epi16(__A, __B),
1175 1.1 joerg (__v16si)_mm512_setzero_si512());
1176 1.1 joerg }
1177 1.1 joerg
/* Calls __builtin_ia32_pmovswb512_mask with __A viewed as __v32hi,
   _mm256_setzero_si256() as the second operand and an all-ones mask
   ((__mmask32)-1); returns the __m256i result (presumably a signed-saturating
   narrowing of every element, going by the intrinsic name). */
1178 1.1 joerg static __inline__ __m256i __DEFAULT_FN_ATTRS512
1179 1.1 joerg _mm512_cvtsepi16_epi8 (__m512i __A) {
1180 1.1 joerg return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1181 1.1 joerg (__v32qi)_mm256_setzero_si256(),
1182 1.1 joerg (__mmask32) -1);
1183 1.1 joerg }
1184 1.1 joerg
/* Merge-masking variant: calls __builtin_ia32_pmovswb512_mask with __A viewed
   as __v32hi, __O viewed as __v32qi and mask __M (presumably elements whose
   bit in __M is clear are taken from __O). */
1185 1.1 joerg static __inline__ __m256i __DEFAULT_FN_ATTRS512
1186 1.1 joerg _mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1187 1.1 joerg return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1188 1.1 joerg (__v32qi)__O,
1189 1.1 joerg __M);
1190 1.1 joerg }
1191 1.1 joerg
/* Zero-masking variant: calls __builtin_ia32_pmovswb512_mask with __A viewed
   as __v32hi, _mm256_setzero_si256() as the second operand and mask __M
   (presumably elements whose bit in __M is clear become zero). */
1192 1.1 joerg static __inline__ __m256i __DEFAULT_FN_ATTRS512
1193 1.1 joerg _mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) {
1194 1.1 joerg return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1195 1.1 joerg (__v32qi) _mm256_setzero_si256(),
1196 1.1 joerg __M);
1197 1.1 joerg }
1198 1.1 joerg
/* Calls __builtin_ia32_pmovuswb512_mask with __A viewed as __v32hi,
   _mm256_setzero_si256() as the second operand and an all-ones mask
   ((__mmask32)-1); returns the __m256i result (presumably an
   unsigned-saturating narrowing of every element, going by the name). */
1199 1.1 joerg static __inline__ __m256i __DEFAULT_FN_ATTRS512
1200 1.1 joerg _mm512_cvtusepi16_epi8 (__m512i __A) {
1201 1.1 joerg return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1202 1.1 joerg (__v32qi) _mm256_setzero_si256(),
1203 1.1 joerg (__mmask32) -1);
1204 1.1 joerg }
1205 1.1 joerg
/* Merge-masking variant: calls __builtin_ia32_pmovuswb512_mask with __A viewed
   as __v32hi, __O viewed as __v32qi and mask __M (presumably elements whose
   bit in __M is clear are taken from __O). */
1206 1.1 joerg static __inline__ __m256i __DEFAULT_FN_ATTRS512
1207 1.1 joerg _mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1208 1.1 joerg return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1209 1.1 joerg (__v32qi) __O,
1210 1.1 joerg __M);
1211 1.1 joerg }
1212 1.1 joerg
/* Zero-masking variant: calls __builtin_ia32_pmovuswb512_mask with __A viewed
   as __v32hi, _mm256_setzero_si256() as the second operand and mask __M
   (presumably elements whose bit in __M is clear become zero). */
1213 1.1 joerg static __inline__ __m256i __DEFAULT_FN_ATTRS512
1214 1.1 joerg _mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) {
1215 1.1 joerg return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1216 1.1 joerg (__v32qi) _mm256_setzero_si256(),
1217 1.1 joerg __M);
1218 1.1 joerg }
1219 1.1 joerg
/* Calls __builtin_ia32_pmovwb512_mask with __A viewed as __v32hi and an
   all-ones mask ((__mmask32)-1).  Unlike the saturating siblings, the second
   operand here is _mm256_undefined_si256(); with every mask bit set it is
   presumably never selected.  Returns the __m256i result (presumably a
   truncating narrowing of every element). */
1220 1.1 joerg static __inline__ __m256i __DEFAULT_FN_ATTRS512
1221 1.1 joerg _mm512_cvtepi16_epi8 (__m512i __A) {
1222 1.1 joerg return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1223 1.1 joerg (__v32qi) _mm256_undefined_si256(),
1224 1.1 joerg (__mmask32) -1);
1225 1.1 joerg }
1226 1.1 joerg
/* Merge-masking variant: calls __builtin_ia32_pmovwb512_mask with __A viewed
   as __v32hi, __O viewed as __v32qi and mask __M (presumably elements whose
   bit in __M is clear are taken from __O). */
1227 1.1 joerg static __inline__ __m256i __DEFAULT_FN_ATTRS512
1228 1.1 joerg _mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1229 1.1 joerg return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1230 1.1 joerg (__v32qi) __O,
1231 1.1 joerg __M);
1232 1.1 joerg }
1233 1.1 joerg
/* Zero-masking variant: calls __builtin_ia32_pmovwb512_mask with __A viewed as
   __v32hi, _mm256_setzero_si256() as the second operand and mask __M
   (presumably elements whose bit in __M is clear become zero). */
1234 1.1 joerg static __inline__ __m256i __DEFAULT_FN_ATTRS512
1235 1.1 joerg _mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) {
1236 1.1 joerg return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1237 1.1 joerg (__v32qi) _mm256_setzero_si256(),
1238 1.1 joerg __M);
1239 1.1 joerg }
1240 1.1 joerg
/* Forwards __P (cast to a __v32qi pointer), __A (viewed as __v32hi) and __M to
   __builtin_ia32_pmovwb512mem_mask and returns nothing.  Presumably writes the
   narrowed elements selected by __M to memory at __P; the "storeu" in the name
   suggests no alignment requirement -- confirm against the reference. */
1241 1.1 joerg static __inline__ void __DEFAULT_FN_ATTRS512
1242 1.1 joerg _mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1243 1.1 joerg {
1244 1.1 joerg __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1245 1.1 joerg }
1246 1.1 joerg
/* Forwards __P (cast to a __v32qi pointer), __A (viewed as __v32hi) and __M to
   __builtin_ia32_pmovswb512mem_mask and returns nothing.  Presumably the
   signed-saturating counterpart of _mm512_mask_cvtepi16_storeu_epi8 --
   confirm against the reference. */
1247 1.1 joerg static __inline__ void __DEFAULT_FN_ATTRS512
1248 1.1 joerg _mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1249 1.1 joerg {
1250 1.1 joerg __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1251 1.1 joerg }
1252 1.1 joerg
/* Forwards __P (cast to a __v32qi pointer), __A (viewed as __v32hi) and __M to
   __builtin_ia32_pmovuswb512mem_mask and returns nothing.  Presumably the
   unsigned-saturating counterpart of _mm512_mask_cvtepi16_storeu_epi8 --
   confirm against the reference. */
1253 1.1 joerg static __inline__ void __DEFAULT_FN_ATTRS512
1254 1.1 joerg _mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1255 1.1 joerg {
1256 1.1 joerg __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1257 1.1 joerg }
1258 1.1 joerg
/* Interleaves elements of __A and __B (both viewed as __v64qi) with
   __builtin_shufflevector.  An index i below 64 names element i of __A and
   64+i names element i of __B.  The index list alternates __A/__B for
   elements 8..15, 24..31, 40..47 and 56..63, i.e. the upper eight elements of
   each group of sixteen. */
1259 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1260 1.1 joerg _mm512_unpackhi_epi8(__m512i __A, __m512i __B) {
1261 1.1 joerg return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
1262 1.1 joerg 8, 64+8, 9, 64+9,
1263 1.1 joerg 10, 64+10, 11, 64+11,
1264 1.1 joerg 12, 64+12, 13, 64+13,
1265 1.1 joerg 14, 64+14, 15, 64+15,
1266 1.1 joerg 24, 64+24, 25, 64+25,
1267 1.1 joerg 26, 64+26, 27, 64+27,
1268 1.1 joerg 28, 64+28, 29, 64+29,
1269 1.1 joerg 30, 64+30, 31, 64+31,
1270 1.1 joerg 40, 64+40, 41, 64+41,
1271 1.1 joerg 42, 64+42, 43, 64+43,
1272 1.1 joerg 44, 64+44, 45, 64+45,
1273 1.1 joerg 46, 64+46, 47, 64+47,
1274 1.1 joerg 56, 64+56, 57, 64+57,
1275 1.1 joerg 58, 64+58, 59, 64+59,
1276 1.1 joerg 60, 64+60, 61, 64+61,
1277 1.1 joerg 62, 64+62, 63, 64+63);
1278 1.1 joerg }
1279 1.1 joerg
/* Merge-masking variant: passes _mm512_unpackhi_epi8(__A, __B) and __W to
   __builtin_ia32_selectb_512 under mask __U (presumably elements whose bit in
   __U is clear are taken from __W). */
1280 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1281 1.1 joerg _mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
1282 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1283 1.1 joerg (__v64qi)_mm512_unpackhi_epi8(__A, __B),
1284 1.1 joerg (__v64qi)__W);
1285 1.1 joerg }
1286 1.1 joerg
/* Zero-masking variant: passes _mm512_unpackhi_epi8(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectb_512 under mask __U
   (presumably elements whose bit in __U is clear come from the zero vector). */
1287 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1288 1.1 joerg _mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
1289 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1290 1.1 joerg (__v64qi)_mm512_unpackhi_epi8(__A, __B),
1291 1.1 joerg (__v64qi)_mm512_setzero_si512());
1292 1.1 joerg }
1293 1.1 joerg
/* Interleaves elements of __A and __B (both viewed as __v32hi) with
   __builtin_shufflevector.  An index i below 32 names element i of __A and
   32+i names element i of __B.  The index list alternates __A/__B for
   elements 4..7, 12..15, 20..23 and 28..31, i.e. the upper four elements of
   each group of eight. */
1294 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1295 1.1 joerg _mm512_unpackhi_epi16(__m512i __A, __m512i __B) {
1296 1.1 joerg return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
1297 1.1 joerg 4, 32+4, 5, 32+5,
1298 1.1 joerg 6, 32+6, 7, 32+7,
1299 1.1 joerg 12, 32+12, 13, 32+13,
1300 1.1 joerg 14, 32+14, 15, 32+15,
1301 1.1 joerg 20, 32+20, 21, 32+21,
1302 1.1 joerg 22, 32+22, 23, 32+23,
1303 1.1 joerg 28, 32+28, 29, 32+29,
1304 1.1 joerg 30, 32+30, 31, 32+31);
1305 1.1 joerg }
1306 1.1 joerg
/* Merge-masking variant: passes _mm512_unpackhi_epi16(__A, __B) and __W to
   __builtin_ia32_selectw_512 under mask __U (presumably elements whose bit in
   __U is clear are taken from __W). */
1307 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1308 1.1 joerg _mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
1309 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1310 1.1 joerg (__v32hi)_mm512_unpackhi_epi16(__A, __B),
1311 1.1 joerg (__v32hi)__W);
1312 1.1 joerg }
1313 1.1 joerg
/* Zero-masking variant: passes _mm512_unpackhi_epi16(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectw_512 under mask __U
   (presumably elements whose bit in __U is clear come from the zero vector). */
1314 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1315 1.1 joerg _mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
1316 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1317 1.1 joerg (__v32hi)_mm512_unpackhi_epi16(__A, __B),
1318 1.1 joerg (__v32hi)_mm512_setzero_si512());
1319 1.1 joerg }
1320 1.1 joerg
/* Interleaves elements of __A and __B (both viewed as __v64qi) with
   __builtin_shufflevector.  An index i below 64 names element i of __A and
   64+i names element i of __B.  The index list alternates __A/__B for
   elements 0..7, 16..23, 32..39 and 48..55, i.e. the lower eight elements of
   each group of sixteen. */
1321 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1322 1.1 joerg _mm512_unpacklo_epi8(__m512i __A, __m512i __B) {
1323 1.1 joerg return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
1324 1.1 joerg 0, 64+0, 1, 64+1,
1325 1.1 joerg 2, 64+2, 3, 64+3,
1326 1.1 joerg 4, 64+4, 5, 64+5,
1327 1.1 joerg 6, 64+6, 7, 64+7,
1328 1.1 joerg 16, 64+16, 17, 64+17,
1329 1.1 joerg 18, 64+18, 19, 64+19,
1330 1.1 joerg 20, 64+20, 21, 64+21,
1331 1.1 joerg 22, 64+22, 23, 64+23,
1332 1.1 joerg 32, 64+32, 33, 64+33,
1333 1.1 joerg 34, 64+34, 35, 64+35,
1334 1.1 joerg 36, 64+36, 37, 64+37,
1335 1.1 joerg 38, 64+38, 39, 64+39,
1336 1.1 joerg 48, 64+48, 49, 64+49,
1337 1.1 joerg 50, 64+50, 51, 64+51,
1338 1.1 joerg 52, 64+52, 53, 64+53,
1339 1.1 joerg 54, 64+54, 55, 64+55);
1340 1.1 joerg }
1341 1.1 joerg
/* Merge-masking variant: passes _mm512_unpacklo_epi8(__A, __B) and __W to
   __builtin_ia32_selectb_512 under mask __U (presumably elements whose bit in
   __U is clear are taken from __W). */
1342 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1343 1.1 joerg _mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
1344 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1345 1.1 joerg (__v64qi)_mm512_unpacklo_epi8(__A, __B),
1346 1.1 joerg (__v64qi)__W);
1347 1.1 joerg }
1348 1.1 joerg
/* Zero-masking variant: passes _mm512_unpacklo_epi8(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectb_512 under mask __U
   (presumably elements whose bit in __U is clear come from the zero vector). */
1349 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1350 1.1 joerg _mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
1351 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1352 1.1 joerg (__v64qi)_mm512_unpacklo_epi8(__A, __B),
1353 1.1 joerg (__v64qi)_mm512_setzero_si512());
1354 1.1 joerg }
1355 1.1 joerg
/* Interleaves elements of __A and __B (both viewed as __v32hi) with
   __builtin_shufflevector.  An index i below 32 names element i of __A and
   32+i names element i of __B.  The index list alternates __A/__B for
   elements 0..3, 8..11, 16..19 and 24..27, i.e. the lower four elements of
   each group of eight. */
1356 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1357 1.1 joerg _mm512_unpacklo_epi16(__m512i __A, __m512i __B) {
1358 1.1 joerg return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
1359 1.1 joerg 0, 32+0, 1, 32+1,
1360 1.1 joerg 2, 32+2, 3, 32+3,
1361 1.1 joerg 8, 32+8, 9, 32+9,
1362 1.1 joerg 10, 32+10, 11, 32+11,
1363 1.1 joerg 16, 32+16, 17, 32+17,
1364 1.1 joerg 18, 32+18, 19, 32+19,
1365 1.1 joerg 24, 32+24, 25, 32+25,
1366 1.1 joerg 26, 32+26, 27, 32+27);
1367 1.1 joerg }
1368 1.1 joerg
/* Merge-masking variant: passes _mm512_unpacklo_epi16(__A, __B) and __W to
   __builtin_ia32_selectw_512 under mask __U (presumably elements whose bit in
   __U is clear are taken from __W). */
1369 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1370 1.1 joerg _mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
1371 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1372 1.1 joerg (__v32hi)_mm512_unpacklo_epi16(__A, __B),
1373 1.1 joerg (__v32hi)__W);
1374 1.1 joerg }
1375 1.1 joerg
/* Zero-masking variant: passes _mm512_unpacklo_epi16(__A, __B) and
   _mm512_setzero_si512() to __builtin_ia32_selectw_512 under mask __U
   (presumably elements whose bit in __U is clear come from the zero vector). */
1376 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1377 1.1 joerg _mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
1378 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1379 1.1 joerg (__v32hi)_mm512_unpacklo_epi16(__A, __B),
1380 1.1 joerg (__v32hi)_mm512_setzero_si512());
1381 1.1 joerg }
1382 1.1 joerg
/* Widens the 256-bit input to 512 bits: __A is viewed as __v32qs and converted
   element by element to __v32hi with __builtin_convertvector, then returned
   as __m512i. */
1383 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1384 1.1 joerg _mm512_cvtepi8_epi16(__m256i __A)
1385 1.1 joerg {
1386 1.1 joerg /* This function always performs a signed extension, but __v32qi is a char
1387 1.1 joerg which may be signed or unsigned, so use __v32qs. */
1388 1.1 joerg return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi);
1389 1.1 joerg }
1390 1.1 joerg
/* Merge-masking variant: passes _mm512_cvtepi8_epi16(__A) and __W to
   __builtin_ia32_selectw_512 under mask __U (presumably elements whose bit in
   __U is clear are taken from __W). */
1391 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1392 1.1 joerg _mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
1393 1.1 joerg {
1394 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1395 1.1 joerg (__v32hi)_mm512_cvtepi8_epi16(__A),
1396 1.1 joerg (__v32hi)__W);
1397 1.1 joerg }
1398 1.1 joerg
/* Zero-masking variant: passes _mm512_cvtepi8_epi16(__A) and
   _mm512_setzero_si512() to __builtin_ia32_selectw_512 under mask __U
   (presumably elements whose bit in __U is clear come from the zero vector). */
1399 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1400 1.1 joerg _mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A)
1401 1.1 joerg {
1402 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1403 1.1 joerg (__v32hi)_mm512_cvtepi8_epi16(__A),
1404 1.1 joerg (__v32hi)_mm512_setzero_si512());
1405 1.1 joerg }
1406 1.1 joerg
/* Widens the 256-bit input to 512 bits: __A is viewed as __v32qu and converted
   element by element to __v32hi with __builtin_convertvector, then returned
   as __m512i (presumably a zero extension, assuming __v32qu has unsigned
   elements -- its typedef is outside this chunk). */
1407 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1408 1.1 joerg _mm512_cvtepu8_epi16(__m256i __A)
1409 1.1 joerg {
1410 1.1 joerg return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi);
1411 1.1 joerg }
1412 1.1 joerg
1413 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1414 1.1 joerg _mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
1415 1.1 joerg {
1416 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1417 1.1 joerg (__v32hi)_mm512_cvtepu8_epi16(__A),
1418 1.1 joerg (__v32hi)__W);
1419 1.1 joerg }
1420 1.1 joerg
1421 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1422 1.1 joerg _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A)
1423 1.1 joerg {
1424 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1425 1.1 joerg (__v32hi)_mm512_cvtepu8_epi16(__A),
1426 1.1 joerg (__v32hi)_mm512_setzero_si512());
1427 1.1 joerg }
1428 1.1 joerg
1429 1.1 joerg
/* Expands to the pshufhw512 builtin applied to A (viewed as __v32hi) with
   control value imm.  The whole expansion is parenthesized so the leading
   (__m512i) cast stays attached to the call when the macro is followed by
   a postfix operator (for example a vector subscript) at the use site. */
#define _mm512_shufflehi_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm)))
1432 1.1 joerg
/* Merge-masked variant of _mm512_shufflehi_epi16: the shuffled vector, the
   mask U and the pass-through vector W go to the 16-bit select builtin.
   The whole expansion is parenthesized so the leading (__m512i) cast
   cannot be separated from the call by a postfix operator at the use
   site. */
#define _mm512_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflehi_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)(__m512i)(W)))
1438 1.1 joerg
/* Zero-masked variant of _mm512_shufflehi_epi16: the shuffled vector, the
   mask U and an all-zero vector go to the 16-bit select builtin.  The
   whole expansion is parenthesized so the leading (__m512i) cast cannot be
   separated from the call by a postfix operator at the use site. */
#define _mm512_maskz_shufflehi_epi16(U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflehi_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)_mm512_setzero_si512()))
1444 1.1 joerg
/* Expands to the pshuflw512 builtin applied to A (viewed as __v32hi) with
   control value imm.  The whole expansion is parenthesized so the leading
   (__m512i) cast stays attached to the call when the macro is followed by
   a postfix operator (for example a vector subscript) at the use site. */
#define _mm512_shufflelo_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm)))
1447 1.1 joerg
1448 1.1 joerg
/* Merge-masked variant of _mm512_shufflelo_epi16: the shuffled vector, the
   mask U and the pass-through vector W go to the 16-bit select builtin.
   The whole expansion is parenthesized so the leading (__m512i) cast
   cannot be separated from the call by a postfix operator at the use
   site. */
#define _mm512_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflelo_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)(__m512i)(W)))
1454 1.1 joerg
1455 1.1 joerg
/* Zero-masked variant of _mm512_shufflelo_epi16: the shuffled vector, the
   mask U and an all-zero vector go to the 16-bit select builtin.  The
   whole expansion is parenthesized so the leading (__m512i) cast cannot be
   separated from the call by a postfix operator at the use site. */
#define _mm512_maskz_shufflelo_epi16(U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shufflelo_epi16((A), \
                                                                       (imm)), \
                                       (__v32hi)_mm512_setzero_si512()))
1461 1.1 joerg
1462 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1463 1.1 joerg _mm512_sllv_epi16(__m512i __A, __m512i __B)
1464 1.1 joerg {
1465 1.1 joerg return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B);
1466 1.1 joerg }
1467 1.1 joerg
1468 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1469 1.1 joerg _mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1470 1.1 joerg {
1471 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1472 1.1 joerg (__v32hi)_mm512_sllv_epi16(__A, __B),
1473 1.1 joerg (__v32hi)__W);
1474 1.1 joerg }
1475 1.1 joerg
1476 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1477 1.1 joerg _mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1478 1.1 joerg {
1479 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1480 1.1 joerg (__v32hi)_mm512_sllv_epi16(__A, __B),
1481 1.1 joerg (__v32hi)_mm512_setzero_si512());
1482 1.1 joerg }
1483 1.1 joerg
1484 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1485 1.1 joerg _mm512_sll_epi16(__m512i __A, __m128i __B)
1486 1.1 joerg {
1487 1.1 joerg return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B);
1488 1.1 joerg }
1489 1.1 joerg
1490 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1491 1.1 joerg _mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1492 1.1 joerg {
1493 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1494 1.1 joerg (__v32hi)_mm512_sll_epi16(__A, __B),
1495 1.1 joerg (__v32hi)__W);
1496 1.1 joerg }
1497 1.1 joerg
1498 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1499 1.1 joerg _mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1500 1.1 joerg {
1501 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1502 1.1 joerg (__v32hi)_mm512_sll_epi16(__A, __B),
1503 1.1 joerg (__v32hi)_mm512_setzero_si512());
1504 1.1 joerg }
1505 1.1 joerg
1506 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1507 1.1 joerg _mm512_slli_epi16(__m512i __A, int __B)
1508 1.1 joerg {
1509 1.1 joerg return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B);
1510 1.1 joerg }
1511 1.1 joerg
1512 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1513 1.1 joerg _mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1514 1.1 joerg {
1515 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1516 1.1 joerg (__v32hi)_mm512_slli_epi16(__A, __B),
1517 1.1 joerg (__v32hi)__W);
1518 1.1 joerg }
1519 1.1 joerg
1520 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1521 1.1 joerg _mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, int __B)
1522 1.1 joerg {
1523 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1524 1.1 joerg (__v32hi)_mm512_slli_epi16(__A, __B),
1525 1.1 joerg (__v32hi)_mm512_setzero_si512());
1526 1.1 joerg }
1527 1.1 joerg
/* Expands to the pslldqi512_byteshift builtin applied to a (viewed as
   __v8di) with count imm.  The whole expansion is parenthesized so the
   leading (__m512i) cast stays attached to the call when the macro is
   followed by a postfix operator at the use site. */
#define _mm512_bslli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)))
1530 1.1 joerg
1531 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1532 1.1 joerg _mm512_srlv_epi16(__m512i __A, __m512i __B)
1533 1.1 joerg {
1534 1.1 joerg return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B);
1535 1.1 joerg }
1536 1.1 joerg
1537 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1538 1.1 joerg _mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1539 1.1 joerg {
1540 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1541 1.1 joerg (__v32hi)_mm512_srlv_epi16(__A, __B),
1542 1.1 joerg (__v32hi)__W);
1543 1.1 joerg }
1544 1.1 joerg
1545 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1546 1.1 joerg _mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1547 1.1 joerg {
1548 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1549 1.1 joerg (__v32hi)_mm512_srlv_epi16(__A, __B),
1550 1.1 joerg (__v32hi)_mm512_setzero_si512());
1551 1.1 joerg }
1552 1.1 joerg
1553 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1554 1.1 joerg _mm512_srav_epi16(__m512i __A, __m512i __B)
1555 1.1 joerg {
1556 1.1 joerg return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B);
1557 1.1 joerg }
1558 1.1 joerg
1559 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1560 1.1 joerg _mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1561 1.1 joerg {
1562 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1563 1.1 joerg (__v32hi)_mm512_srav_epi16(__A, __B),
1564 1.1 joerg (__v32hi)__W);
1565 1.1 joerg }
1566 1.1 joerg
1567 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1568 1.1 joerg _mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1569 1.1 joerg {
1570 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1571 1.1 joerg (__v32hi)_mm512_srav_epi16(__A, __B),
1572 1.1 joerg (__v32hi)_mm512_setzero_si512());
1573 1.1 joerg }
1574 1.1 joerg
1575 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1576 1.1 joerg _mm512_sra_epi16(__m512i __A, __m128i __B)
1577 1.1 joerg {
1578 1.1 joerg return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B);
1579 1.1 joerg }
1580 1.1 joerg
1581 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1582 1.1 joerg _mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1583 1.1 joerg {
1584 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1585 1.1 joerg (__v32hi)_mm512_sra_epi16(__A, __B),
1586 1.1 joerg (__v32hi)__W);
1587 1.1 joerg }
1588 1.1 joerg
1589 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1590 1.1 joerg _mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1591 1.1 joerg {
1592 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1593 1.1 joerg (__v32hi)_mm512_sra_epi16(__A, __B),
1594 1.1 joerg (__v32hi)_mm512_setzero_si512());
1595 1.1 joerg }
1596 1.1 joerg
1597 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1598 1.1 joerg _mm512_srai_epi16(__m512i __A, int __B)
1599 1.1 joerg {
1600 1.1 joerg return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B);
1601 1.1 joerg }
1602 1.1 joerg
1603 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1604 1.1 joerg _mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1605 1.1 joerg {
1606 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1607 1.1 joerg (__v32hi)_mm512_srai_epi16(__A, __B),
1608 1.1 joerg (__v32hi)__W);
1609 1.1 joerg }
1610 1.1 joerg
1611 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1612 1.1 joerg _mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, int __B)
1613 1.1 joerg {
1614 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1615 1.1 joerg (__v32hi)_mm512_srai_epi16(__A, __B),
1616 1.1 joerg (__v32hi)_mm512_setzero_si512());
1617 1.1 joerg }
1618 1.1 joerg
1619 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1620 1.1 joerg _mm512_srl_epi16(__m512i __A, __m128i __B)
1621 1.1 joerg {
1622 1.1 joerg return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B);
1623 1.1 joerg }
1624 1.1 joerg
1625 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1626 1.1 joerg _mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1627 1.1 joerg {
1628 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1629 1.1 joerg (__v32hi)_mm512_srl_epi16(__A, __B),
1630 1.1 joerg (__v32hi)__W);
1631 1.1 joerg }
1632 1.1 joerg
1633 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1634 1.1 joerg _mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1635 1.1 joerg {
1636 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1637 1.1 joerg (__v32hi)_mm512_srl_epi16(__A, __B),
1638 1.1 joerg (__v32hi)_mm512_setzero_si512());
1639 1.1 joerg }
1640 1.1 joerg
1641 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1642 1.1 joerg _mm512_srli_epi16(__m512i __A, int __B)
1643 1.1 joerg {
1644 1.1 joerg return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, __B);
1645 1.1 joerg }
1646 1.1 joerg
1647 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1648 1.1 joerg _mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1649 1.1 joerg {
1650 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1651 1.1 joerg (__v32hi)_mm512_srli_epi16(__A, __B),
1652 1.1 joerg (__v32hi)__W);
1653 1.1 joerg }
1654 1.1 joerg
1655 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1656 1.1 joerg _mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B)
1657 1.1 joerg {
1658 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1659 1.1 joerg (__v32hi)_mm512_srli_epi16(__A, __B),
1660 1.1 joerg (__v32hi)_mm512_setzero_si512());
1661 1.1 joerg }
1662 1.1 joerg
/* Expands to the psrldqi512_byteshift builtin applied to a (viewed as
   __v8di) with count imm.  The whole expansion is parenthesized so the
   leading (__m512i) cast stays attached to the call when the macro is
   followed by a postfix operator at the use site. */
#define _mm512_bsrli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)))
1665 1.1 joerg
1666 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1667 1.1 joerg _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
1668 1.1 joerg {
1669 1.1 joerg return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
1670 1.1 joerg (__v32hi) __A,
1671 1.1 joerg (__v32hi) __W);
1672 1.1 joerg }
1673 1.1 joerg
1674 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1675 1.1 joerg _mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A)
1676 1.1 joerg {
1677 1.1 joerg return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
1678 1.1 joerg (__v32hi) __A,
1679 1.1 joerg (__v32hi) _mm512_setzero_si512 ());
1680 1.1 joerg }
1681 1.1 joerg
1682 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1683 1.1 joerg _mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
1684 1.1 joerg {
1685 1.1 joerg return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
1686 1.1 joerg (__v64qi) __A,
1687 1.1 joerg (__v64qi) __W);
1688 1.1 joerg }
1689 1.1 joerg
1690 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1691 1.1 joerg _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
1692 1.1 joerg {
1693 1.1 joerg return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
1694 1.1 joerg (__v64qi) __A,
1695 1.1 joerg (__v64qi) _mm512_setzero_si512 ());
1696 1.1 joerg }
1697 1.1 joerg
1698 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1699 1.1 joerg _mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
1700 1.1 joerg {
1701 1.1 joerg return (__m512i) __builtin_ia32_selectb_512(__M,
1702 1.1 joerg (__v64qi)_mm512_set1_epi8(__A),
1703 1.1 joerg (__v64qi) __O);
1704 1.1 joerg }
1705 1.1 joerg
1706 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1707 1.1 joerg _mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
1708 1.1 joerg {
1709 1.1 joerg return (__m512i) __builtin_ia32_selectb_512(__M,
1710 1.1 joerg (__v64qi) _mm512_set1_epi8(__A),
1711 1.1 joerg (__v64qi) _mm512_setzero_si512());
1712 1.1 joerg }
1713 1.1 joerg
1714 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS
1715 1.1 joerg _mm512_kunpackd (__mmask64 __A, __mmask64 __B)
1716 1.1 joerg {
1717 1.1 joerg return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
1718 1.1 joerg (__mmask64) __B);
1719 1.1 joerg }
1720 1.1 joerg
1721 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS
1722 1.1 joerg _mm512_kunpackw (__mmask32 __A, __mmask32 __B)
1723 1.1 joerg {
1724 1.1 joerg return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
1725 1.1 joerg (__mmask32) __B);
1726 1.1 joerg }
1727 1.1 joerg
1728 1.1 joerg static __inline __m512i __DEFAULT_FN_ATTRS512
1729 1.1 joerg _mm512_loadu_epi16 (void const *__P)
1730 1.1 joerg {
1731 1.1 joerg struct __loadu_epi16 {
1732 1.1 joerg __m512i_u __v;
1733 1.1 joerg } __attribute__((__packed__, __may_alias__));
1734 1.1 joerg return ((struct __loadu_epi16*)__P)->__v;
1735 1.1 joerg }
1736 1.1 joerg
1737 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1738 1.1 joerg _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
1739 1.1 joerg {
1740 1.1 joerg return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
1741 1.1 joerg (__v32hi) __W,
1742 1.1 joerg (__mmask32) __U);
1743 1.1 joerg }
1744 1.1 joerg
1745 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1746 1.1 joerg _mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P)
1747 1.1 joerg {
1748 1.1 joerg return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
1749 1.1 joerg (__v32hi)
1750 1.1 joerg _mm512_setzero_si512 (),
1751 1.1 joerg (__mmask32) __U);
1752 1.1 joerg }
1753 1.1 joerg
1754 1.1 joerg static __inline __m512i __DEFAULT_FN_ATTRS512
1755 1.1 joerg _mm512_loadu_epi8 (void const *__P)
1756 1.1 joerg {
1757 1.1 joerg struct __loadu_epi8 {
1758 1.1 joerg __m512i_u __v;
1759 1.1 joerg } __attribute__((__packed__, __may_alias__));
1760 1.1 joerg return ((struct __loadu_epi8*)__P)->__v;
1761 1.1 joerg }
1762 1.1 joerg
1763 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1764 1.1 joerg _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
1765 1.1 joerg {
1766 1.1 joerg return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
1767 1.1 joerg (__v64qi) __W,
1768 1.1 joerg (__mmask64) __U);
1769 1.1 joerg }
1770 1.1 joerg
1771 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1772 1.1 joerg _mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P)
1773 1.1 joerg {
1774 1.1 joerg return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
1775 1.1 joerg (__v64qi)
1776 1.1 joerg _mm512_setzero_si512 (),
1777 1.1 joerg (__mmask64) __U);
1778 1.1 joerg }
1779 1.1 joerg
1780 1.1 joerg static __inline void __DEFAULT_FN_ATTRS512
1781 1.1 joerg _mm512_storeu_epi16 (void *__P, __m512i __A)
1782 1.1 joerg {
1783 1.1 joerg struct __storeu_epi16 {
1784 1.1 joerg __m512i_u __v;
1785 1.1 joerg } __attribute__((__packed__, __may_alias__));
1786 1.1 joerg ((struct __storeu_epi16*)__P)->__v = __A;
1787 1.1 joerg }
1788 1.1 joerg
1789 1.1 joerg static __inline__ void __DEFAULT_FN_ATTRS512
1790 1.1 joerg _mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A)
1791 1.1 joerg {
1792 1.1 joerg __builtin_ia32_storedquhi512_mask ((__v32hi *) __P,
1793 1.1 joerg (__v32hi) __A,
1794 1.1 joerg (__mmask32) __U);
1795 1.1 joerg }
1796 1.1 joerg
1797 1.1 joerg static __inline void __DEFAULT_FN_ATTRS512
1798 1.1 joerg _mm512_storeu_epi8 (void *__P, __m512i __A)
1799 1.1 joerg {
1800 1.1 joerg struct __storeu_epi8 {
1801 1.1 joerg __m512i_u __v;
1802 1.1 joerg } __attribute__((__packed__, __may_alias__));
1803 1.1 joerg ((struct __storeu_epi8*)__P)->__v = __A;
1804 1.1 joerg }
1805 1.1 joerg
1806 1.1 joerg static __inline__ void __DEFAULT_FN_ATTRS512
1807 1.1 joerg _mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A)
1808 1.1 joerg {
1809 1.1 joerg __builtin_ia32_storedquqi512_mask ((__v64qi *) __P,
1810 1.1 joerg (__v64qi) __A,
1811 1.1 joerg (__mmask64) __U);
1812 1.1 joerg }
1813 1.1 joerg
1814 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1815 1.1 joerg _mm512_test_epi8_mask (__m512i __A, __m512i __B)
1816 1.1 joerg {
1817 1.1 joerg return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B),
1818 1.1 joerg _mm512_setzero_si512());
1819 1.1 joerg }
1820 1.1 joerg
1821 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1822 1.1 joerg _mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
1823 1.1 joerg {
1824 1.1 joerg return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
1825 1.1 joerg _mm512_setzero_si512());
1826 1.1 joerg }
1827 1.1 joerg
1828 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1829 1.1 joerg _mm512_test_epi16_mask (__m512i __A, __m512i __B)
1830 1.1 joerg {
1831 1.1 joerg return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B),
1832 1.1 joerg _mm512_setzero_si512());
1833 1.1 joerg }
1834 1.1 joerg
1835 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1836 1.1 joerg _mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
1837 1.1 joerg {
1838 1.1 joerg return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
1839 1.1 joerg _mm512_setzero_si512());
1840 1.1 joerg }
1841 1.1 joerg
1842 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1843 1.1 joerg _mm512_testn_epi8_mask (__m512i __A, __m512i __B)
1844 1.1 joerg {
1845 1.1 joerg return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512());
1846 1.1 joerg }
1847 1.1 joerg
1848 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1849 1.1 joerg _mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
1850 1.1 joerg {
1851 1.1 joerg return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
1852 1.1 joerg _mm512_setzero_si512());
1853 1.1 joerg }
1854 1.1 joerg
1855 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1856 1.1 joerg _mm512_testn_epi16_mask (__m512i __A, __m512i __B)
1857 1.1 joerg {
1858 1.1 joerg return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B),
1859 1.1 joerg _mm512_setzero_si512());
1860 1.1 joerg }
1861 1.1 joerg
1862 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1863 1.1 joerg _mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
1864 1.1 joerg {
1865 1.1 joerg return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
1866 1.1 joerg _mm512_setzero_si512());
1867 1.1 joerg }
1868 1.1 joerg
1869 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1870 1.1 joerg _mm512_movepi8_mask (__m512i __A)
1871 1.1 joerg {
1872 1.1 joerg return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A);
1873 1.1 joerg }
1874 1.1 joerg
1875 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1876 1.1 joerg _mm512_movepi16_mask (__m512i __A)
1877 1.1 joerg {
1878 1.1 joerg return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A);
1879 1.1 joerg }
1880 1.1 joerg
1881 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1882 1.1 joerg _mm512_movm_epi8 (__mmask64 __A)
1883 1.1 joerg {
1884 1.1 joerg return (__m512i) __builtin_ia32_cvtmask2b512 (__A);
1885 1.1 joerg }
1886 1.1 joerg
1887 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1888 1.1 joerg _mm512_movm_epi16 (__mmask32 __A)
1889 1.1 joerg {
1890 1.1 joerg return (__m512i) __builtin_ia32_cvtmask2w512 (__A);
1891 1.1 joerg }
1892 1.1 joerg
1893 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1894 1.1 joerg _mm512_broadcastb_epi8 (__m128i __A)
1895 1.1 joerg {
1896 1.1 joerg return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A,
1897 1.1 joerg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1898 1.1 joerg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1899 1.1 joerg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1900 1.1 joerg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1901 1.1 joerg }
1902 1.1 joerg
1903 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1904 1.1 joerg _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A)
1905 1.1 joerg {
1906 1.1 joerg return (__m512i)__builtin_ia32_selectb_512(__M,
1907 1.1 joerg (__v64qi) _mm512_broadcastb_epi8(__A),
1908 1.1 joerg (__v64qi) __O);
1909 1.1 joerg }
1910 1.1 joerg
1911 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1912 1.1 joerg _mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A)
1913 1.1 joerg {
1914 1.1 joerg return (__m512i)__builtin_ia32_selectb_512(__M,
1915 1.1 joerg (__v64qi) _mm512_broadcastb_epi8(__A),
1916 1.1 joerg (__v64qi) _mm512_setzero_si512());
1917 1.1 joerg }
1918 1.1 joerg
1919 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1920 1.1 joerg _mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
1921 1.1 joerg {
1922 1.1 joerg return (__m512i) __builtin_ia32_selectw_512(__M,
1923 1.1 joerg (__v32hi) _mm512_set1_epi16(__A),
1924 1.1 joerg (__v32hi) __O);
1925 1.1 joerg }
1926 1.1 joerg
1927 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1928 1.1 joerg _mm512_maskz_set1_epi16 (__mmask32 __M, short __A)
1929 1.1 joerg {
1930 1.1 joerg return (__m512i) __builtin_ia32_selectw_512(__M,
1931 1.1 joerg (__v32hi) _mm512_set1_epi16(__A),
1932 1.1 joerg (__v32hi) _mm512_setzero_si512());
1933 1.1 joerg }
1934 1.1 joerg
1935 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1936 1.1 joerg _mm512_broadcastw_epi16 (__m128i __A)
1937 1.1 joerg {
1938 1.1 joerg return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A,
1939 1.1 joerg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1940 1.1 joerg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1941 1.1 joerg }
1942 1.1 joerg
1943 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1944 1.1 joerg _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A)
1945 1.1 joerg {
1946 1.1 joerg return (__m512i)__builtin_ia32_selectw_512(__M,
1947 1.1 joerg (__v32hi) _mm512_broadcastw_epi16(__A),
1948 1.1 joerg (__v32hi) __O);
1949 1.1 joerg }
1950 1.1 joerg
1951 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1952 1.1 joerg _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
1953 1.1 joerg {
1954 1.1 joerg return (__m512i)__builtin_ia32_selectw_512(__M,
1955 1.1 joerg (__v32hi) _mm512_broadcastw_epi16(__A),
1956 1.1 joerg (__v32hi) _mm512_setzero_si512());
1957 1.1 joerg }
1958 1.1 joerg
1959 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1960 1.1 joerg _mm512_permutexvar_epi16 (__m512i __A, __m512i __B)
1961 1.1 joerg {
1962 1.1 joerg return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
1963 1.1 joerg }
1964 1.1 joerg
1965 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1966 1.1 joerg _mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A,
1967 1.1 joerg __m512i __B)
1968 1.1 joerg {
1969 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
1970 1.1 joerg (__v32hi)_mm512_permutexvar_epi16(__A, __B),
1971 1.1 joerg (__v32hi)_mm512_setzero_si512());
1972 1.1 joerg }
1973 1.1 joerg
1974 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
1975 1.1 joerg _mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
1976 1.1 joerg __m512i __B)
1977 1.1 joerg {
1978 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
1979 1.1 joerg (__v32hi)_mm512_permutexvar_epi16(__A, __B),
1980 1.1 joerg (__v32hi)__W);
1981 1.1 joerg }
1982 1.1 joerg
/* Expands to the palignr512 builtin applied to A and B (both viewed as
   __v64qi) with count N.  The whole expansion is parenthesized so the
   leading (__m512i) cast stays attached to the call when the macro is
   followed by a postfix operator at the use site. */
#define _mm512_alignr_epi8(A, B, N) \
  ((__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \
                                      (__v64qi)(__m512i)(B), (int)(N)))
1986 1.1 joerg
/* Merge-masked variant of _mm512_alignr_epi8: the unmasked result, the
   mask U and the pass-through vector W go to the 8-bit select builtin.
   The whole expansion is parenthesized so the leading (__m512i) cast
   cannot be separated from the call by a postfix operator at the use
   site. */
#define _mm512_mask_alignr_epi8(W, U, A, B, N) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
                             (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
                             (__v64qi)(__m512i)(W)))
1991 1.1 joerg
/* Zero-masked variant of _mm512_alignr_epi8: the unmasked result, the mask
   U and an all-zero vector go to the 8-bit select builtin.  The whole
   expansion is parenthesized so the leading (__m512i) cast cannot be
   separated from the call by a postfix operator at the use site. */
#define _mm512_maskz_alignr_epi8(U, A, B, N) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
                             (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
                             (__v64qi)(__m512i)_mm512_setzero_si512()))
1996 1.1 joerg
/* Expands to the dbpsadbw512 builtin applied to A and B (both viewed as
   __v64qi) with control value imm.  The whole expansion is parenthesized
   so the leading (__m512i) cast stays attached to the call when the macro
   is followed by a postfix operator at the use site. */
#define _mm512_dbsad_epu8(A, B, imm) \
  ((__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \
                                       (__v64qi)(__m512i)(B), (int)(imm)))
2000 1.1 joerg
/* Merge-masked variant of _mm512_dbsad_epu8: the unmasked result, the mask
   U and the pass-through vector W go to the 16-bit select builtin.  The
   whole expansion is parenthesized so the leading (__m512i) cast cannot be
   separated from the call by a postfix operator at the use site. */
#define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                  (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
                                  (__v32hi)(__m512i)(W)))
2005 1.1 joerg
/* Zero-masked variant of _mm512_dbsad_epu8: the unmasked result, the mask
   U and an all-zero vector go to the 16-bit select builtin.  The whole
   expansion is parenthesized so the leading (__m512i) cast cannot be
   separated from the call by a postfix operator at the use site. */
#define _mm512_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                  (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
                                  (__v32hi)_mm512_setzero_si512()))
2010 1.1 joerg
2011 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512
2012 1.1 joerg _mm512_sad_epu8 (__m512i __A, __m512i __B)
2013 1.1 joerg {
2014 1.1 joerg return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A,
2015 1.1 joerg (__v64qi) __B);
2016 1.1 joerg }
2017 1.1 joerg
2018 1.1 joerg #undef __DEFAULT_FN_ATTRS512
2019 1.1 joerg #undef __DEFAULT_FN_ATTRS
2020 1.1 joerg
2021 1.1 joerg #endif
2022