/* Copyright (C) 2013-2018 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

24 1.1 mrg #ifndef _IMMINTRIN_H_INCLUDED
25 1.1 mrg #error "Never use <avx512vbmi2vlintrin.h> directly; include <immintrin.h> instead."
26 1.1 mrg #endif
27 1.1 mrg
28 1.1 mrg #ifndef _AVX512VBMI2VLINTRIN_H_INCLUDED
29 1.1 mrg #define _AVX512VBMI2VLINTRIN_H_INCLUDED
30 1.1 mrg
31 1.1 mrg #if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__)
32 1.1 mrg #pragma GCC push_options
33 1.1 mrg #pragma GCC target("avx512vbmi2,avx512vl")
34 1.1 mrg #define __DISABLE_AVX512VBMI2VL__
35 1.1 mrg #endif /* __AVX512VBMIVL__ */
36 1.1 mrg
37 1.1 mrg extern __inline __m128i
38 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
39 1.1 mrg _mm_mask_compress_epi8 (__m128i __A, __mmask16 __B, __m128i __C)
40 1.1 mrg {
41 1.1 mrg return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi)__C,
42 1.1 mrg (__v16qi)__A, (__mmask16)__B);
43 1.1 mrg }
44 1.1 mrg
45 1.1 mrg extern __inline __m128i
46 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
47 1.1 mrg _mm_maskz_compress_epi8 (__mmask16 __A, __m128i __B)
48 1.1 mrg {
49 1.1 mrg return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __B,
50 1.1 mrg (__v16qi) _mm_setzero_si128 (), (__mmask16) __A);
51 1.1 mrg }
52 1.1 mrg
53 1.1 mrg
54 1.1 mrg extern __inline void
55 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
56 1.1 mrg _mm256_mask_compressstoreu_epi16 (void * __A, __mmask16 __B, __m256i __C)
57 1.1 mrg {
58 1.1 mrg __builtin_ia32_compressstoreuhi256_mask ((__v16hi *) __A, (__v16hi) __C,
59 1.1 mrg (__mmask16) __B);
60 1.1 mrg }
61 1.1 mrg
62 1.1 mrg extern __inline __m128i
63 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
64 1.1 mrg _mm_mask_compress_epi16 (__m128i __A, __mmask8 __B, __m128i __C)
65 1.1 mrg {
66 1.1 mrg return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi)__C, (__v8hi)__A,
67 1.1 mrg (__mmask8)__B);
68 1.1 mrg }
69 1.1 mrg
70 1.1 mrg extern __inline __m128i
71 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
72 1.1 mrg _mm_maskz_compress_epi16 (__mmask8 __A, __m128i __B)
73 1.1 mrg {
74 1.1 mrg return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __B,
75 1.1 mrg (__v8hi) _mm_setzero_si128 (), (__mmask8) __A);
76 1.1 mrg }
77 1.1 mrg
78 1.1 mrg extern __inline __m256i
79 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
80 1.1 mrg _mm256_mask_compress_epi16 (__m256i __A, __mmask16 __B, __m256i __C)
81 1.1 mrg {
82 1.1 mrg return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi)__C,
83 1.1 mrg (__v16hi)__A, (__mmask16)__B);
84 1.1 mrg }
85 1.1 mrg
86 1.1 mrg extern __inline __m256i
87 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
88 1.1 mrg _mm256_maskz_compress_epi16 (__mmask16 __A, __m256i __B)
89 1.1 mrg {
90 1.1 mrg return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __B,
91 1.1 mrg (__v16hi) _mm256_setzero_si256 (), (__mmask16) __A);
92 1.1 mrg }
93 1.1 mrg
94 1.1 mrg extern __inline void
95 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
96 1.1 mrg _mm_mask_compressstoreu_epi8 (void * __A, __mmask16 __B, __m128i __C)
97 1.1 mrg {
98 1.1 mrg __builtin_ia32_compressstoreuqi128_mask ((__v16qi *) __A, (__v16qi) __C,
99 1.1 mrg (__mmask16) __B);
100 1.1 mrg }
101 1.1 mrg
102 1.1 mrg extern __inline void
103 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
104 1.1 mrg _mm_mask_compressstoreu_epi16 (void * __A, __mmask8 __B, __m128i __C)
105 1.1 mrg {
106 1.1 mrg __builtin_ia32_compressstoreuhi128_mask ((__v8hi *) __A, (__v8hi) __C,
107 1.1 mrg (__mmask8) __B);
108 1.1 mrg }
109 1.1 mrg
110 1.1 mrg extern __inline __m128i
111 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
112 1.1 mrg _mm_mask_expand_epi8 (__m128i __A, __mmask16 __B, __m128i __C)
113 1.1 mrg {
114 1.1 mrg return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __C,
115 1.1 mrg (__v16qi) __A,
116 1.1 mrg (__mmask16) __B);
117 1.1 mrg }
118 1.1 mrg
119 1.1 mrg extern __inline __m128i
120 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
121 1.1 mrg _mm_maskz_expand_epi8 (__mmask16 __A, __m128i __B)
122 1.1 mrg {
123 1.1 mrg return (__m128i) __builtin_ia32_expandqi128_maskz ((__v16qi) __B,
124 1.1 mrg (__v16qi) _mm_setzero_si128 (), (__mmask16) __A);
125 1.1 mrg }
126 1.1 mrg
127 1.1 mrg extern __inline __m128i
128 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
129 1.1 mrg _mm_mask_expandloadu_epi8 (__m128i __A, __mmask16 __B, const void * __C)
130 1.1 mrg {
131 1.1 mrg return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *) __C,
132 1.1 mrg (__v16qi) __A, (__mmask16) __B);
133 1.1 mrg }
134 1.1 mrg
135 1.1 mrg extern __inline __m128i
136 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
137 1.1 mrg _mm_maskz_expandloadu_epi8 (__mmask16 __A, const void * __B)
138 1.1 mrg {
139 1.1 mrg return (__m128i) __builtin_ia32_expandloadqi128_maskz ((const __v16qi *) __B,
140 1.1 mrg (__v16qi) _mm_setzero_si128 (), (__mmask16) __A);
141 1.1 mrg }
142 1.1 mrg
143 1.1 mrg extern __inline __m128i
144 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
145 1.1 mrg _mm_mask_expand_epi16 (__m128i __A, __mmask8 __B, __m128i __C)
146 1.1 mrg {
147 1.1 mrg return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __C,
148 1.1 mrg (__v8hi) __A,
149 1.1 mrg (__mmask8) __B);
150 1.1 mrg }
151 1.1 mrg
152 1.1 mrg extern __inline __m128i
153 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
154 1.1 mrg _mm_maskz_expand_epi16 (__mmask8 __A, __m128i __B)
155 1.1 mrg {
156 1.1 mrg return (__m128i) __builtin_ia32_expandhi128_maskz ((__v8hi) __B,
157 1.1 mrg (__v8hi) _mm_setzero_si128 (), (__mmask8) __A);
158 1.1 mrg }
159 1.1 mrg
160 1.1 mrg extern __inline __m128i
161 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
162 1.1 mrg _mm_mask_expandloadu_epi16 (__m128i __A, __mmask8 __B, const void * __C)
163 1.1 mrg {
164 1.1 mrg return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *) __C,
165 1.1 mrg (__v8hi) __A, (__mmask8) __B);
166 1.1 mrg }
167 1.1 mrg
168 1.1 mrg extern __inline __m128i
169 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
170 1.1 mrg _mm_maskz_expandloadu_epi16 (__mmask8 __A, const void * __B)
171 1.1 mrg {
172 1.1 mrg return (__m128i) __builtin_ia32_expandloadhi128_maskz ((const __v8hi *) __B,
173 1.1 mrg (__v8hi) _mm_setzero_si128 (), (__mmask8) __A);
174 1.1 mrg }
175 1.1 mrg extern __inline __m256i
176 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
177 1.1 mrg _mm256_mask_expand_epi16 (__m256i __A, __mmask16 __B, __m256i __C)
178 1.1 mrg {
179 1.1 mrg return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __C,
180 1.1 mrg (__v16hi) __A,
181 1.1 mrg (__mmask16) __B);
182 1.1 mrg }
183 1.1 mrg
184 1.1 mrg extern __inline __m256i
185 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
186 1.1 mrg _mm256_maskz_expand_epi16 (__mmask16 __A, __m256i __B)
187 1.1 mrg {
188 1.1 mrg return (__m256i) __builtin_ia32_expandhi256_maskz ((__v16hi) __B,
189 1.1 mrg (__v16hi) _mm256_setzero_si256 (), (__mmask16) __A);
190 1.1 mrg }
191 1.1 mrg
192 1.1 mrg extern __inline __m256i
193 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
194 1.1 mrg _mm256_mask_expandloadu_epi16 (__m256i __A, __mmask16 __B, const void * __C)
195 1.1 mrg {
196 1.1 mrg return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *) __C,
197 1.1 mrg (__v16hi) __A, (__mmask16) __B);
198 1.1 mrg }
199 1.1 mrg
200 1.1 mrg extern __inline __m256i
201 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
202 1.1 mrg _mm256_maskz_expandloadu_epi16 (__mmask16 __A, const void * __B)
203 1.1 mrg {
204 1.1 mrg return (__m256i) __builtin_ia32_expandloadhi256_maskz ((const __v16hi *) __B,
205 1.1 mrg (__v16hi) _mm256_setzero_si256 (), (__mmask16) __A);
206 1.1 mrg }
207 1.1 mrg
208 1.1 mrg #ifdef __OPTIMIZE__
209 1.1 mrg extern __inline __m256i
210 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
211 1.1 mrg _mm256_shrdi_epi16 (__m256i __A, __m256i __B, int __C)
212 1.1 mrg {
213 1.1 mrg return (__m256i) __builtin_ia32_vpshrd_v16hi ((__v16hi)__A, (__v16hi) __B,
214 1.1 mrg __C);
215 1.1 mrg }
216 1.1 mrg
217 1.1 mrg extern __inline __m256i
218 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
219 1.1 mrg _mm256_mask_shrdi_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D,
220 1.1 mrg int __E)
221 1.1 mrg {
222 1.1 mrg return (__m256i)__builtin_ia32_vpshrd_v16hi_mask ((__v16hi)__C,
223 1.1 mrg (__v16hi) __D, __E, (__v16hi) __A, (__mmask16)__B);
224 1.1 mrg }
225 1.1 mrg
226 1.1 mrg extern __inline __m256i
227 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
228 1.1 mrg _mm256_maskz_shrdi_epi16 (__mmask16 __A, __m256i __B, __m256i __C, int __D)
229 1.1 mrg {
230 1.1 mrg return (__m256i)__builtin_ia32_vpshrd_v16hi_mask ((__v16hi)__B,
231 1.1 mrg (__v16hi) __C, __D, (__v16hi) _mm256_setzero_si256 (), (__mmask16)__A);
232 1.1 mrg }
233 1.1 mrg
234 1.1 mrg extern __inline __m256i
235 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
236 1.1 mrg _mm256_mask_shrdi_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
237 1.1 mrg int __E)
238 1.1 mrg {
239 1.1 mrg return (__m256i)__builtin_ia32_vpshrd_v8si_mask ((__v8si)__C, (__v8si) __D,
240 1.1 mrg __E, (__v8si) __A, (__mmask8)__B);
241 1.1 mrg }
242 1.1 mrg
243 1.1 mrg extern __inline __m256i
244 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
245 1.1 mrg _mm256_maskz_shrdi_epi32 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
246 1.1 mrg {
247 1.1 mrg return (__m256i)__builtin_ia32_vpshrd_v8si_mask ((__v8si)__B, (__v8si) __C,
248 1.1 mrg __D, (__v8si) _mm256_setzero_si256 (), (__mmask8)__A);
249 1.1 mrg }
250 1.1 mrg
251 1.1 mrg extern __inline __m256i
252 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
253 1.1 mrg _mm256_shrdi_epi32 (__m256i __A, __m256i __B, int __C)
254 1.1 mrg {
255 1.1 mrg return (__m256i) __builtin_ia32_vpshrd_v8si ((__v8si)__A, (__v8si) __B, __C);
256 1.1 mrg }
257 1.1 mrg
258 1.1 mrg extern __inline __m256i
259 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
260 1.1 mrg _mm256_mask_shrdi_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
261 1.1 mrg int __E)
262 1.1 mrg {
263 1.1 mrg return (__m256i)__builtin_ia32_vpshrd_v4di_mask ((__v4di)__C, (__v4di) __D,
264 1.1 mrg __E, (__v4di) __A, (__mmask8)__B);
265 1.1 mrg }
266 1.1 mrg
267 1.1 mrg extern __inline __m256i
268 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
269 1.1 mrg _mm256_maskz_shrdi_epi64 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
270 1.1 mrg {
271 1.1 mrg return (__m256i)__builtin_ia32_vpshrd_v4di_mask ((__v4di)__B, (__v4di) __C,
272 1.1 mrg __D, (__v4di) _mm256_setzero_si256 (), (__mmask8)__A);
273 1.1 mrg }
274 1.1 mrg
275 1.1 mrg extern __inline __m256i
276 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
277 1.1 mrg _mm256_shrdi_epi64 (__m256i __A, __m256i __B, int __C)
278 1.1 mrg {
279 1.1 mrg return (__m256i) __builtin_ia32_vpshrd_v4di ((__v4di)__A, (__v4di) __B, __C);
280 1.1 mrg }
281 1.1 mrg
282 1.1 mrg extern __inline __m128i
283 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
284 1.1 mrg _mm_mask_shrdi_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
285 1.1 mrg int __E)
286 1.1 mrg {
287 1.1 mrg return (__m128i)__builtin_ia32_vpshrd_v8hi_mask ((__v8hi)__C, (__v8hi) __D,
288 1.1 mrg __E, (__v8hi) __A, (__mmask8)__B);
289 1.1 mrg }
290 1.1 mrg
291 1.1 mrg extern __inline __m128i
292 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
293 1.1 mrg _mm_maskz_shrdi_epi16 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
294 1.1 mrg {
295 1.1 mrg return (__m128i)__builtin_ia32_vpshrd_v8hi_mask ((__v8hi)__B, (__v8hi) __C,
296 1.1 mrg __D, (__v8hi) _mm_setzero_si128 (), (__mmask8)__A);
297 1.1 mrg }
298 1.1 mrg
299 1.1 mrg extern __inline __m128i
300 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
301 1.1 mrg _mm_shrdi_epi16 (__m128i __A, __m128i __B, int __C)
302 1.1 mrg {
303 1.1 mrg return (__m128i) __builtin_ia32_vpshrd_v8hi ((__v8hi)__A, (__v8hi) __B, __C);
304 1.1 mrg }
305 1.1 mrg
306 1.1 mrg extern __inline __m128i
307 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
308 1.1 mrg _mm_mask_shrdi_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
309 1.1 mrg int __E)
310 1.1 mrg {
311 1.1 mrg return (__m128i)__builtin_ia32_vpshrd_v4si_mask ((__v4si)__C, (__v4si) __D,
312 1.1 mrg __E, (__v4si) __A, (__mmask8)__B);
313 1.1 mrg }
314 1.1 mrg
315 1.1 mrg extern __inline __m128i
316 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
317 1.1 mrg _mm_maskz_shrdi_epi32 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
318 1.1 mrg {
319 1.1 mrg return (__m128i)__builtin_ia32_vpshrd_v4si_mask ((__v4si)__B, (__v4si) __C,
320 1.1 mrg __D, (__v4si) _mm_setzero_si128 (), (__mmask8)__A);
321 1.1 mrg }
322 1.1 mrg
323 1.1 mrg extern __inline __m128i
324 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
325 1.1 mrg _mm_shrdi_epi32 (__m128i __A, __m128i __B, int __C)
326 1.1 mrg {
327 1.1 mrg return (__m128i) __builtin_ia32_vpshrd_v4si ((__v4si)__A, (__v4si) __B, __C);
328 1.1 mrg }
329 1.1 mrg
330 1.1 mrg extern __inline __m128i
331 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
332 1.1 mrg _mm_mask_shrdi_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
333 1.1 mrg int __E)
334 1.1 mrg {
335 1.1 mrg return (__m128i)__builtin_ia32_vpshrd_v2di_mask ((__v2di)__C, (__v2di) __D,
336 1.1 mrg __E, (__v2di) __A, (__mmask8)__B);
337 1.1 mrg }
338 1.1 mrg
339 1.1 mrg extern __inline __m128i
340 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
341 1.1 mrg _mm_maskz_shrdi_epi64 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
342 1.1 mrg {
343 1.1 mrg return (__m128i)__builtin_ia32_vpshrd_v2di_mask ((__v2di)__B, (__v2di) __C,
344 1.1 mrg __D, (__v2di) _mm_setzero_si128 (), (__mmask8)__A);
345 1.1 mrg }
346 1.1 mrg
347 1.1 mrg extern __inline __m128i
348 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
349 1.1 mrg _mm_shrdi_epi64 (__m128i __A, __m128i __B, int __C)
350 1.1 mrg {
351 1.1 mrg return (__m128i) __builtin_ia32_vpshrd_v2di ((__v2di)__A, (__v2di) __B, __C);
352 1.1 mrg }
353 1.1 mrg
354 1.1 mrg extern __inline __m256i
355 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
356 1.1 mrg _mm256_shldi_epi16 (__m256i __A, __m256i __B, int __C)
357 1.1 mrg {
358 1.1 mrg return (__m256i) __builtin_ia32_vpshld_v16hi ((__v16hi)__A, (__v16hi) __B,
359 1.1 mrg __C);
360 1.1 mrg }
361 1.1 mrg
362 1.1 mrg extern __inline __m256i
363 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
364 1.1 mrg _mm256_mask_shldi_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D,
365 1.1 mrg int __E)
366 1.1 mrg {
367 1.1 mrg return (__m256i)__builtin_ia32_vpshld_v16hi_mask ((__v16hi)__C,
368 1.1 mrg (__v16hi) __D, __E, (__v16hi) __A, (__mmask16)__B);
369 1.1 mrg }
370 1.1 mrg
371 1.1 mrg extern __inline __m256i
372 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
373 1.1 mrg _mm256_maskz_shldi_epi16 (__mmask16 __A, __m256i __B, __m256i __C, int __D)
374 1.1 mrg {
375 1.1 mrg return (__m256i)__builtin_ia32_vpshld_v16hi_mask ((__v16hi)__B,
376 1.1 mrg (__v16hi) __C, __D, (__v16hi) _mm256_setzero_si256 (), (__mmask16)__A);
377 1.1 mrg }
378 1.1 mrg
379 1.1 mrg extern __inline __m256i
380 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
381 1.1 mrg _mm256_mask_shldi_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
382 1.1 mrg int __E)
383 1.1 mrg {
384 1.1 mrg return (__m256i)__builtin_ia32_vpshld_v8si_mask ((__v8si)__C, (__v8si) __D,
385 1.1 mrg __E, (__v8si) __A, (__mmask8)__B);
386 1.1 mrg }
387 1.1 mrg
388 1.1 mrg extern __inline __m256i
389 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
390 1.1 mrg _mm256_maskz_shldi_epi32 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
391 1.1 mrg {
392 1.1 mrg return (__m256i)__builtin_ia32_vpshld_v8si_mask ((__v8si)__B, (__v8si) __C,
393 1.1 mrg __D, (__v8si) _mm256_setzero_si256 (), (__mmask8)__A);
394 1.1 mrg }
395 1.1 mrg
396 1.1 mrg extern __inline __m256i
397 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
398 1.1 mrg _mm256_shldi_epi32 (__m256i __A, __m256i __B, int __C)
399 1.1 mrg {
400 1.1 mrg return (__m256i) __builtin_ia32_vpshld_v8si ((__v8si)__A, (__v8si) __B, __C);
401 1.1 mrg }
402 1.1 mrg
403 1.1 mrg extern __inline __m256i
404 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
405 1.1 mrg _mm256_mask_shldi_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
406 1.1 mrg int __E)
407 1.1 mrg {
408 1.1 mrg return (__m256i)__builtin_ia32_vpshld_v4di_mask ((__v4di)__C, (__v4di) __D,
409 1.1 mrg __E, (__v4di) __A, (__mmask8)__B);
410 1.1 mrg }
411 1.1 mrg
412 1.1 mrg extern __inline __m256i
413 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
414 1.1 mrg _mm256_maskz_shldi_epi64 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
415 1.1 mrg {
416 1.1 mrg return (__m256i)__builtin_ia32_vpshld_v4di_mask ((__v4di)__B, (__v4di) __C,
417 1.1 mrg __D, (__v4di) _mm256_setzero_si256 (), (__mmask8)__A);
418 1.1 mrg }
419 1.1 mrg
420 1.1 mrg extern __inline __m256i
421 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
422 1.1 mrg _mm256_shldi_epi64 (__m256i __A, __m256i __B, int __C)
423 1.1 mrg {
424 1.1 mrg return (__m256i) __builtin_ia32_vpshld_v4di ((__v4di)__A, (__v4di) __B, __C);
425 1.1 mrg }
426 1.1 mrg
427 1.1 mrg extern __inline __m128i
428 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
429 1.1 mrg _mm_mask_shldi_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
430 1.1 mrg int __E)
431 1.1 mrg {
432 1.1 mrg return (__m128i)__builtin_ia32_vpshld_v8hi_mask ((__v8hi)__C, (__v8hi) __D,
433 1.1 mrg __E, (__v8hi) __A, (__mmask8)__B);
434 1.1 mrg }
435 1.1 mrg
436 1.1 mrg extern __inline __m128i
437 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
438 1.1 mrg _mm_maskz_shldi_epi16 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
439 1.1 mrg {
440 1.1 mrg return (__m128i)__builtin_ia32_vpshld_v8hi_mask ((__v8hi)__B, (__v8hi) __C,
441 1.1 mrg __D, (__v8hi) _mm_setzero_si128 (), (__mmask8)__A);
442 1.1 mrg }
443 1.1 mrg
444 1.1 mrg extern __inline __m128i
445 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
446 1.1 mrg _mm_shldi_epi16 (__m128i __A, __m128i __B, int __C)
447 1.1 mrg {
448 1.1 mrg return (__m128i) __builtin_ia32_vpshld_v8hi ((__v8hi)__A, (__v8hi) __B, __C);
449 1.1 mrg }
450 1.1 mrg
451 1.1 mrg extern __inline __m128i
452 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
453 1.1 mrg _mm_mask_shldi_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
454 1.1 mrg int __E)
455 1.1 mrg {
456 1.1 mrg return (__m128i)__builtin_ia32_vpshld_v4si_mask ((__v4si)__C, (__v4si) __D,
457 1.1 mrg __E, (__v4si) __A, (__mmask8)__B);
458 1.1 mrg }
459 1.1 mrg
460 1.1 mrg extern __inline __m128i
461 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
462 1.1 mrg _mm_maskz_shldi_epi32 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
463 1.1 mrg {
464 1.1 mrg return (__m128i)__builtin_ia32_vpshld_v4si_mask ((__v4si)__B, (__v4si) __C,
465 1.1 mrg __D, (__v4si) _mm_setzero_si128 (), (__mmask8)__A);
466 1.1 mrg }
467 1.1 mrg
468 1.1 mrg extern __inline __m128i
469 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
470 1.1 mrg _mm_shldi_epi32 (__m128i __A, __m128i __B, int __C)
471 1.1 mrg {
472 1.1 mrg return (__m128i) __builtin_ia32_vpshld_v4si ((__v4si)__A, (__v4si) __B, __C);
473 1.1 mrg }
474 1.1 mrg
475 1.1 mrg extern __inline __m128i
476 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
477 1.1 mrg _mm_mask_shldi_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
478 1.1 mrg int __E)
479 1.1 mrg {
480 1.1 mrg return (__m128i)__builtin_ia32_vpshld_v2di_mask ((__v2di)__C, (__v2di) __D,
481 1.1 mrg __E, (__v2di) __A, (__mmask8)__B);
482 1.1 mrg }
483 1.1 mrg
484 1.1 mrg extern __inline __m128i
485 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
486 1.1 mrg _mm_maskz_shldi_epi64 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
487 1.1 mrg {
488 1.1 mrg return (__m128i)__builtin_ia32_vpshld_v2di_mask ((__v2di)__B, (__v2di) __C,
489 1.1 mrg __D, (__v2di) _mm_setzero_si128 (), (__mmask8)__A);
490 1.1 mrg }
491 1.1 mrg
492 1.1 mrg extern __inline __m128i
493 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
494 1.1 mrg _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
495 1.1 mrg {
496 1.1 mrg return (__m128i) __builtin_ia32_vpshld_v2di ((__v2di)__A, (__v2di) __B, __C);
497 1.1 mrg }
498 1.1 mrg #else
499 1.1 mrg #define _mm256_shrdi_epi16(A, B, C) \
500 1.1 mrg ((__m256i) __builtin_ia32_vpshrd_v16hi ((__v16hi)(__m256i)(A), \
501 1.1 mrg (__v16hi)(__m256i)(B),(int)(C)))
502 1.1 mrg #define _mm256_mask_shrdi_epi16(A, B, C, D, E) \
503 1.1 mrg ((__m256i) __builtin_ia32_vpshrd_v16hi_mask ((__v16hi)(__m256i)(C), \
504 1.1 mrg (__v16hi)(__m256i)(D), \
505 1.1 mrg (int)(E), \
506 1.1 mrg (__v16hi)(__m256i)(A), \
507 1.1 mrg (__mmask16)(B)))
508 1.1 mrg #define _mm256_maskz_shrdi_epi16(A, B, C, D) \
509 1.1 mrg ((__m256i) \
510 1.1 mrg __builtin_ia32_vpshrd_v16hi_mask ((__v16hi)(__m256i)(B), \
511 1.1 mrg (__v16hi)(__m256i)(C),(int)(D), \
512 1.1 mrg (__v16hi)(__m256i)_mm256_setzero_si256 (), \
513 1.1 mrg (__mmask16)(A)))
514 1.1 mrg #define _mm256_shrdi_epi32(A, B, C) \
515 1.1 mrg ((__m256i) __builtin_ia32_vpshrd_v8si ((__v8si)(__m256i)(A), \
516 1.1 mrg (__v8si)(__m256i)(B),(int)(C)))
517 1.1 mrg #define _mm256_mask_shrdi_epi32(A, B, C, D, E) \
518 1.1 mrg ((__m256i) __builtin_ia32_vpshrd_v8si_mask ((__v8si)(__m256i)(C), \
519 1.1 mrg (__v8si)(__m256i)(D), \
520 1.1 mrg (int)(E), \
521 1.1 mrg (__v8si)(__m256i)(A), \
522 1.1 mrg (__mmask8)(B)))
523 1.1 mrg #define _mm256_maskz_shrdi_epi32(A, B, C, D) \
524 1.1 mrg ((__m256i) \
525 1.1 mrg __builtin_ia32_vpshrd_v8si_mask ((__v8si)(__m256i)(B), \
526 1.1 mrg (__v8si)(__m256i)(C),(int)(D), \
527 1.1 mrg (__v8si)(__m256i)_mm256_setzero_si256 (), \
528 1.1 mrg (__mmask8)(A)))
529 1.1 mrg #define _mm256_shrdi_epi64(A, B, C) \
530 1.1 mrg ((__m256i) __builtin_ia32_vpshrd_v4di ((__v4di)(__m256i)(A), \
531 1.1 mrg (__v4di)(__m256i)(B),(int)(C)))
532 1.1 mrg #define _mm256_mask_shrdi_epi64(A, B, C, D, E) \
533 1.1 mrg ((__m256i) __builtin_ia32_vpshrd_v4di_mask ((__v4di)(__m256i)(C), \
534 1.1 mrg (__v4di)(__m256i)(D), (int)(E), \
535 1.1 mrg (__v4di)(__m256i)(A), \
536 1.1 mrg (__mmask8)(B)))
537 1.1 mrg #define _mm256_maskz_shrdi_epi64(A, B, C, D) \
538 1.1 mrg ((__m256i) \
539 1.1 mrg __builtin_ia32_vpshrd_v4di_mask ((__v4di)(__m256i)(B), \
540 1.1 mrg (__v4di)(__m256i)(C),(int)(D), \
541 1.1 mrg (__v4di)(__m256i)_mm256_setzero_si256 (), \
542 1.1 mrg (__mmask8)(A)))
543 1.1 mrg #define _mm_shrdi_epi16(A, B, C) \
544 1.1 mrg ((__m128i) __builtin_ia32_vpshrd_v8hi ((__v8hi)(__m128i)(A), \
545 1.1 mrg (__v8hi)(__m128i)(B),(int)(C)))
546 1.1 mrg #define _mm_mask_shrdi_epi16(A, B, C, D, E) \
547 1.1 mrg ((__m128i) __builtin_ia32_vpshrd_v8hi_mask ((__v8hi)(__m128i)(C), \
548 1.1 mrg (__v8hi)(__m128i)(D), (int)(E), \
549 1.1 mrg (__v8hi)(__m128i)(A), \
550 1.1 mrg (__mmask8)(B)))
551 1.1 mrg #define _mm_maskz_shrdi_epi16(A, B, C, D) \
552 1.1 mrg ((__m128i) \
553 1.1 mrg __builtin_ia32_vpshrd_v8hi_mask ((__v8hi)(__m128i)(B), \
554 1.1 mrg (__v8hi)(__m128i)(C),(int)(D), \
555 1.1 mrg (__v8hi)(__m128i)_mm_setzero_si128 (), \
556 1.1 mrg (__mmask8)(A)))
557 1.1 mrg #define _mm_shrdi_epi32(A, B, C) \
558 1.1 mrg ((__m128i) __builtin_ia32_vpshrd_v4si ((__v4si)(__m128i)(A), \
559 1.1 mrg (__v4si)(__m128i)(B),(int)(C)))
560 1.1 mrg #define _mm_mask_shrdi_epi32(A, B, C, D, E) \
561 1.1 mrg ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(C), \
562 1.1 mrg (__v4si)(__m128i)(D), (int)(E), \
563 1.1 mrg (__v4si)(__m128i)(A), \
564 1.1 mrg (__mmask8)(B)))
565 1.1 mrg #define _mm_maskz_shrdi_epi32(A, B, C, D) \
566 1.1 mrg ((__m128i) \
567 1.1 mrg __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(B), \
568 1.1 mrg (__v4si)(__m128i)(C),(int)(D), \
569 1.1 mrg (__v4si)(__m128i)_mm_setzero_si128 (), \
570 1.1 mrg (__mmask8)(A)))
571 1.1 mrg #define _mm_shrdi_epi64(A, B, C) \
572 1.1 mrg ((__m128i) __builtin_ia32_vpshrd_v2di ((__v2di)(__m128i)(A), \
573 1.1 mrg (__v2di)(__m128i)(B),(int)(C)))
574 1.1 mrg #define _mm_mask_shrdi_epi64(A, B, C, D, E) \
575 1.1 mrg ((__m128i) __builtin_ia32_vpshrd_v2di_mask ((__v2di)(__m128i)(C), \
576 1.1 mrg (__v2di)(__m128i)(D), (int)(E), \
577 1.1 mrg (__v2di)(__m128i)(A), \
578 1.1 mrg (__mmask8)(B)))
579 1.1 mrg #define _mm_maskz_shrdi_epi64(A, B, C, D) \
580 1.1 mrg ((__m128i) \
581 1.1 mrg __builtin_ia32_vpshrd_v2di_mask ((__v2di)(__m128i)(B), \
582 1.1 mrg (__v2di)(__m128i)(C),(int)(D), \
583 1.1 mrg (__v2di)(__m128i)_mm_setzero_si128 (), \
584 1.1 mrg (__mmask8)(A)))
585 1.1 mrg #define _mm256_shldi_epi16(A, B, C) \
586 1.1 mrg ((__m256i) __builtin_ia32_vpshld_v16hi ((__v16hi)(__m256i)(A), \
587 1.1 mrg (__v16hi)(__m256i)(B),(int)(C)))
588 1.1 mrg #define _mm256_mask_shldi_epi16(A, B, C, D, E) \
589 1.1 mrg ((__m256i) __builtin_ia32_vpshld_v16hi_mask ((__v16hi)(__m256i)(C), \
590 1.1 mrg (__v16hi)(__m256i)(D), \
591 1.1 mrg (int)(E), \
592 1.1 mrg (__v16hi)(__m256i)(A), \
593 1.1 mrg (__mmask16)(B)))
594 1.1 mrg #define _mm256_maskz_shldi_epi16(A, B, C, D) \
595 1.1 mrg ((__m256i) \
596 1.1 mrg __builtin_ia32_vpshld_v16hi_mask ((__v16hi)(__m256i)(B), \
597 1.1 mrg (__v16hi)(__m256i)(C),(int)(D), \
598 1.1 mrg (__v16hi)(__m256i)_mm256_setzero_si256 (), \
599 1.1 mrg (__mmask16)(A)))
600 1.1 mrg #define _mm256_shldi_epi32(A, B, C) \
601 1.1 mrg ((__m256i) __builtin_ia32_vpshld_v8si ((__v8si)(__m256i)(A), \
602 1.1 mrg (__v8si)(__m256i)(B),(int)(C)))
603 1.1 mrg #define _mm256_mask_shldi_epi32(A, B, C, D, E) \
604 1.1 mrg ((__m256i) __builtin_ia32_vpshld_v8si_mask ((__v8si)(__m256i)(C), \
605 1.1 mrg (__v8si)(__m256i)(D), (int)(E), \
606 1.1 mrg (__v8si)(__m256i)(A), \
607 1.1 mrg (__mmask8)(B)))
608 1.1 mrg #define _mm256_maskz_shldi_epi32(A, B, C, D) \
609 1.1 mrg ((__m256i) \
610 1.1 mrg __builtin_ia32_vpshld_v8si_mask ((__v8si)(__m256i)(B), \
611 1.1 mrg (__v8si)(__m256i)(C),(int)(D), \
612 1.1 mrg (__v8si)(__m256i)_mm256_setzero_si256 (), \
613 1.1 mrg (__mmask8)(A)))
614 1.1 mrg #define _mm256_shldi_epi64(A, B, C) \
615 1.1 mrg ((__m256i) __builtin_ia32_vpshld_v4di ((__v4di)(__m256i)(A), \
616 1.1 mrg (__v4di)(__m256i)(B),(int)(C)))
617 1.1 mrg #define _mm256_mask_shldi_epi64(A, B, C, D, E) \
618 1.1 mrg ((__m256i) __builtin_ia32_vpshld_v4di_mask ((__v4di)(__m256i)(C), \
619 1.1 mrg (__v4di)(__m256i)(D), (int)(E), \
620 1.1 mrg (__v4di)(__m256i)(A), \
621 1.1 mrg (__mmask8)(B)))
622 1.1 mrg #define _mm256_maskz_shldi_epi64(A, B, C, D) \
623 1.1 mrg ((__m256i) \
624 1.1 mrg __builtin_ia32_vpshld_v4di_mask ((__v4di)(__m256i)(B), \
625 1.1 mrg (__v4di)(__m256i)(C),(int)(D), \
626 1.1 mrg (__v4di)(__m256i)_mm256_setzero_si256 (), \
627 1.1 mrg (__mmask8)(A)))
628 1.1 mrg #define _mm_shldi_epi16(A, B, C) \
629 1.1 mrg ((__m128i) __builtin_ia32_vpshld_v8hi ((__v8hi)(__m128i)(A), \
630 1.1 mrg (__v8hi)(__m128i)(B),(int)(C)))
631 1.1 mrg #define _mm_mask_shldi_epi16(A, B, C, D, E) \
632 1.1 mrg ((__m128i) __builtin_ia32_vpshld_v8hi_mask ((__v8hi)(__m128i)(C), \
633 1.1 mrg (__v8hi)(__m128i)(D), (int)(E), \
634 1.1 mrg (__v8hi)(__m128i)(A), \
635 1.1 mrg (__mmask8)(B)))
636 1.1 mrg #define _mm_maskz_shldi_epi16(A, B, C, D) \
637 1.1 mrg ((__m128i) \
638 1.1 mrg __builtin_ia32_vpshld_v8hi_mask ((__v8hi)(__m128i)(B), \
639 1.1 mrg (__v8hi)(__m128i)(C),(int)(D), \
640 1.1 mrg (__v8hi)(__m128i)_mm_setzero_si128 (), \
641 1.1 mrg (__mmask8)(A)))
642 1.1 mrg #define _mm_shldi_epi32(A, B, C) \
643 1.1 mrg ((__m128i) __builtin_ia32_vpshld_v4si ((__v4si)(__m128i)(A), \
644 1.1 mrg (__v4si)(__m128i)(B),(int)(C)))
645 1.1 mrg #define _mm_mask_shldi_epi32(A, B, C, D, E) \
646 1.1 mrg ((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(C), \
647 1.1 mrg (__v4si)(__m128i)(D), (int)(E), \
648 1.1 mrg (__v4si)(__m128i)(A), \
649 1.1 mrg (__mmask8)(B)))
650 1.1 mrg #define _mm_maskz_shldi_epi32(A, B, C, D) \
651 1.1 mrg ((__m128i) \
652 1.1 mrg __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(B), \
653 1.1 mrg (__v4si)(__m128i)(C),(int)(D), \
654 1.1 mrg (__v4si)(__m128i)_mm_setzero_si128 (), \
655 1.1 mrg (__mmask8)(A)))
656 1.1 mrg #define _mm_shldi_epi64(A, B, C) \
657 1.1 mrg ((__m128i) __builtin_ia32_vpshld_v2di ((__v2di)(__m128i)(A), \
658 1.1 mrg (__v2di)(__m128i)(B),(int)(C)))
659 1.1 mrg #define _mm_mask_shldi_epi64(A, B, C, D, E) \
660 1.1 mrg ((__m128i) __builtin_ia32_vpshld_v2di_mask ((__v2di)(__m128i)(C), \
661 1.1 mrg (__v2di)(__m128i)(D), (int)(E), \
662 1.1 mrg (__v2di)(__m128i)(A), \
663 1.1 mrg (__mmask8)(B)))
664 1.1 mrg #define _mm_maskz_shldi_epi64(A, B, C, D) \
665 1.1 mrg ((__m128i) \
666 1.1 mrg __builtin_ia32_vpshld_v2di_mask ((__v2di)(__m128i)(B), \
667 1.1 mrg (__v2di)(__m128i)(C),(int)(D), \
668 1.1 mrg (__v2di)(__m128i)_mm_setzero_si128 (), \
669 1.1 mrg (__mmask8)(A)))
670 1.1 mrg #endif
671 1.1 mrg
672 1.1 mrg extern __inline __m256i
673 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
674 1.1 mrg _mm256_shrdv_epi16 (__m256i __A, __m256i __B, __m256i __C)
675 1.1 mrg {
676 1.1 mrg return (__m256i) __builtin_ia32_vpshrdv_v16hi ((__v16hi)__A, (__v16hi) __B,
677 1.1 mrg (__v16hi) __C);
678 1.1 mrg }
679 1.1 mrg
680 1.1 mrg extern __inline __m256i
681 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
682 1.1 mrg _mm256_mask_shrdv_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D)
683 1.1 mrg {
684 1.1 mrg return (__m256i)__builtin_ia32_vpshrdv_v16hi_mask ((__v16hi)__A,
685 1.1 mrg (__v16hi) __C, (__v16hi) __D, (__mmask16)__B);
686 1.1 mrg }
687 1.1 mrg
688 1.1 mrg extern __inline __m256i
689 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
690 1.1 mrg _mm256_maskz_shrdv_epi16 (__mmask16 __A, __m256i __B, __m256i __C, __m256i __D)
691 1.1 mrg {
692 1.1 mrg return (__m256i)__builtin_ia32_vpshrdv_v16hi_maskz ((__v16hi)__B,
693 1.1 mrg (__v16hi) __C, (__v16hi) __D, (__mmask16)__A);
694 1.1 mrg }
695 1.1 mrg
696 1.1 mrg extern __inline __m256i
697 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
698 1.1 mrg _mm256_shrdv_epi32 (__m256i __A, __m256i __B, __m256i __C)
699 1.1 mrg {
700 1.1 mrg return (__m256i) __builtin_ia32_vpshrdv_v8si ((__v8si)__A, (__v8si) __B,
701 1.1 mrg (__v8si) __C);
702 1.1 mrg }
703 1.1 mrg
704 1.1 mrg extern __inline __m256i
705 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
706 1.1 mrg _mm256_mask_shrdv_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
707 1.1 mrg {
708 1.1 mrg return (__m256i)__builtin_ia32_vpshrdv_v8si_mask ((__v8si)__A, (__v8si) __C,
709 1.1 mrg (__v8si) __D, (__mmask8)__B);
710 1.1 mrg }
711 1.1 mrg
712 1.1 mrg extern __inline __m256i
713 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
714 1.1 mrg _mm256_maskz_shrdv_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
715 1.1 mrg {
716 1.1 mrg return (__m256i)__builtin_ia32_vpshrdv_v8si_maskz ((__v8si)__B, (__v8si) __C,
717 1.1 mrg (__v8si) __D, (__mmask8)__A);
718 1.1 mrg }
719 1.1 mrg
720 1.1 mrg extern __inline __m256i
721 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
722 1.1 mrg _mm256_shrdv_epi64 (__m256i __A, __m256i __B, __m256i __C)
723 1.1 mrg {
724 1.1 mrg return (__m256i) __builtin_ia32_vpshrdv_v4di ((__v4di)__A, (__v4di) __B,
725 1.1 mrg (__v4di) __C);
726 1.1 mrg }
727 1.1 mrg
728 1.1 mrg extern __inline __m256i
729 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
730 1.1 mrg _mm256_mask_shrdv_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
731 1.1 mrg {
732 1.1 mrg return (__m256i)__builtin_ia32_vpshrdv_v4di_mask ((__v4di)__A, (__v4di) __C,
733 1.1 mrg (__v4di) __D, (__mmask8)__B);
734 1.1 mrg }
735 1.1 mrg
736 1.1 mrg extern __inline __m256i
737 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
738 1.1 mrg _mm256_maskz_shrdv_epi64 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
739 1.1 mrg {
740 1.1 mrg return (__m256i)__builtin_ia32_vpshrdv_v4di_maskz ((__v4di)__B, (__v4di) __C,
741 1.1 mrg (__v4di) __D, (__mmask8)__A);
742 1.1 mrg }
743 1.1 mrg
744 1.1 mrg extern __inline __m128i
745 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
746 1.1 mrg _mm_shrdv_epi16 (__m128i __A, __m128i __B, __m128i __C)
747 1.1 mrg {
748 1.1 mrg return (__m128i) __builtin_ia32_vpshrdv_v8hi ((__v8hi)__A, (__v8hi) __B,
749 1.1 mrg (__v8hi) __C);
750 1.1 mrg }
751 1.1 mrg
752 1.1 mrg extern __inline __m128i
753 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
754 1.1 mrg _mm_mask_shrdv_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
755 1.1 mrg {
756 1.1 mrg return (__m128i)__builtin_ia32_vpshrdv_v8hi_mask ((__v8hi)__A, (__v8hi) __C,
757 1.1 mrg (__v8hi) __D, (__mmask8)__B);
758 1.1 mrg }
759 1.1 mrg
760 1.1 mrg extern __inline __m128i
761 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
762 1.1 mrg _mm_maskz_shrdv_epi16 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
763 1.1 mrg {
764 1.1 mrg return (__m128i)__builtin_ia32_vpshrdv_v8hi_maskz ((__v8hi)__B, (__v8hi) __C,
765 1.1 mrg (__v8hi) __D, (__mmask8)__A);
766 1.1 mrg }
767 1.1 mrg
768 1.1 mrg extern __inline __m128i
769 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
770 1.1 mrg _mm_shrdv_epi32 (__m128i __A, __m128i __B, __m128i __C)
771 1.1 mrg {
772 1.1 mrg return (__m128i) __builtin_ia32_vpshrdv_v4si ((__v4si)__A, (__v4si) __B,
773 1.1 mrg (__v4si) __C);
774 1.1 mrg }
775 1.1 mrg
776 1.1 mrg extern __inline __m128i
777 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
778 1.1 mrg _mm_mask_shrdv_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
779 1.1 mrg {
780 1.1 mrg return (__m128i)__builtin_ia32_vpshrdv_v4si_mask ((__v4si)__A, (__v4si) __C,
781 1.1 mrg (__v4si) __D, (__mmask8)__B);
782 1.1 mrg }
783 1.1 mrg
784 1.1 mrg extern __inline __m128i
785 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
786 1.1 mrg _mm_maskz_shrdv_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
787 1.1 mrg {
788 1.1 mrg return (__m128i)__builtin_ia32_vpshrdv_v4si_maskz ((__v4si)__B, (__v4si) __C,
789 1.1 mrg (__v4si) __D, (__mmask8)__A);
790 1.1 mrg }
791 1.1 mrg
792 1.1 mrg extern __inline __m128i
793 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
794 1.1 mrg _mm_shrdv_epi64 (__m128i __A, __m128i __B, __m128i __C)
795 1.1 mrg {
796 1.1 mrg return (__m128i) __builtin_ia32_vpshrdv_v2di ((__v2di)__A, (__v2di) __B,
797 1.1 mrg (__v2di) __C);
798 1.1 mrg }
799 1.1 mrg
800 1.1 mrg extern __inline __m128i
801 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
802 1.1 mrg _mm_mask_shrdv_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
803 1.1 mrg {
804 1.1 mrg return (__m128i)__builtin_ia32_vpshrdv_v2di_mask ((__v2di)__A, (__v2di) __C,
805 1.1 mrg (__v2di) __D, (__mmask8)__B);
806 1.1 mrg }
807 1.1 mrg
808 1.1 mrg extern __inline __m128i
809 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
810 1.1 mrg _mm_maskz_shrdv_epi64 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
811 1.1 mrg {
812 1.1 mrg return (__m128i)__builtin_ia32_vpshrdv_v2di_maskz ((__v2di)__B, (__v2di) __C,
813 1.1 mrg (__v2di) __D, (__mmask8)__A);
814 1.1 mrg }
815 1.1 mrg
816 1.1 mrg extern __inline __m256i
817 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
818 1.1 mrg _mm256_shldv_epi16 (__m256i __A, __m256i __B, __m256i __C)
819 1.1 mrg {
820 1.1 mrg return (__m256i) __builtin_ia32_vpshldv_v16hi ((__v16hi)__A, (__v16hi) __B,
821 1.1 mrg (__v16hi) __C);
822 1.1 mrg }
823 1.1 mrg
824 1.1 mrg extern __inline __m256i
825 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
826 1.1 mrg _mm256_mask_shldv_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D)
827 1.1 mrg {
828 1.1 mrg return (__m256i)__builtin_ia32_vpshldv_v16hi_mask ((__v16hi)__A,
829 1.1 mrg (__v16hi) __C, (__v16hi) __D, (__mmask16)__B);
830 1.1 mrg }
831 1.1 mrg
832 1.1 mrg extern __inline __m256i
833 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
834 1.1 mrg _mm256_maskz_shldv_epi16 (__mmask16 __A, __m256i __B, __m256i __C, __m256i __D)
835 1.1 mrg {
836 1.1 mrg return (__m256i)__builtin_ia32_vpshldv_v16hi_maskz ((__v16hi)__B,
837 1.1 mrg (__v16hi) __C, (__v16hi) __D, (__mmask16)__A);
838 1.1 mrg }
839 1.1 mrg
840 1.1 mrg extern __inline __m256i
841 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
842 1.1 mrg _mm256_shldv_epi32 (__m256i __A, __m256i __B, __m256i __C)
843 1.1 mrg {
844 1.1 mrg return (__m256i) __builtin_ia32_vpshldv_v8si ((__v8si)__A, (__v8si) __B,
845 1.1 mrg (__v8si) __C);
846 1.1 mrg }
847 1.1 mrg
848 1.1 mrg extern __inline __m256i
849 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
850 1.1 mrg _mm256_mask_shldv_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
851 1.1 mrg {
852 1.1 mrg return (__m256i)__builtin_ia32_vpshldv_v8si_mask ((__v8si)__A, (__v8si) __C,
853 1.1 mrg (__v8si) __D, (__mmask8)__B) ;
854 1.1 mrg }
855 1.1 mrg
856 1.1 mrg extern __inline __m256i
857 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
858 1.1 mrg _mm256_maskz_shldv_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
859 1.1 mrg {
860 1.1 mrg return (__m256i)__builtin_ia32_vpshldv_v8si_maskz ((__v8si)__B, (__v8si) __C,
861 1.1 mrg (__v8si) __D, (__mmask8)__A);
862 1.1 mrg }
863 1.1 mrg
864 1.1 mrg extern __inline __m256i
865 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
866 1.1 mrg _mm256_shldv_epi64 (__m256i __A, __m256i __B, __m256i __C)
867 1.1 mrg {
868 1.1 mrg return (__m256i) __builtin_ia32_vpshldv_v4di ((__v4di)__A, (__v4di) __B,
869 1.1 mrg (__v4di) __C);
870 1.1 mrg }
871 1.1 mrg
872 1.1 mrg extern __inline __m256i
873 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
874 1.1 mrg _mm256_mask_shldv_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
875 1.1 mrg {
876 1.1 mrg return (__m256i)__builtin_ia32_vpshldv_v4di_mask ((__v4di)__A, (__v4di) __C,
877 1.1 mrg (__v4di) __D, (__mmask8)__B);
878 1.1 mrg }
879 1.1 mrg
880 1.1 mrg extern __inline __m256i
881 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
882 1.1 mrg _mm256_maskz_shldv_epi64 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
883 1.1 mrg {
884 1.1 mrg return (__m256i)__builtin_ia32_vpshldv_v4di_maskz ((__v4di)__B, (__v4di) __C,
885 1.1 mrg (__v4di) __D, (__mmask8)__A);
886 1.1 mrg }
887 1.1 mrg
888 1.1 mrg extern __inline __m128i
889 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
890 1.1 mrg _mm_shldv_epi16 (__m128i __A, __m128i __B, __m128i __C)
891 1.1 mrg {
892 1.1 mrg return (__m128i) __builtin_ia32_vpshldv_v8hi ((__v8hi)__A, (__v8hi) __B,
893 1.1 mrg (__v8hi) __C);
894 1.1 mrg }
895 1.1 mrg
896 1.1 mrg extern __inline __m128i
897 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
898 1.1 mrg _mm_mask_shldv_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
899 1.1 mrg {
900 1.1 mrg return (__m128i)__builtin_ia32_vpshldv_v8hi_mask ((__v8hi)__A, (__v8hi) __C,
901 1.1 mrg (__v8hi) __D, (__mmask8)__B);
902 1.1 mrg }
903 1.1 mrg
904 1.1 mrg extern __inline __m128i
905 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
906 1.1 mrg _mm_maskz_shldv_epi16 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
907 1.1 mrg {
908 1.1 mrg return (__m128i)__builtin_ia32_vpshldv_v8hi_maskz ((__v8hi)__B, (__v8hi) __C,
909 1.1 mrg (__v8hi) __D, (__mmask8)__A);
910 1.1 mrg }
911 1.1 mrg
912 1.1 mrg extern __inline __m128i
913 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
914 1.1 mrg _mm_shldv_epi32 (__m128i __A, __m128i __B, __m128i __C)
915 1.1 mrg {
916 1.1 mrg return (__m128i) __builtin_ia32_vpshldv_v4si ((__v4si)__A, (__v4si) __B,
917 1.1 mrg (__v4si) __C);
918 1.1 mrg }
919 1.1 mrg
920 1.1 mrg extern __inline __m128i
921 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
922 1.1 mrg _mm_mask_shldv_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
923 1.1 mrg {
924 1.1 mrg return (__m128i)__builtin_ia32_vpshldv_v4si_mask ((__v4si)__A, (__v4si) __C,
925 1.1 mrg (__v4si) __D, (__mmask8)__B);
926 1.1 mrg }
927 1.1 mrg
928 1.1 mrg extern __inline __m128i
929 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
930 1.1 mrg _mm_maskz_shldv_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
931 1.1 mrg {
932 1.1 mrg return (__m128i)__builtin_ia32_vpshldv_v4si_maskz ((__v4si)__B, (__v4si) __C,
933 1.1 mrg (__v4si) __D, (__mmask8)__A);
934 1.1 mrg }
935 1.1 mrg
936 1.1 mrg extern __inline __m128i
937 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
938 1.1 mrg _mm_shldv_epi64 (__m128i __A, __m128i __B, __m128i __C)
939 1.1 mrg {
940 1.1 mrg return (__m128i) __builtin_ia32_vpshldv_v2di ((__v2di)__A, (__v2di) __B,
941 1.1 mrg (__v2di) __C);
942 1.1 mrg }
943 1.1 mrg
944 1.1 mrg extern __inline __m128i
945 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
946 1.1 mrg _mm_mask_shldv_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
947 1.1 mrg {
948 1.1 mrg return (__m128i)__builtin_ia32_vpshldv_v2di_mask ((__v2di)__A, (__v2di) __C,
949 1.1 mrg (__v2di) __D, (__mmask8)__B);
950 1.1 mrg }
951 1.1 mrg
952 1.1 mrg extern __inline __m128i
953 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
954 1.1 mrg _mm_maskz_shldv_epi64 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
955 1.1 mrg {
956 1.1 mrg return (__m128i)__builtin_ia32_vpshldv_v2di_maskz ((__v2di)__B, (__v2di) __C,
957 1.1 mrg (__v2di) __D, (__mmask8)__A);
958 1.1 mrg }
959 1.1 mrg
960 1.1 mrg
961 1.1 mrg
962 1.1 mrg
963 1.1 mrg #ifdef __DISABLE_AVX512VBMI2VL__
964 1.1 mrg #undef __DISABLE_AVX512VBMI2VL__
965 1.1 mrg #pragma GCC pop_options
966 1.1 mrg #endif /* __DISABLE_AVX512VBMIVL__ */
967 1.1 mrg
968 1.1 mrg #if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__) || \
969 1.1 mrg !defined(__AVX512BW__)
970 1.1 mrg #pragma GCC push_options
971 1.1 mrg #pragma GCC target("avx512vbmi2,avx512vl,avx512bw")
972 1.1 mrg #define __DISABLE_AVX512VBMI2VLBW__
973 1.1 mrg #endif /* __AVX512VBMIVLBW__ */
974 1.1 mrg
975 1.1 mrg extern __inline __m256i
976 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
977 1.1 mrg _mm256_mask_compress_epi8 (__m256i __A, __mmask32 __B, __m256i __C)
978 1.1 mrg {
979 1.1 mrg return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi)__C,
980 1.1 mrg (__v32qi)__A, (__mmask32)__B);
981 1.1 mrg }
982 1.1 mrg
983 1.1 mrg extern __inline __m256i
984 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
985 1.1 mrg _mm256_maskz_compress_epi8 (__mmask32 __A, __m256i __B)
986 1.1 mrg {
987 1.1 mrg return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __B,
988 1.1 mrg (__v32qi) _mm256_setzero_si256 (), (__mmask32) __A);
989 1.1 mrg }
990 1.1 mrg
991 1.1 mrg extern __inline void
992 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
993 1.1 mrg _mm256_mask_compressstoreu_epi8 (void * __A, __mmask32 __B, __m256i __C)
994 1.1 mrg {
995 1.1 mrg __builtin_ia32_compressstoreuqi256_mask ((__v32qi *) __A, (__v32qi) __C,
996 1.1 mrg (__mmask32) __B);
997 1.1 mrg }
998 1.1 mrg
999 1.1 mrg extern __inline __m256i
1000 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1001 1.1 mrg _mm256_mask_expand_epi8 (__m256i __A, __mmask32 __B, __m256i __C)
1002 1.1 mrg {
1003 1.1 mrg return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __C,
1004 1.1 mrg (__v32qi) __A,
1005 1.1 mrg (__mmask32) __B);
1006 1.1 mrg }
1007 1.1 mrg
1008 1.1 mrg extern __inline __m256i
1009 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1010 1.1 mrg _mm256_maskz_expand_epi8 (__mmask32 __A, __m256i __B)
1011 1.1 mrg {
1012 1.1 mrg return (__m256i) __builtin_ia32_expandqi256_maskz ((__v32qi) __B,
1013 1.1 mrg (__v32qi) _mm256_setzero_si256 (), (__mmask32) __A);
1014 1.1 mrg }
1015 1.1 mrg
1016 1.1 mrg extern __inline __m256i
1017 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1018 1.1 mrg _mm256_mask_expandloadu_epi8 (__m256i __A, __mmask32 __B, const void * __C)
1019 1.1 mrg {
1020 1.1 mrg return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *) __C,
1021 1.1 mrg (__v32qi) __A, (__mmask32) __B);
1022 1.1 mrg }
1023 1.1 mrg
1024 1.1 mrg extern __inline __m256i
1025 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1026 1.1 mrg _mm256_maskz_expandloadu_epi8 (__mmask32 __A, const void * __B)
1027 1.1 mrg {
1028 1.1 mrg return (__m256i) __builtin_ia32_expandloadqi256_maskz ((const __v32qi *) __B,
1029 1.1 mrg (__v32qi) _mm256_setzero_si256 (), (__mmask32) __A);
1030 1.1 mrg }
1031 1.1 mrg
1032 1.1 mrg #ifdef __DISABLE_AVX512VBMI2VLBW__
1033 1.1 mrg #undef __DISABLE_AVX512VBMI2VLBW__
1034 1.1 mrg #pragma GCC pop_options
1035 1.1 mrg #endif /* __DISABLE_AVX512VBMIVLBW__ */
1036 1.1 mrg
1037 1.1 mrg #endif /* _AVX512VBMIVLINTRIN_H_INCLUDED */
1038