avx512vbmivlintrin.h revision 1.3 1 1.3 mrg /* Copyright (C) 2013-2017 Free Software Foundation, Inc.
2 1.1 mrg
3 1.1 mrg This file is part of GCC.
4 1.1 mrg
5 1.1 mrg GCC is free software; you can redistribute it and/or modify
6 1.1 mrg it under the terms of the GNU General Public License as published by
7 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
8 1.1 mrg any later version.
9 1.1 mrg
10 1.1 mrg GCC is distributed in the hope that it will be useful,
11 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 1.1 mrg GNU General Public License for more details.
14 1.1 mrg
15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
16 1.1 mrg permissions described in the GCC Runtime Library Exception, version
17 1.1 mrg 3.1, as published by the Free Software Foundation.
18 1.1 mrg
19 1.1 mrg You should have received a copy of the GNU General Public License and
20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 1.1 mrg <http://www.gnu.org/licenses/>. */
23 1.1 mrg
24 1.1 mrg #ifndef _IMMINTRIN_H_INCLUDED
25 1.1 mrg #error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
26 1.1 mrg #endif
27 1.1 mrg
28 1.1 mrg #ifndef _AVX512VBMIVLINTRIN_H_INCLUDED
29 1.1 mrg #define _AVX512VBMIVLINTRIN_H_INCLUDED
30 1.1 mrg
31 1.1 mrg #if !defined(__AVX512VL__) || !defined(__AVX512VBMI__)
32 1.1 mrg #pragma GCC push_options
33 1.1 mrg #pragma GCC target("avx512vbmi,avx512vl")
34 1.1 mrg #define __DISABLE_AVX512VBMIVL__
35 1.1 mrg #endif /* __AVX512VBMIVL__ */
36 1.1 mrg
37 1.1 mrg extern __inline __m256i
38 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
39 1.1 mrg _mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
40 1.1 mrg {
41 1.1 mrg return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
42 1.1 mrg (__v32qi) __Y,
43 1.1 mrg (__v32qi) __W,
44 1.1 mrg (__mmask32) __M);
45 1.1 mrg }
46 1.1 mrg
47 1.1 mrg extern __inline __m256i
48 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
49 1.1 mrg _mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
50 1.1 mrg {
51 1.1 mrg return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
52 1.1 mrg (__v32qi) __Y,
53 1.1 mrg (__v32qi)
54 1.1 mrg _mm256_setzero_si256 (),
55 1.1 mrg (__mmask32) __M);
56 1.1 mrg }
57 1.1 mrg
58 1.1 mrg extern __inline __m256i
59 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
60 1.1 mrg _mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
61 1.1 mrg {
62 1.1 mrg return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
63 1.1 mrg (__v32qi) __Y,
64 1.1 mrg (__v32qi)
65 1.1 mrg _mm256_undefined_si256 (),
66 1.1 mrg (__mmask32) -1);
67 1.1 mrg }
68 1.1 mrg
69 1.1 mrg extern __inline __m128i
70 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
71 1.1 mrg _mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
72 1.1 mrg {
73 1.1 mrg return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
74 1.1 mrg (__v16qi) __Y,
75 1.1 mrg (__v16qi) __W,
76 1.1 mrg (__mmask16) __M);
77 1.1 mrg }
78 1.1 mrg
79 1.1 mrg extern __inline __m128i
80 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
81 1.1 mrg _mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
82 1.1 mrg {
83 1.1 mrg return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
84 1.1 mrg (__v16qi) __Y,
85 1.1 mrg (__v16qi)
86 1.1 mrg _mm_setzero_si128 (),
87 1.1 mrg (__mmask16) __M);
88 1.1 mrg }
89 1.1 mrg
90 1.1 mrg extern __inline __m128i
91 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
92 1.1 mrg _mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
93 1.1 mrg {
94 1.1 mrg return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
95 1.1 mrg (__v16qi) __Y,
96 1.1 mrg (__v16qi)
97 1.1 mrg _mm_undefined_si128 (),
98 1.1 mrg (__mmask16) -1);
99 1.1 mrg }
100 1.1 mrg
101 1.1 mrg extern __inline __m256i
102 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
103 1.1 mrg _mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
104 1.1 mrg {
105 1.1 mrg return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
106 1.1 mrg (__v32qi) __A,
107 1.1 mrg (__v32qi)
108 1.1 mrg _mm256_undefined_si256 (),
109 1.1 mrg (__mmask32) -1);
110 1.1 mrg }
111 1.1 mrg
112 1.1 mrg extern __inline __m256i
113 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
114 1.1 mrg _mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
115 1.1 mrg __m256i __B)
116 1.1 mrg {
117 1.1 mrg return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
118 1.1 mrg (__v32qi) __A,
119 1.1 mrg (__v32qi)
120 1.1 mrg _mm256_setzero_si256 (),
121 1.1 mrg (__mmask32) __M);
122 1.1 mrg }
123 1.1 mrg
124 1.1 mrg extern __inline __m256i
125 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126 1.1 mrg _mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
127 1.1 mrg __m256i __B)
128 1.1 mrg {
129 1.1 mrg return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
130 1.1 mrg (__v32qi) __A,
131 1.1 mrg (__v32qi) __W,
132 1.1 mrg (__mmask32) __M);
133 1.1 mrg }
134 1.1 mrg
135 1.1 mrg extern __inline __m128i
136 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
137 1.1 mrg _mm_permutexvar_epi8 (__m128i __A, __m128i __B)
138 1.1 mrg {
139 1.1 mrg return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
140 1.1 mrg (__v16qi) __A,
141 1.1 mrg (__v16qi)
142 1.1 mrg _mm_undefined_si128 (),
143 1.1 mrg (__mmask16) -1);
144 1.1 mrg }
145 1.1 mrg
146 1.1 mrg extern __inline __m128i
147 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
148 1.1 mrg _mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
149 1.1 mrg {
150 1.1 mrg return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
151 1.1 mrg (__v16qi) __A,
152 1.1 mrg (__v16qi)
153 1.1 mrg _mm_setzero_si128 (),
154 1.1 mrg (__mmask16) __M);
155 1.1 mrg }
156 1.1 mrg
157 1.1 mrg extern __inline __m128i
158 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
159 1.1 mrg _mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
160 1.1 mrg __m128i __B)
161 1.1 mrg {
162 1.1 mrg return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
163 1.1 mrg (__v16qi) __A,
164 1.1 mrg (__v16qi) __W,
165 1.1 mrg (__mmask16) __M);
166 1.1 mrg }
167 1.1 mrg
168 1.1 mrg extern __inline __m256i
169 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
170 1.1 mrg _mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B)
171 1.1 mrg {
172 1.1 mrg return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
173 1.1 mrg /* idx */ ,
174 1.1 mrg (__v32qi) __A,
175 1.1 mrg (__v32qi) __B,
176 1.3 mrg (__mmask32) -1);
177 1.1 mrg }
178 1.1 mrg
179 1.1 mrg extern __inline __m256i
180 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
181 1.1 mrg _mm256_mask_permutex2var_epi8 (__m256i __A, __mmask32 __U,
182 1.1 mrg __m256i __I, __m256i __B)
183 1.1 mrg {
184 1.1 mrg return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
185 1.1 mrg /* idx */ ,
186 1.1 mrg (__v32qi) __A,
187 1.1 mrg (__v32qi) __B,
188 1.1 mrg (__mmask32)
189 1.1 mrg __U);
190 1.1 mrg }
191 1.1 mrg
192 1.1 mrg extern __inline __m256i
193 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
194 1.1 mrg _mm256_mask2_permutex2var_epi8 (__m256i __A, __m256i __I,
195 1.1 mrg __mmask32 __U, __m256i __B)
196 1.1 mrg {
197 1.1 mrg return (__m256i) __builtin_ia32_vpermi2varqi256_mask ((__v32qi) __A,
198 1.1 mrg (__v32qi) __I
199 1.1 mrg /* idx */ ,
200 1.1 mrg (__v32qi) __B,
201 1.1 mrg (__mmask32)
202 1.1 mrg __U);
203 1.1 mrg }
204 1.1 mrg
205 1.1 mrg extern __inline __m256i
206 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
207 1.1 mrg _mm256_maskz_permutex2var_epi8 (__mmask32 __U, __m256i __A,
208 1.1 mrg __m256i __I, __m256i __B)
209 1.1 mrg {
210 1.1 mrg return (__m256i) __builtin_ia32_vpermt2varqi256_maskz ((__v32qi) __I
211 1.1 mrg /* idx */ ,
212 1.1 mrg (__v32qi) __A,
213 1.1 mrg (__v32qi) __B,
214 1.1 mrg (__mmask32)
215 1.1 mrg __U);
216 1.1 mrg }
217 1.1 mrg
218 1.1 mrg extern __inline __m128i
219 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
220 1.1 mrg _mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B)
221 1.1 mrg {
222 1.1 mrg return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
223 1.1 mrg /* idx */ ,
224 1.1 mrg (__v16qi) __A,
225 1.1 mrg (__v16qi) __B,
226 1.3 mrg (__mmask16) -1);
227 1.1 mrg }
228 1.1 mrg
229 1.1 mrg extern __inline __m128i
230 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
231 1.1 mrg _mm_mask_permutex2var_epi8 (__m128i __A, __mmask16 __U, __m128i __I,
232 1.1 mrg __m128i __B)
233 1.1 mrg {
234 1.1 mrg return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
235 1.1 mrg /* idx */ ,
236 1.1 mrg (__v16qi) __A,
237 1.1 mrg (__v16qi) __B,
238 1.1 mrg (__mmask16)
239 1.1 mrg __U);
240 1.1 mrg }
241 1.1 mrg
242 1.1 mrg extern __inline __m128i
243 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
244 1.1 mrg _mm_mask2_permutex2var_epi8 (__m128i __A, __m128i __I, __mmask16 __U,
245 1.1 mrg __m128i __B)
246 1.1 mrg {
247 1.1 mrg return (__m128i) __builtin_ia32_vpermi2varqi128_mask ((__v16qi) __A,
248 1.1 mrg (__v16qi) __I
249 1.1 mrg /* idx */ ,
250 1.1 mrg (__v16qi) __B,
251 1.1 mrg (__mmask16)
252 1.1 mrg __U);
253 1.1 mrg }
254 1.1 mrg
255 1.1 mrg extern __inline __m128i
256 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
257 1.1 mrg _mm_maskz_permutex2var_epi8 (__mmask16 __U, __m128i __A, __m128i __I,
258 1.1 mrg __m128i __B)
259 1.1 mrg {
260 1.1 mrg return (__m128i) __builtin_ia32_vpermt2varqi128_maskz ((__v16qi) __I
261 1.1 mrg /* idx */ ,
262 1.1 mrg (__v16qi) __A,
263 1.1 mrg (__v16qi) __B,
264 1.1 mrg (__mmask16)
265 1.1 mrg __U);
266 1.1 mrg }
267 1.1 mrg
268 1.1 mrg #ifdef __DISABLE_AVX512VBMIVL__
269 1.1 mrg #undef __DISABLE_AVX512VBMIVL__
270 1.1 mrg #pragma GCC pop_options
271 1.1 mrg #endif /* __DISABLE_AVX512VBMIVL__ */
272 1.1 mrg
273 1.1 mrg #endif /* _AVX512VBMIVLINTRIN_H_INCLUDED */
274