avx512vbmivlintrin.h revision 1.1 1 1.1 mrg /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2 1.1 mrg
3 1.1 mrg This file is part of GCC.
4 1.1 mrg
5 1.1 mrg GCC is free software; you can redistribute it and/or modify
6 1.1 mrg it under the terms of the GNU General Public License as published by
7 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
8 1.1 mrg any later version.
9 1.1 mrg
10 1.1 mrg GCC is distributed in the hope that it will be useful,
11 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 1.1 mrg GNU General Public License for more details.
14 1.1 mrg
15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
16 1.1 mrg permissions described in the GCC Runtime Library Exception, version
17 1.1 mrg 3.1, as published by the Free Software Foundation.
18 1.1 mrg
19 1.1 mrg You should have received a copy of the GNU General Public License and
20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 1.1 mrg <http://www.gnu.org/licenses/>. */
23 1.1 mrg
24 1.1 mrg #ifndef _IMMINTRIN_H_INCLUDED
25 1.1 mrg #error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
26 1.1 mrg #endif
27 1.1 mrg
28 1.1 mrg #ifndef _AVX512VBMIVLINTRIN_H_INCLUDED
29 1.1 mrg #define _AVX512VBMIVLINTRIN_H_INCLUDED
30 1.1 mrg
31 1.1 mrg #if !defined(__AVX512VL__) || !defined(__AVX512VBMI__)
32 1.1 mrg #pragma GCC push_options
33 1.1 mrg #pragma GCC target("avx512vbmi,avx512vl")
34 1.1 mrg #define __DISABLE_AVX512VBMIVL__
35 1.1 mrg #endif /* __AVX512VBMIVL__ */
36 1.1 mrg
37 1.1 mrg extern __inline __m256i
38 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
39 1.1 mrg _mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
40 1.1 mrg {
41 1.1 mrg return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
42 1.1 mrg (__v32qi) __Y,
43 1.1 mrg (__v32qi) __W,
44 1.1 mrg (__mmask32) __M);
45 1.1 mrg }
46 1.1 mrg
47 1.1 mrg extern __inline __m256i
48 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
49 1.1 mrg _mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
50 1.1 mrg {
51 1.1 mrg return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
52 1.1 mrg (__v32qi) __Y,
53 1.1 mrg (__v32qi)
54 1.1 mrg _mm256_setzero_si256 (),
55 1.1 mrg (__mmask32) __M);
56 1.1 mrg }
57 1.1 mrg
58 1.1 mrg extern __inline __m256i
59 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
60 1.1 mrg _mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
61 1.1 mrg {
62 1.1 mrg return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
63 1.1 mrg (__v32qi) __Y,
64 1.1 mrg (__v32qi)
65 1.1 mrg _mm256_undefined_si256 (),
66 1.1 mrg (__mmask32) -1);
67 1.1 mrg }
68 1.1 mrg
69 1.1 mrg extern __inline __m128i
70 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
71 1.1 mrg _mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
72 1.1 mrg {
73 1.1 mrg return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
74 1.1 mrg (__v16qi) __Y,
75 1.1 mrg (__v16qi) __W,
76 1.1 mrg (__mmask16) __M);
77 1.1 mrg }
78 1.1 mrg
79 1.1 mrg extern __inline __m128i
80 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
81 1.1 mrg _mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
82 1.1 mrg {
83 1.1 mrg return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
84 1.1 mrg (__v16qi) __Y,
85 1.1 mrg (__v16qi)
86 1.1 mrg _mm_setzero_si128 (),
87 1.1 mrg (__mmask16) __M);
88 1.1 mrg }
89 1.1 mrg
90 1.1 mrg extern __inline __m128i
91 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
92 1.1 mrg _mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
93 1.1 mrg {
94 1.1 mrg return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
95 1.1 mrg (__v16qi) __Y,
96 1.1 mrg (__v16qi)
97 1.1 mrg _mm_undefined_si128 (),
98 1.1 mrg (__mmask16) -1);
99 1.1 mrg }
100 1.1 mrg
101 1.1 mrg extern __inline __m256i
102 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
103 1.1 mrg _mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
104 1.1 mrg {
105 1.1 mrg return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
106 1.1 mrg (__v32qi) __A,
107 1.1 mrg (__v32qi)
108 1.1 mrg _mm256_undefined_si256 (),
109 1.1 mrg (__mmask32) -1);
110 1.1 mrg }
111 1.1 mrg
112 1.1 mrg extern __inline __m256i
113 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
114 1.1 mrg _mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
115 1.1 mrg __m256i __B)
116 1.1 mrg {
117 1.1 mrg return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
118 1.1 mrg (__v32qi) __A,
119 1.1 mrg (__v32qi)
120 1.1 mrg _mm256_setzero_si256 (),
121 1.1 mrg (__mmask32) __M);
122 1.1 mrg }
123 1.1 mrg
124 1.1 mrg extern __inline __m256i
125 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126 1.1 mrg _mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
127 1.1 mrg __m256i __B)
128 1.1 mrg {
129 1.1 mrg return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
130 1.1 mrg (__v32qi) __A,
131 1.1 mrg (__v32qi) __W,
132 1.1 mrg (__mmask32) __M);
133 1.1 mrg }
134 1.1 mrg
135 1.1 mrg extern __inline __m128i
136 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
137 1.1 mrg _mm_permutexvar_epi8 (__m128i __A, __m128i __B)
138 1.1 mrg {
139 1.1 mrg return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
140 1.1 mrg (__v16qi) __A,
141 1.1 mrg (__v16qi)
142 1.1 mrg _mm_undefined_si128 (),
143 1.1 mrg (__mmask16) -1);
144 1.1 mrg }
145 1.1 mrg
146 1.1 mrg extern __inline __m128i
147 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
148 1.1 mrg _mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
149 1.1 mrg {
150 1.1 mrg return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
151 1.1 mrg (__v16qi) __A,
152 1.1 mrg (__v16qi)
153 1.1 mrg _mm_setzero_si128 (),
154 1.1 mrg (__mmask16) __M);
155 1.1 mrg }
156 1.1 mrg
157 1.1 mrg extern __inline __m128i
158 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
159 1.1 mrg _mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
160 1.1 mrg __m128i __B)
161 1.1 mrg {
162 1.1 mrg return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
163 1.1 mrg (__v16qi) __A,
164 1.1 mrg (__v16qi) __W,
165 1.1 mrg (__mmask16) __M);
166 1.1 mrg }
167 1.1 mrg
168 1.1 mrg extern __inline __m256i
169 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
170 1.1 mrg _mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B)
171 1.1 mrg {
172 1.1 mrg return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
173 1.1 mrg /* idx */ ,
174 1.1 mrg (__v32qi) __A,
175 1.1 mrg (__v32qi) __B,
176 1.1 mrg (__mmask32) -
177 1.1 mrg 1);
178 1.1 mrg }
179 1.1 mrg
180 1.1 mrg extern __inline __m256i
181 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
182 1.1 mrg _mm256_mask_permutex2var_epi8 (__m256i __A, __mmask32 __U,
183 1.1 mrg __m256i __I, __m256i __B)
184 1.1 mrg {
185 1.1 mrg return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
186 1.1 mrg /* idx */ ,
187 1.1 mrg (__v32qi) __A,
188 1.1 mrg (__v32qi) __B,
189 1.1 mrg (__mmask32)
190 1.1 mrg __U);
191 1.1 mrg }
192 1.1 mrg
193 1.1 mrg extern __inline __m256i
194 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
195 1.1 mrg _mm256_mask2_permutex2var_epi8 (__m256i __A, __m256i __I,
196 1.1 mrg __mmask32 __U, __m256i __B)
197 1.1 mrg {
198 1.1 mrg return (__m256i) __builtin_ia32_vpermi2varqi256_mask ((__v32qi) __A,
199 1.1 mrg (__v32qi) __I
200 1.1 mrg /* idx */ ,
201 1.1 mrg (__v32qi) __B,
202 1.1 mrg (__mmask32)
203 1.1 mrg __U);
204 1.1 mrg }
205 1.1 mrg
206 1.1 mrg extern __inline __m256i
207 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
208 1.1 mrg _mm256_maskz_permutex2var_epi8 (__mmask32 __U, __m256i __A,
209 1.1 mrg __m256i __I, __m256i __B)
210 1.1 mrg {
211 1.1 mrg return (__m256i) __builtin_ia32_vpermt2varqi256_maskz ((__v32qi) __I
212 1.1 mrg /* idx */ ,
213 1.1 mrg (__v32qi) __A,
214 1.1 mrg (__v32qi) __B,
215 1.1 mrg (__mmask32)
216 1.1 mrg __U);
217 1.1 mrg }
218 1.1 mrg
219 1.1 mrg extern __inline __m128i
220 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
221 1.1 mrg _mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B)
222 1.1 mrg {
223 1.1 mrg return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
224 1.1 mrg /* idx */ ,
225 1.1 mrg (__v16qi) __A,
226 1.1 mrg (__v16qi) __B,
227 1.1 mrg (__mmask16) -
228 1.1 mrg 1);
229 1.1 mrg }
230 1.1 mrg
231 1.1 mrg extern __inline __m128i
232 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
233 1.1 mrg _mm_mask_permutex2var_epi8 (__m128i __A, __mmask16 __U, __m128i __I,
234 1.1 mrg __m128i __B)
235 1.1 mrg {
236 1.1 mrg return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
237 1.1 mrg /* idx */ ,
238 1.1 mrg (__v16qi) __A,
239 1.1 mrg (__v16qi) __B,
240 1.1 mrg (__mmask16)
241 1.1 mrg __U);
242 1.1 mrg }
243 1.1 mrg
244 1.1 mrg extern __inline __m128i
245 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
246 1.1 mrg _mm_mask2_permutex2var_epi8 (__m128i __A, __m128i __I, __mmask16 __U,
247 1.1 mrg __m128i __B)
248 1.1 mrg {
249 1.1 mrg return (__m128i) __builtin_ia32_vpermi2varqi128_mask ((__v16qi) __A,
250 1.1 mrg (__v16qi) __I
251 1.1 mrg /* idx */ ,
252 1.1 mrg (__v16qi) __B,
253 1.1 mrg (__mmask16)
254 1.1 mrg __U);
255 1.1 mrg }
256 1.1 mrg
257 1.1 mrg extern __inline __m128i
258 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
259 1.1 mrg _mm_maskz_permutex2var_epi8 (__mmask16 __U, __m128i __A, __m128i __I,
260 1.1 mrg __m128i __B)
261 1.1 mrg {
262 1.1 mrg return (__m128i) __builtin_ia32_vpermt2varqi128_maskz ((__v16qi) __I
263 1.1 mrg /* idx */ ,
264 1.1 mrg (__v16qi) __A,
265 1.1 mrg (__v16qi) __B,
266 1.1 mrg (__mmask16)
267 1.1 mrg __U);
268 1.1 mrg }
269 1.1 mrg
270 1.1 mrg #ifdef __DISABLE_AVX512VBMIVL__
271 1.1 mrg #undef __DISABLE_AVX512VBMIVL__
272 1.1 mrg #pragma GCC pop_options
273 1.1 mrg #endif /* __DISABLE_AVX512VBMIVL__ */
274 1.1 mrg
275 1.1 mrg #endif /* _AVX512VBMIVLINTRIN_H_INCLUDED */
276