/* Copyright (C) 2013-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This header is only meant to be pulled in through <immintrin.h>.  */
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef _AVX512VBMIVLINTRIN_H_INCLUDED
#define _AVX512VBMIVLINTRIN_H_INCLUDED

/* Byte multishift (multishift_epi64_epi8) and byte permute
   (permutexvar_epi8, permutex2var_epi8) intrinsics on __m128i and
   __m256i operands.  Every intrinsic below is a thin always-inline
   wrapper that casts its arguments and forwards them to one
   __builtin_ia32_* builtin; the per-element semantics are defined by
   those builtins, not by this file (see the Intel Intrinsics Guide).  */

/* If the translation unit is not already compiled with both AVX512VL
   and AVX512VBMI enabled, enable them for the definitions in this
   header only.  __DISABLE_AVX512VBMIVL__ records that the options were
   pushed so that they are popped again at the end of the file.  */
#if !defined(__AVX512VL__) || !defined(__AVX512VBMI__)
#pragma GCC push_options
#pragma GCC target("avx512vbmi,avx512vl")
#define __DISABLE_AVX512VBMIVL__
#endif /* !__AVX512VL__ || !__AVX512VBMI__ */

/* multishift_epi64_epi8, 256-bit forms.  All three forward __X and __Y
   as the first two builtin operands; they differ only in the third
   operand and in the mask.  */

/* Masked form: third operand is __W, mask is __M.
   NOTE(review): presumably result bytes whose bit in __M is clear are
   taken from __W -- confirm against the Intel Intrinsics Guide.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
                                                          (__v32qi) __Y,
                                                          (__v32qi) __W,
                                                          (__mmask32) __M);
}

/* Zero-masked form: third operand is the all-zero vector returned by
   _mm256_setzero_si256 (), mask is __M.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
                                                          (__v32qi) __Y,
                                                          (__v32qi)
                                                          _mm256_setzero_si256 (),
                                                          (__mmask32) __M);
}

/* Unmasked form: mask is (__mmask32) -1 and the third operand is
   _mm256_undefined_si256 ().  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
                                                          (__v32qi) __Y,
                                                          (__v32qi)
                                                          _mm256_undefined_si256 (),
                                                          (__mmask32) -1);
}

/* multishift_epi64_epi8, 128-bit forms.  Same structure as the 256-bit
   forms above, using the vpmultishiftqb128 builtin, __v16qi casts and a
   __mmask16 mask.  */

/* Masked form: third operand is __W, mask is __M.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
                                                          (__v16qi) __Y,
                                                          (__v16qi) __W,
                                                          (__mmask16) __M);
}

/* Zero-masked form: third operand is _mm_setzero_si128 (), mask is
   __M.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
                                                          (__v16qi) __Y,
                                                          (__v16qi)
                                                          _mm_setzero_si128 (),
                                                          (__mmask16) __M);
}

/* Unmasked form: mask is (__mmask16) -1 and the third operand is
   _mm_undefined_si128 ().  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
                                                          (__v16qi) __Y,
                                                          (__v16qi)
                                                          _mm_undefined_si128 (),
                                                          (__mmask16) -1);
}

/* permutexvar_epi8, 256-bit forms.  Note the operand order: the
   permvarqi256 builtin receives __B first and __A second, i.e. the
   reverse of the intrinsic's parameter order.  */

/* Unmasked form: mask is (__mmask32) -1 and the third operand is
   _mm256_undefined_si256 ().  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
                                                     (__v32qi) __A,
                                                     (__v32qi)
                                                     _mm256_undefined_si256 (),
                                                     (__mmask32) -1);
}

/* Zero-masked form: third operand is _mm256_setzero_si256 (), mask is
   __M.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
                               __m256i __B)
{
  return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
                                                     (__v32qi) __A,
                                                     (__v32qi)
                                                     _mm256_setzero_si256 (),
                                                     (__mmask32) __M);
}

/* Masked form: third operand is __W, mask is __M.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
                              __m256i __B)
{
  return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
                                                     (__v32qi) __A,
                                                     (__v32qi) __W,
                                                     (__mmask32) __M);
}

/* permutexvar_epi8, 128-bit forms.  Same structure and the same
   swapped (__B, __A) operand order as the 256-bit forms, using the
   permvarqi128 builtin, __v16qi casts and a __mmask16 mask.  */

/* Unmasked form: mask is (__mmask16) -1 and the third operand is
   _mm_undefined_si128 ().  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutexvar_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
                                                     (__v16qi) __A,
                                                     (__v16qi)
                                                     _mm_undefined_si128 (),
                                                     (__mmask16) -1);
}

/* Zero-masked form: third operand is _mm_setzero_si128 (), mask is
   __M.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
                                                     (__v16qi) __A,
                                                     (__v16qi)
                                                     _mm_setzero_si128 (),
                                                     (__mmask16) __M);
}

/* Masked form: third operand is __W, mask is __M.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
                           __m128i __B)
{
  return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
                                                     (__v16qi) __A,
                                                     (__v16qi) __W,
                                                     (__mmask16) __M);
}

/* permutex2var_epi8, 256-bit forms.  __I is the index operand and __A,
   __B are the two data operands.  Two builtin families are used:
   vpermt2varqi256 takes (__I, __A, __B, mask), while vpermi2varqi256
   takes (__A, __I, __B, mask).  */

/* Unmasked form: vpermt2varqi256_mask with mask (__mmask32) -1.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
                                                        /* idx */ ,
                                                        (__v32qi) __A,
                                                        (__v32qi) __B,
                                                        (__mmask32) -1);
}

/* Masked form: vpermt2varqi256_mask with mask __U.
   NOTE(review): presumably masked-off result bytes come from __A --
   confirm against the Intel Intrinsics Guide.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutex2var_epi8 (__m256i __A, __mmask32 __U,
                               __m256i __I, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
                                                        /* idx */ ,
                                                        (__v32qi) __A,
                                                        (__v32qi) __B,
                                                        (__mmask32)
                                                        __U);
}

/* mask2 form: the only variant that uses the vpermi2varqi256_mask
   builtin, with __A first and the index __I second.
   NOTE(review): presumably masked-off result bytes come from __I --
   confirm against the Intel Intrinsics Guide.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask2_permutex2var_epi8 (__m256i __A, __m256i __I,
                                __mmask32 __U, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpermi2varqi256_mask ((__v32qi) __A,
                                                        (__v32qi) __I
                                                        /* idx */ ,
                                                        (__v32qi) __B,
                                                        (__mmask32)
                                                        __U);
}

/* Zero-masked form: vpermt2varqi256_maskz with mask __U.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permutex2var_epi8 (__mmask32 __U, __m256i __A,
                                __m256i __I, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpermt2varqi256_maskz ((__v32qi) __I
                                                         /* idx */ ,
                                                         (__v32qi) __A,
                                                         (__v32qi) __B,
                                                         (__mmask32)
                                                         __U);
}

/* permutex2var_epi8, 128-bit forms.  Same structure as the 256-bit
   forms above, using the vpermt2varqi128 / vpermi2varqi128 builtins,
   __v16qi casts and a __mmask16 mask.  */

/* Unmasked form: vpermt2varqi128_mask with mask (__mmask16) -1.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
                                                        /* idx */ ,
                                                        (__v16qi) __A,
                                                        (__v16qi) __B,
                                                        (__mmask16) -1);
}

/* Masked form: vpermt2varqi128_mask with mask __U.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_permutex2var_epi8 (__m128i __A, __mmask16 __U, __m128i __I,
                            __m128i __B)
{
  return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
                                                        /* idx */ ,
                                                        (__v16qi) __A,
                                                        (__v16qi) __B,
                                                        (__mmask16)
                                                        __U);
}

/* mask2 form: vpermi2varqi128_mask, with __A first and the index __I
   second.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask2_permutex2var_epi8 (__m128i __A, __m128i __I, __mmask16 __U,
                             __m128i __B)
{
  return (__m128i) __builtin_ia32_vpermi2varqi128_mask ((__v16qi) __A,
                                                        (__v16qi) __I
                                                        /* idx */ ,
                                                        (__v16qi) __B,
                                                        (__mmask16)
                                                        __U);
}

/* Zero-masked form: vpermt2varqi128_maskz with mask __U.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_permutex2var_epi8 (__mmask16 __U, __m128i __A, __m128i __I,
                             __m128i __B)
{
  return (__m128i) __builtin_ia32_vpermt2varqi128_maskz ((__v16qi) __I
                                                         /* idx */ ,
                                                         (__v16qi) __A,
                                                         (__v16qi) __B,
                                                         (__mmask16)
                                                         __U);
}

/* Restore the caller's target options if they were overridden above.  */
#ifdef __DISABLE_AVX512VBMIVL__
#undef __DISABLE_AVX512VBMIVL__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VBMIVL__ */

#endif /* _AVX512VBMIVLINTRIN_H_INCLUDED */