1 1.1.1.4 mrg /* Copyright (C) 2013-2022 Free Software Foundation, Inc. 2 1.1 mrg 3 1.1 mrg This file is part of GCC. 4 1.1 mrg 5 1.1 mrg GCC is free software; you can redistribute it and/or modify 6 1.1 mrg it under the terms of the GNU General Public License as published by 7 1.1 mrg the Free Software Foundation; either version 3, or (at your option) 8 1.1 mrg any later version. 9 1.1 mrg 10 1.1 mrg GCC is distributed in the hope that it will be useful, 11 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of 12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 1.1 mrg GNU General Public License for more details. 14 1.1 mrg 15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional 16 1.1 mrg permissions described in the GCC Runtime Library Exception, version 17 1.1 mrg 3.1, as published by the Free Software Foundation. 18 1.1 mrg 19 1.1 mrg You should have received a copy of the GNU General Public License and 20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program; 21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 1.1 mrg <http://www.gnu.org/licenses/>. */ 23 1.1 mrg 24 1.1 mrg #ifndef _IMMINTRIN_H_INCLUDED 25 1.1 mrg #error "Never use <avx512vbmi2vlintrin.h> directly; include <immintrin.h> instead." 
26 1.1 mrg #endif 27 1.1 mrg 28 1.1 mrg #ifndef _AVX512VBMI2VLINTRIN_H_INCLUDED 29 1.1 mrg #define _AVX512VBMI2VLINTRIN_H_INCLUDED 30 1.1 mrg 31 1.1 mrg #if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__) 32 1.1 mrg #pragma GCC push_options 33 1.1 mrg #pragma GCC target("avx512vbmi2,avx512vl") 34 1.1 mrg #define __DISABLE_AVX512VBMI2VL__ 35 1.1 mrg #endif /* __AVX512VBMIVL__ */ 36 1.1 mrg 37 1.1 mrg extern __inline __m128i 38 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 39 1.1 mrg _mm_mask_compress_epi8 (__m128i __A, __mmask16 __B, __m128i __C) 40 1.1 mrg { 41 1.1 mrg return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi)__C, 42 1.1 mrg (__v16qi)__A, (__mmask16)__B); 43 1.1 mrg } 44 1.1 mrg 45 1.1 mrg extern __inline __m128i 46 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 47 1.1 mrg _mm_maskz_compress_epi8 (__mmask16 __A, __m128i __B) 48 1.1 mrg { 49 1.1 mrg return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __B, 50 1.1 mrg (__v16qi) _mm_setzero_si128 (), (__mmask16) __A); 51 1.1 mrg } 52 1.1 mrg 53 1.1 mrg 54 1.1 mrg extern __inline void 55 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 56 1.1 mrg _mm256_mask_compressstoreu_epi16 (void * __A, __mmask16 __B, __m256i __C) 57 1.1 mrg { 58 1.1 mrg __builtin_ia32_compressstoreuhi256_mask ((__v16hi *) __A, (__v16hi) __C, 59 1.1 mrg (__mmask16) __B); 60 1.1 mrg } 61 1.1 mrg 62 1.1 mrg extern __inline __m128i 63 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 64 1.1 mrg _mm_mask_compress_epi16 (__m128i __A, __mmask8 __B, __m128i __C) 65 1.1 mrg { 66 1.1 mrg return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi)__C, (__v8hi)__A, 67 1.1 mrg (__mmask8)__B); 68 1.1 mrg } 69 1.1 mrg 70 1.1 mrg extern __inline __m128i 71 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 72 1.1 mrg _mm_maskz_compress_epi16 (__mmask8 __A, __m128i __B) 73 1.1 mrg { 74 1.1 mrg return 
(__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __B, 75 1.1 mrg (__v8hi) _mm_setzero_si128 (), (__mmask8) __A); 76 1.1 mrg } 77 1.1 mrg 78 1.1 mrg extern __inline __m256i 79 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 80 1.1 mrg _mm256_mask_compress_epi16 (__m256i __A, __mmask16 __B, __m256i __C) 81 1.1 mrg { 82 1.1 mrg return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi)__C, 83 1.1 mrg (__v16hi)__A, (__mmask16)__B); 84 1.1 mrg } 85 1.1 mrg 86 1.1 mrg extern __inline __m256i 87 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 88 1.1 mrg _mm256_maskz_compress_epi16 (__mmask16 __A, __m256i __B) 89 1.1 mrg { 90 1.1 mrg return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __B, 91 1.1 mrg (__v16hi) _mm256_setzero_si256 (), (__mmask16) __A); 92 1.1 mrg } 93 1.1 mrg 94 1.1 mrg extern __inline void 95 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 96 1.1 mrg _mm_mask_compressstoreu_epi8 (void * __A, __mmask16 __B, __m128i __C) 97 1.1 mrg { 98 1.1 mrg __builtin_ia32_compressstoreuqi128_mask ((__v16qi *) __A, (__v16qi) __C, 99 1.1 mrg (__mmask16) __B); 100 1.1 mrg } 101 1.1 mrg 102 1.1 mrg extern __inline void 103 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 104 1.1 mrg _mm_mask_compressstoreu_epi16 (void * __A, __mmask8 __B, __m128i __C) 105 1.1 mrg { 106 1.1 mrg __builtin_ia32_compressstoreuhi128_mask ((__v8hi *) __A, (__v8hi) __C, 107 1.1 mrg (__mmask8) __B); 108 1.1 mrg } 109 1.1 mrg 110 1.1 mrg extern __inline __m128i 111 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 112 1.1 mrg _mm_mask_expand_epi8 (__m128i __A, __mmask16 __B, __m128i __C) 113 1.1 mrg { 114 1.1 mrg return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __C, 115 1.1 mrg (__v16qi) __A, 116 1.1 mrg (__mmask16) __B); 117 1.1 mrg } 118 1.1 mrg 119 1.1 mrg extern __inline __m128i 120 1.1 mrg __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) 121 1.1 mrg _mm_maskz_expand_epi8 (__mmask16 __A, __m128i __B) 122 1.1 mrg { 123 1.1 mrg return (__m128i) __builtin_ia32_expandqi128_maskz ((__v16qi) __B, 124 1.1 mrg (__v16qi) _mm_setzero_si128 (), (__mmask16) __A); 125 1.1 mrg } 126 1.1 mrg 127 1.1 mrg extern __inline __m128i 128 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 129 1.1 mrg _mm_mask_expandloadu_epi8 (__m128i __A, __mmask16 __B, const void * __C) 130 1.1 mrg { 131 1.1 mrg return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *) __C, 132 1.1 mrg (__v16qi) __A, (__mmask16) __B); 133 1.1 mrg } 134 1.1 mrg 135 1.1 mrg extern __inline __m128i 136 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 137 1.1 mrg _mm_maskz_expandloadu_epi8 (__mmask16 __A, const void * __B) 138 1.1 mrg { 139 1.1 mrg return (__m128i) __builtin_ia32_expandloadqi128_maskz ((const __v16qi *) __B, 140 1.1 mrg (__v16qi) _mm_setzero_si128 (), (__mmask16) __A); 141 1.1 mrg } 142 1.1 mrg 143 1.1 mrg extern __inline __m128i 144 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 145 1.1 mrg _mm_mask_expand_epi16 (__m128i __A, __mmask8 __B, __m128i __C) 146 1.1 mrg { 147 1.1 mrg return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __C, 148 1.1 mrg (__v8hi) __A, 149 1.1 mrg (__mmask8) __B); 150 1.1 mrg } 151 1.1 mrg 152 1.1 mrg extern __inline __m128i 153 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 154 1.1 mrg _mm_maskz_expand_epi16 (__mmask8 __A, __m128i __B) 155 1.1 mrg { 156 1.1 mrg return (__m128i) __builtin_ia32_expandhi128_maskz ((__v8hi) __B, 157 1.1 mrg (__v8hi) _mm_setzero_si128 (), (__mmask8) __A); 158 1.1 mrg } 159 1.1 mrg 160 1.1 mrg extern __inline __m128i 161 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 162 1.1 mrg _mm_mask_expandloadu_epi16 (__m128i __A, __mmask8 __B, const void * __C) 163 1.1 mrg { 164 1.1 mrg return (__m128i) 
__builtin_ia32_expandloadhi128_mask ((const __v8hi *) __C, 165 1.1 mrg (__v8hi) __A, (__mmask8) __B); 166 1.1 mrg } 167 1.1 mrg 168 1.1 mrg extern __inline __m128i 169 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 170 1.1 mrg _mm_maskz_expandloadu_epi16 (__mmask8 __A, const void * __B) 171 1.1 mrg { 172 1.1 mrg return (__m128i) __builtin_ia32_expandloadhi128_maskz ((const __v8hi *) __B, 173 1.1 mrg (__v8hi) _mm_setzero_si128 (), (__mmask8) __A); 174 1.1 mrg } 175 1.1 mrg extern __inline __m256i 176 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 177 1.1 mrg _mm256_mask_expand_epi16 (__m256i __A, __mmask16 __B, __m256i __C) 178 1.1 mrg { 179 1.1 mrg return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __C, 180 1.1 mrg (__v16hi) __A, 181 1.1 mrg (__mmask16) __B); 182 1.1 mrg } 183 1.1 mrg 184 1.1 mrg extern __inline __m256i 185 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 186 1.1 mrg _mm256_maskz_expand_epi16 (__mmask16 __A, __m256i __B) 187 1.1 mrg { 188 1.1 mrg return (__m256i) __builtin_ia32_expandhi256_maskz ((__v16hi) __B, 189 1.1 mrg (__v16hi) _mm256_setzero_si256 (), (__mmask16) __A); 190 1.1 mrg } 191 1.1 mrg 192 1.1 mrg extern __inline __m256i 193 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 194 1.1 mrg _mm256_mask_expandloadu_epi16 (__m256i __A, __mmask16 __B, const void * __C) 195 1.1 mrg { 196 1.1 mrg return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *) __C, 197 1.1 mrg (__v16hi) __A, (__mmask16) __B); 198 1.1 mrg } 199 1.1 mrg 200 1.1 mrg extern __inline __m256i 201 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 202 1.1 mrg _mm256_maskz_expandloadu_epi16 (__mmask16 __A, const void * __B) 203 1.1 mrg { 204 1.1 mrg return (__m256i) __builtin_ia32_expandloadhi256_maskz ((const __v16hi *) __B, 205 1.1 mrg (__v16hi) _mm256_setzero_si256 (), (__mmask16) __A); 206 1.1 mrg } 207 1.1 mrg 208 1.1 
mrg #ifdef __OPTIMIZE__ 209 1.1 mrg extern __inline __m256i 210 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 211 1.1 mrg _mm256_shrdi_epi16 (__m256i __A, __m256i __B, int __C) 212 1.1 mrg { 213 1.1 mrg return (__m256i) __builtin_ia32_vpshrd_v16hi ((__v16hi)__A, (__v16hi) __B, 214 1.1 mrg __C); 215 1.1 mrg } 216 1.1 mrg 217 1.1 mrg extern __inline __m256i 218 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 219 1.1 mrg _mm256_mask_shrdi_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D, 220 1.1 mrg int __E) 221 1.1 mrg { 222 1.1 mrg return (__m256i)__builtin_ia32_vpshrd_v16hi_mask ((__v16hi)__C, 223 1.1 mrg (__v16hi) __D, __E, (__v16hi) __A, (__mmask16)__B); 224 1.1 mrg } 225 1.1 mrg 226 1.1 mrg extern __inline __m256i 227 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 228 1.1 mrg _mm256_maskz_shrdi_epi16 (__mmask16 __A, __m256i __B, __m256i __C, int __D) 229 1.1 mrg { 230 1.1 mrg return (__m256i)__builtin_ia32_vpshrd_v16hi_mask ((__v16hi)__B, 231 1.1 mrg (__v16hi) __C, __D, (__v16hi) _mm256_setzero_si256 (), (__mmask16)__A); 232 1.1 mrg } 233 1.1 mrg 234 1.1 mrg extern __inline __m256i 235 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 236 1.1 mrg _mm256_mask_shrdi_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D, 237 1.1 mrg int __E) 238 1.1 mrg { 239 1.1 mrg return (__m256i)__builtin_ia32_vpshrd_v8si_mask ((__v8si)__C, (__v8si) __D, 240 1.1 mrg __E, (__v8si) __A, (__mmask8)__B); 241 1.1 mrg } 242 1.1 mrg 243 1.1 mrg extern __inline __m256i 244 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 245 1.1 mrg _mm256_maskz_shrdi_epi32 (__mmask8 __A, __m256i __B, __m256i __C, int __D) 246 1.1 mrg { 247 1.1 mrg return (__m256i)__builtin_ia32_vpshrd_v8si_mask ((__v8si)__B, (__v8si) __C, 248 1.1 mrg __D, (__v8si) _mm256_setzero_si256 (), (__mmask8)__A); 249 1.1 mrg } 250 1.1 mrg 251 1.1 mrg extern __inline __m256i 252 
1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 253 1.1 mrg _mm256_shrdi_epi32 (__m256i __A, __m256i __B, int __C) 254 1.1 mrg { 255 1.1 mrg return (__m256i) __builtin_ia32_vpshrd_v8si ((__v8si)__A, (__v8si) __B, __C); 256 1.1 mrg } 257 1.1 mrg 258 1.1 mrg extern __inline __m256i 259 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 260 1.1 mrg _mm256_mask_shrdi_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D, 261 1.1 mrg int __E) 262 1.1 mrg { 263 1.1 mrg return (__m256i)__builtin_ia32_vpshrd_v4di_mask ((__v4di)__C, (__v4di) __D, 264 1.1 mrg __E, (__v4di) __A, (__mmask8)__B); 265 1.1 mrg } 266 1.1 mrg 267 1.1 mrg extern __inline __m256i 268 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 269 1.1 mrg _mm256_maskz_shrdi_epi64 (__mmask8 __A, __m256i __B, __m256i __C, int __D) 270 1.1 mrg { 271 1.1 mrg return (__m256i)__builtin_ia32_vpshrd_v4di_mask ((__v4di)__B, (__v4di) __C, 272 1.1 mrg __D, (__v4di) _mm256_setzero_si256 (), (__mmask8)__A); 273 1.1 mrg } 274 1.1 mrg 275 1.1 mrg extern __inline __m256i 276 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 277 1.1 mrg _mm256_shrdi_epi64 (__m256i __A, __m256i __B, int __C) 278 1.1 mrg { 279 1.1 mrg return (__m256i) __builtin_ia32_vpshrd_v4di ((__v4di)__A, (__v4di) __B, __C); 280 1.1 mrg } 281 1.1 mrg 282 1.1 mrg extern __inline __m128i 283 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 284 1.1 mrg _mm_mask_shrdi_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D, 285 1.1 mrg int __E) 286 1.1 mrg { 287 1.1 mrg return (__m128i)__builtin_ia32_vpshrd_v8hi_mask ((__v8hi)__C, (__v8hi) __D, 288 1.1 mrg __E, (__v8hi) __A, (__mmask8)__B); 289 1.1 mrg } 290 1.1 mrg 291 1.1 mrg extern __inline __m128i 292 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 293 1.1 mrg _mm_maskz_shrdi_epi16 (__mmask8 __A, __m128i __B, __m128i __C, int __D) 294 1.1 mrg { 295 
1.1 mrg return (__m128i)__builtin_ia32_vpshrd_v8hi_mask ((__v8hi)__B, (__v8hi) __C, 296 1.1 mrg __D, (__v8hi) _mm_setzero_si128 (), (__mmask8)__A); 297 1.1 mrg } 298 1.1 mrg 299 1.1 mrg extern __inline __m128i 300 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 301 1.1 mrg _mm_shrdi_epi16 (__m128i __A, __m128i __B, int __C) 302 1.1 mrg { 303 1.1 mrg return (__m128i) __builtin_ia32_vpshrd_v8hi ((__v8hi)__A, (__v8hi) __B, __C); 304 1.1 mrg } 305 1.1 mrg 306 1.1 mrg extern __inline __m128i 307 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 308 1.1 mrg _mm_mask_shrdi_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D, 309 1.1 mrg int __E) 310 1.1 mrg { 311 1.1 mrg return (__m128i)__builtin_ia32_vpshrd_v4si_mask ((__v4si)__C, (__v4si) __D, 312 1.1 mrg __E, (__v4si) __A, (__mmask8)__B); 313 1.1 mrg } 314 1.1 mrg 315 1.1 mrg extern __inline __m128i 316 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 317 1.1 mrg _mm_maskz_shrdi_epi32 (__mmask8 __A, __m128i __B, __m128i __C, int __D) 318 1.1 mrg { 319 1.1 mrg return (__m128i)__builtin_ia32_vpshrd_v4si_mask ((__v4si)__B, (__v4si) __C, 320 1.1 mrg __D, (__v4si) _mm_setzero_si128 (), (__mmask8)__A); 321 1.1 mrg } 322 1.1 mrg 323 1.1 mrg extern __inline __m128i 324 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 325 1.1 mrg _mm_shrdi_epi32 (__m128i __A, __m128i __B, int __C) 326 1.1 mrg { 327 1.1 mrg return (__m128i) __builtin_ia32_vpshrd_v4si ((__v4si)__A, (__v4si) __B, __C); 328 1.1 mrg } 329 1.1 mrg 330 1.1 mrg extern __inline __m128i 331 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 332 1.1 mrg _mm_mask_shrdi_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D, 333 1.1 mrg int __E) 334 1.1 mrg { 335 1.1 mrg return (__m128i)__builtin_ia32_vpshrd_v2di_mask ((__v2di)__C, (__v2di) __D, 336 1.1 mrg __E, (__v2di) __A, (__mmask8)__B); 337 1.1 mrg } 338 1.1 mrg 339 1.1 mrg extern 
__inline __m128i 340 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 341 1.1 mrg _mm_maskz_shrdi_epi64 (__mmask8 __A, __m128i __B, __m128i __C, int __D) 342 1.1 mrg { 343 1.1 mrg return (__m128i)__builtin_ia32_vpshrd_v2di_mask ((__v2di)__B, (__v2di) __C, 344 1.1 mrg __D, (__v2di) _mm_setzero_si128 (), (__mmask8)__A); 345 1.1 mrg } 346 1.1 mrg 347 1.1 mrg extern __inline __m128i 348 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 349 1.1 mrg _mm_shrdi_epi64 (__m128i __A, __m128i __B, int __C) 350 1.1 mrg { 351 1.1 mrg return (__m128i) __builtin_ia32_vpshrd_v2di ((__v2di)__A, (__v2di) __B, __C); 352 1.1 mrg } 353 1.1 mrg 354 1.1 mrg extern __inline __m256i 355 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 356 1.1 mrg _mm256_shldi_epi16 (__m256i __A, __m256i __B, int __C) 357 1.1 mrg { 358 1.1 mrg return (__m256i) __builtin_ia32_vpshld_v16hi ((__v16hi)__A, (__v16hi) __B, 359 1.1 mrg __C); 360 1.1 mrg } 361 1.1 mrg 362 1.1 mrg extern __inline __m256i 363 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 364 1.1 mrg _mm256_mask_shldi_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D, 365 1.1 mrg int __E) 366 1.1 mrg { 367 1.1 mrg return (__m256i)__builtin_ia32_vpshld_v16hi_mask ((__v16hi)__C, 368 1.1 mrg (__v16hi) __D, __E, (__v16hi) __A, (__mmask16)__B); 369 1.1 mrg } 370 1.1 mrg 371 1.1 mrg extern __inline __m256i 372 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 373 1.1 mrg _mm256_maskz_shldi_epi16 (__mmask16 __A, __m256i __B, __m256i __C, int __D) 374 1.1 mrg { 375 1.1 mrg return (__m256i)__builtin_ia32_vpshld_v16hi_mask ((__v16hi)__B, 376 1.1 mrg (__v16hi) __C, __D, (__v16hi) _mm256_setzero_si256 (), (__mmask16)__A); 377 1.1 mrg } 378 1.1 mrg 379 1.1 mrg extern __inline __m256i 380 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 381 1.1 mrg _mm256_mask_shldi_epi32 (__m256i __A, __mmask8 __B, 
__m256i __C, __m256i __D, 382 1.1 mrg int __E) 383 1.1 mrg { 384 1.1 mrg return (__m256i)__builtin_ia32_vpshld_v8si_mask ((__v8si)__C, (__v8si) __D, 385 1.1 mrg __E, (__v8si) __A, (__mmask8)__B); 386 1.1 mrg } 387 1.1 mrg 388 1.1 mrg extern __inline __m256i 389 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 390 1.1 mrg _mm256_maskz_shldi_epi32 (__mmask8 __A, __m256i __B, __m256i __C, int __D) 391 1.1 mrg { 392 1.1 mrg return (__m256i)__builtin_ia32_vpshld_v8si_mask ((__v8si)__B, (__v8si) __C, 393 1.1 mrg __D, (__v8si) _mm256_setzero_si256 (), (__mmask8)__A); 394 1.1 mrg } 395 1.1 mrg 396 1.1 mrg extern __inline __m256i 397 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 398 1.1 mrg _mm256_shldi_epi32 (__m256i __A, __m256i __B, int __C) 399 1.1 mrg { 400 1.1 mrg return (__m256i) __builtin_ia32_vpshld_v8si ((__v8si)__A, (__v8si) __B, __C); 401 1.1 mrg } 402 1.1 mrg 403 1.1 mrg extern __inline __m256i 404 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 405 1.1 mrg _mm256_mask_shldi_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D, 406 1.1 mrg int __E) 407 1.1 mrg { 408 1.1 mrg return (__m256i)__builtin_ia32_vpshld_v4di_mask ((__v4di)__C, (__v4di) __D, 409 1.1 mrg __E, (__v4di) __A, (__mmask8)__B); 410 1.1 mrg } 411 1.1 mrg 412 1.1 mrg extern __inline __m256i 413 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 414 1.1 mrg _mm256_maskz_shldi_epi64 (__mmask8 __A, __m256i __B, __m256i __C, int __D) 415 1.1 mrg { 416 1.1 mrg return (__m256i)__builtin_ia32_vpshld_v4di_mask ((__v4di)__B, (__v4di) __C, 417 1.1 mrg __D, (__v4di) _mm256_setzero_si256 (), (__mmask8)__A); 418 1.1 mrg } 419 1.1 mrg 420 1.1 mrg extern __inline __m256i 421 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 422 1.1 mrg _mm256_shldi_epi64 (__m256i __A, __m256i __B, int __C) 423 1.1 mrg { 424 1.1 mrg return (__m256i) __builtin_ia32_vpshld_v4di ((__v4di)__A, 
(__v4di) __B, __C); 425 1.1 mrg } 426 1.1 mrg 427 1.1 mrg extern __inline __m128i 428 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 429 1.1 mrg _mm_mask_shldi_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D, 430 1.1 mrg int __E) 431 1.1 mrg { 432 1.1 mrg return (__m128i)__builtin_ia32_vpshld_v8hi_mask ((__v8hi)__C, (__v8hi) __D, 433 1.1 mrg __E, (__v8hi) __A, (__mmask8)__B); 434 1.1 mrg } 435 1.1 mrg 436 1.1 mrg extern __inline __m128i 437 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 438 1.1 mrg _mm_maskz_shldi_epi16 (__mmask8 __A, __m128i __B, __m128i __C, int __D) 439 1.1 mrg { 440 1.1 mrg return (__m128i)__builtin_ia32_vpshld_v8hi_mask ((__v8hi)__B, (__v8hi) __C, 441 1.1 mrg __D, (__v8hi) _mm_setzero_si128 (), (__mmask8)__A); 442 1.1 mrg } 443 1.1 mrg 444 1.1 mrg extern __inline __m128i 445 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 446 1.1 mrg _mm_shldi_epi16 (__m128i __A, __m128i __B, int __C) 447 1.1 mrg { 448 1.1 mrg return (__m128i) __builtin_ia32_vpshld_v8hi ((__v8hi)__A, (__v8hi) __B, __C); 449 1.1 mrg } 450 1.1 mrg 451 1.1 mrg extern __inline __m128i 452 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 453 1.1 mrg _mm_mask_shldi_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D, 454 1.1 mrg int __E) 455 1.1 mrg { 456 1.1 mrg return (__m128i)__builtin_ia32_vpshld_v4si_mask ((__v4si)__C, (__v4si) __D, 457 1.1 mrg __E, (__v4si) __A, (__mmask8)__B); 458 1.1 mrg } 459 1.1 mrg 460 1.1 mrg extern __inline __m128i 461 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 462 1.1 mrg _mm_maskz_shldi_epi32 (__mmask8 __A, __m128i __B, __m128i __C, int __D) 463 1.1 mrg { 464 1.1 mrg return (__m128i)__builtin_ia32_vpshld_v4si_mask ((__v4si)__B, (__v4si) __C, 465 1.1 mrg __D, (__v4si) _mm_setzero_si128 (), (__mmask8)__A); 466 1.1 mrg } 467 1.1 mrg 468 1.1 mrg extern __inline __m128i 469 1.1 mrg 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) 470 1.1 mrg _mm_shldi_epi32 (__m128i __A, __m128i __B, int __C) 471 1.1 mrg { 472 1.1 mrg return (__m128i) __builtin_ia32_vpshld_v4si ((__v4si)__A, (__v4si) __B, __C); 473 1.1 mrg } 474 1.1 mrg 475 1.1 mrg extern __inline __m128i 476 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 477 1.1 mrg _mm_mask_shldi_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D, 478 1.1 mrg int __E) 479 1.1 mrg { 480 1.1 mrg return (__m128i)__builtin_ia32_vpshld_v2di_mask ((__v2di)__C, (__v2di) __D, 481 1.1 mrg __E, (__v2di) __A, (__mmask8)__B); 482 1.1 mrg } 483 1.1 mrg 484 1.1 mrg extern __inline __m128i 485 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 486 1.1 mrg _mm_maskz_shldi_epi64 (__mmask8 __A, __m128i __B, __m128i __C, int __D) 487 1.1 mrg { 488 1.1 mrg return (__m128i)__builtin_ia32_vpshld_v2di_mask ((__v2di)__B, (__v2di) __C, 489 1.1 mrg __D, (__v2di) _mm_setzero_si128 (), (__mmask8)__A); 490 1.1 mrg } 491 1.1 mrg 492 1.1 mrg extern __inline __m128i 493 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 494 1.1 mrg _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C) 495 1.1 mrg { 496 1.1 mrg return (__m128i) __builtin_ia32_vpshld_v2di ((__v2di)__A, (__v2di) __B, __C); 497 1.1 mrg } 498 1.1 mrg #else 499 1.1 mrg #define _mm256_shrdi_epi16(A, B, C) \ 500 1.1 mrg ((__m256i) __builtin_ia32_vpshrd_v16hi ((__v16hi)(__m256i)(A), \ 501 1.1 mrg (__v16hi)(__m256i)(B),(int)(C))) 502 1.1 mrg #define _mm256_mask_shrdi_epi16(A, B, C, D, E) \ 503 1.1 mrg ((__m256i) __builtin_ia32_vpshrd_v16hi_mask ((__v16hi)(__m256i)(C), \ 504 1.1 mrg (__v16hi)(__m256i)(D), \ 505 1.1 mrg (int)(E), \ 506 1.1 mrg (__v16hi)(__m256i)(A), \ 507 1.1 mrg (__mmask16)(B))) 508 1.1 mrg #define _mm256_maskz_shrdi_epi16(A, B, C, D) \ 509 1.1 mrg ((__m256i) \ 510 1.1 mrg __builtin_ia32_vpshrd_v16hi_mask ((__v16hi)(__m256i)(B), \ 511 1.1 mrg 
(__v16hi)(__m256i)(C),(int)(D), \ 512 1.1 mrg (__v16hi)(__m256i)_mm256_setzero_si256 (), \ 513 1.1 mrg (__mmask16)(A))) 514 1.1 mrg #define _mm256_shrdi_epi32(A, B, C) \ 515 1.1 mrg ((__m256i) __builtin_ia32_vpshrd_v8si ((__v8si)(__m256i)(A), \ 516 1.1 mrg (__v8si)(__m256i)(B),(int)(C))) 517 1.1 mrg #define _mm256_mask_shrdi_epi32(A, B, C, D, E) \ 518 1.1 mrg ((__m256i) __builtin_ia32_vpshrd_v8si_mask ((__v8si)(__m256i)(C), \ 519 1.1 mrg (__v8si)(__m256i)(D), \ 520 1.1 mrg (int)(E), \ 521 1.1 mrg (__v8si)(__m256i)(A), \ 522 1.1 mrg (__mmask8)(B))) 523 1.1 mrg #define _mm256_maskz_shrdi_epi32(A, B, C, D) \ 524 1.1 mrg ((__m256i) \ 525 1.1 mrg __builtin_ia32_vpshrd_v8si_mask ((__v8si)(__m256i)(B), \ 526 1.1 mrg (__v8si)(__m256i)(C),(int)(D), \ 527 1.1 mrg (__v8si)(__m256i)_mm256_setzero_si256 (), \ 528 1.1 mrg (__mmask8)(A))) 529 1.1 mrg #define _mm256_shrdi_epi64(A, B, C) \ 530 1.1 mrg ((__m256i) __builtin_ia32_vpshrd_v4di ((__v4di)(__m256i)(A), \ 531 1.1 mrg (__v4di)(__m256i)(B),(int)(C))) 532 1.1 mrg #define _mm256_mask_shrdi_epi64(A, B, C, D, E) \ 533 1.1 mrg ((__m256i) __builtin_ia32_vpshrd_v4di_mask ((__v4di)(__m256i)(C), \ 534 1.1 mrg (__v4di)(__m256i)(D), (int)(E), \ 535 1.1 mrg (__v4di)(__m256i)(A), \ 536 1.1 mrg (__mmask8)(B))) 537 1.1 mrg #define _mm256_maskz_shrdi_epi64(A, B, C, D) \ 538 1.1 mrg ((__m256i) \ 539 1.1 mrg __builtin_ia32_vpshrd_v4di_mask ((__v4di)(__m256i)(B), \ 540 1.1 mrg (__v4di)(__m256i)(C),(int)(D), \ 541 1.1 mrg (__v4di)(__m256i)_mm256_setzero_si256 (), \ 542 1.1 mrg (__mmask8)(A))) 543 1.1 mrg #define _mm_shrdi_epi16(A, B, C) \ 544 1.1 mrg ((__m128i) __builtin_ia32_vpshrd_v8hi ((__v8hi)(__m128i)(A), \ 545 1.1 mrg (__v8hi)(__m128i)(B),(int)(C))) 546 1.1 mrg #define _mm_mask_shrdi_epi16(A, B, C, D, E) \ 547 1.1 mrg ((__m128i) __builtin_ia32_vpshrd_v8hi_mask ((__v8hi)(__m128i)(C), \ 548 1.1 mrg (__v8hi)(__m128i)(D), (int)(E), \ 549 1.1 mrg (__v8hi)(__m128i)(A), \ 550 1.1 mrg (__mmask8)(B))) 551 1.1 mrg #define _mm_maskz_shrdi_epi16(A, B, 
C, D) \ 552 1.1 mrg ((__m128i) \ 553 1.1 mrg __builtin_ia32_vpshrd_v8hi_mask ((__v8hi)(__m128i)(B), \ 554 1.1 mrg (__v8hi)(__m128i)(C),(int)(D), \ 555 1.1 mrg (__v8hi)(__m128i)_mm_setzero_si128 (), \ 556 1.1 mrg (__mmask8)(A))) 557 1.1 mrg #define _mm_shrdi_epi32(A, B, C) \ 558 1.1 mrg ((__m128i) __builtin_ia32_vpshrd_v4si ((__v4si)(__m128i)(A), \ 559 1.1 mrg (__v4si)(__m128i)(B),(int)(C))) 560 1.1 mrg #define _mm_mask_shrdi_epi32(A, B, C, D, E) \ 561 1.1 mrg ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(C), \ 562 1.1 mrg (__v4si)(__m128i)(D), (int)(E), \ 563 1.1 mrg (__v4si)(__m128i)(A), \ 564 1.1 mrg (__mmask8)(B))) 565 1.1 mrg #define _mm_maskz_shrdi_epi32(A, B, C, D) \ 566 1.1 mrg ((__m128i) \ 567 1.1 mrg __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(B), \ 568 1.1 mrg (__v4si)(__m128i)(C),(int)(D), \ 569 1.1 mrg (__v4si)(__m128i)_mm_setzero_si128 (), \ 570 1.1 mrg (__mmask8)(A))) 571 1.1 mrg #define _mm_shrdi_epi64(A, B, C) \ 572 1.1 mrg ((__m128i) __builtin_ia32_vpshrd_v2di ((__v2di)(__m128i)(A), \ 573 1.1 mrg (__v2di)(__m128i)(B),(int)(C))) 574 1.1 mrg #define _mm_mask_shrdi_epi64(A, B, C, D, E) \ 575 1.1 mrg ((__m128i) __builtin_ia32_vpshrd_v2di_mask ((__v2di)(__m128i)(C), \ 576 1.1 mrg (__v2di)(__m128i)(D), (int)(E), \ 577 1.1 mrg (__v2di)(__m128i)(A), \ 578 1.1 mrg (__mmask8)(B))) 579 1.1 mrg #define _mm_maskz_shrdi_epi64(A, B, C, D) \ 580 1.1 mrg ((__m128i) \ 581 1.1 mrg __builtin_ia32_vpshrd_v2di_mask ((__v2di)(__m128i)(B), \ 582 1.1 mrg (__v2di)(__m128i)(C),(int)(D), \ 583 1.1 mrg (__v2di)(__m128i)_mm_setzero_si128 (), \ 584 1.1 mrg (__mmask8)(A))) 585 1.1 mrg #define _mm256_shldi_epi16(A, B, C) \ 586 1.1 mrg ((__m256i) __builtin_ia32_vpshld_v16hi ((__v16hi)(__m256i)(A), \ 587 1.1 mrg (__v16hi)(__m256i)(B),(int)(C))) 588 1.1 mrg #define _mm256_mask_shldi_epi16(A, B, C, D, E) \ 589 1.1 mrg ((__m256i) __builtin_ia32_vpshld_v16hi_mask ((__v16hi)(__m256i)(C), \ 590 1.1 mrg (__v16hi)(__m256i)(D), \ 591 1.1 mrg (int)(E), \ 592 1.1 mrg 
(__v16hi)(__m256i)(A), \ 593 1.1 mrg (__mmask16)(B))) 594 1.1 mrg #define _mm256_maskz_shldi_epi16(A, B, C, D) \ 595 1.1 mrg ((__m256i) \ 596 1.1 mrg __builtin_ia32_vpshld_v16hi_mask ((__v16hi)(__m256i)(B), \ 597 1.1 mrg (__v16hi)(__m256i)(C),(int)(D), \ 598 1.1 mrg (__v16hi)(__m256i)_mm256_setzero_si256 (), \ 599 1.1 mrg (__mmask16)(A))) 600 1.1 mrg #define _mm256_shldi_epi32(A, B, C) \ 601 1.1 mrg ((__m256i) __builtin_ia32_vpshld_v8si ((__v8si)(__m256i)(A), \ 602 1.1 mrg (__v8si)(__m256i)(B),(int)(C))) 603 1.1 mrg #define _mm256_mask_shldi_epi32(A, B, C, D, E) \ 604 1.1 mrg ((__m256i) __builtin_ia32_vpshld_v8si_mask ((__v8si)(__m256i)(C), \ 605 1.1 mrg (__v8si)(__m256i)(D), (int)(E), \ 606 1.1 mrg (__v8si)(__m256i)(A), \ 607 1.1 mrg (__mmask8)(B))) 608 1.1 mrg #define _mm256_maskz_shldi_epi32(A, B, C, D) \ 609 1.1 mrg ((__m256i) \ 610 1.1 mrg __builtin_ia32_vpshld_v8si_mask ((__v8si)(__m256i)(B), \ 611 1.1 mrg (__v8si)(__m256i)(C),(int)(D), \ 612 1.1 mrg (__v8si)(__m256i)_mm256_setzero_si256 (), \ 613 1.1 mrg (__mmask8)(A))) 614 1.1 mrg #define _mm256_shldi_epi64(A, B, C) \ 615 1.1 mrg ((__m256i) __builtin_ia32_vpshld_v4di ((__v4di)(__m256i)(A), \ 616 1.1 mrg (__v4di)(__m256i)(B),(int)(C))) 617 1.1 mrg #define _mm256_mask_shldi_epi64(A, B, C, D, E) \ 618 1.1 mrg ((__m256i) __builtin_ia32_vpshld_v4di_mask ((__v4di)(__m256i)(C), \ 619 1.1 mrg (__v4di)(__m256i)(D), (int)(E), \ 620 1.1 mrg (__v4di)(__m256i)(A), \ 621 1.1 mrg (__mmask8)(B))) 622 1.1 mrg #define _mm256_maskz_shldi_epi64(A, B, C, D) \ 623 1.1 mrg ((__m256i) \ 624 1.1 mrg __builtin_ia32_vpshld_v4di_mask ((__v4di)(__m256i)(B), \ 625 1.1 mrg (__v4di)(__m256i)(C),(int)(D), \ 626 1.1 mrg (__v4di)(__m256i)_mm256_setzero_si256 (), \ 627 1.1 mrg (__mmask8)(A))) 628 1.1 mrg #define _mm_shldi_epi16(A, B, C) \ 629 1.1 mrg ((__m128i) __builtin_ia32_vpshld_v8hi ((__v8hi)(__m128i)(A), \ 630 1.1 mrg (__v8hi)(__m128i)(B),(int)(C))) 631 1.1 mrg #define _mm_mask_shldi_epi16(A, B, C, D, E) \ 632 1.1 mrg ((__m128i) 
__builtin_ia32_vpshld_v8hi_mask ((__v8hi)(__m128i)(C), \ 633 1.1 mrg (__v8hi)(__m128i)(D), (int)(E), \ 634 1.1 mrg (__v8hi)(__m128i)(A), \ 635 1.1 mrg (__mmask8)(B))) 636 1.1 mrg #define _mm_maskz_shldi_epi16(A, B, C, D) \ 637 1.1 mrg ((__m128i) \ 638 1.1 mrg __builtin_ia32_vpshld_v8hi_mask ((__v8hi)(__m128i)(B), \ 639 1.1 mrg (__v8hi)(__m128i)(C),(int)(D), \ 640 1.1 mrg (__v8hi)(__m128i)_mm_setzero_si128 (), \ 641 1.1 mrg (__mmask8)(A))) 642 1.1 mrg #define _mm_shldi_epi32(A, B, C) \ 643 1.1 mrg ((__m128i) __builtin_ia32_vpshld_v4si ((__v4si)(__m128i)(A), \ 644 1.1 mrg (__v4si)(__m128i)(B),(int)(C))) 645 1.1 mrg #define _mm_mask_shldi_epi32(A, B, C, D, E) \ 646 1.1 mrg ((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(C), \ 647 1.1 mrg (__v4si)(__m128i)(D), (int)(E), \ 648 1.1 mrg (__v4si)(__m128i)(A), \ 649 1.1 mrg (__mmask8)(B))) 650 1.1 mrg #define _mm_maskz_shldi_epi32(A, B, C, D) \ 651 1.1 mrg ((__m128i) \ 652 1.1 mrg __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(B), \ 653 1.1 mrg (__v4si)(__m128i)(C),(int)(D), \ 654 1.1 mrg (__v4si)(__m128i)_mm_setzero_si128 (), \ 655 1.1 mrg (__mmask8)(A))) 656 1.1 mrg #define _mm_shldi_epi64(A, B, C) \ 657 1.1 mrg ((__m128i) __builtin_ia32_vpshld_v2di ((__v2di)(__m128i)(A), \ 658 1.1 mrg (__v2di)(__m128i)(B),(int)(C))) 659 1.1 mrg #define _mm_mask_shldi_epi64(A, B, C, D, E) \ 660 1.1 mrg ((__m128i) __builtin_ia32_vpshld_v2di_mask ((__v2di)(__m128i)(C), \ 661 1.1 mrg (__v2di)(__m128i)(D), (int)(E), \ 662 1.1 mrg (__v2di)(__m128i)(A), \ 663 1.1 mrg (__mmask8)(B))) 664 1.1 mrg #define _mm_maskz_shldi_epi64(A, B, C, D) \ 665 1.1 mrg ((__m128i) \ 666 1.1 mrg __builtin_ia32_vpshld_v2di_mask ((__v2di)(__m128i)(B), \ 667 1.1 mrg (__v2di)(__m128i)(C),(int)(D), \ 668 1.1 mrg (__v2di)(__m128i)_mm_setzero_si128 (), \ 669 1.1 mrg (__mmask8)(A))) 670 1.1 mrg #endif 671 1.1 mrg 672 1.1 mrg extern __inline __m256i 673 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 674 1.1 mrg _mm256_shrdv_epi16 
(__m256i __A, __m256i __B, __m256i __C) 675 1.1 mrg { 676 1.1 mrg return (__m256i) __builtin_ia32_vpshrdv_v16hi ((__v16hi)__A, (__v16hi) __B, 677 1.1 mrg (__v16hi) __C); 678 1.1 mrg } 679 1.1 mrg 680 1.1 mrg extern __inline __m256i 681 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 682 1.1 mrg _mm256_mask_shrdv_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D) 683 1.1 mrg { 684 1.1 mrg return (__m256i)__builtin_ia32_vpshrdv_v16hi_mask ((__v16hi)__A, 685 1.1 mrg (__v16hi) __C, (__v16hi) __D, (__mmask16)__B); 686 1.1 mrg } 687 1.1 mrg 688 1.1 mrg extern __inline __m256i 689 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 690 1.1 mrg _mm256_maskz_shrdv_epi16 (__mmask16 __A, __m256i __B, __m256i __C, __m256i __D) 691 1.1 mrg { 692 1.1 mrg return (__m256i)__builtin_ia32_vpshrdv_v16hi_maskz ((__v16hi)__B, 693 1.1 mrg (__v16hi) __C, (__v16hi) __D, (__mmask16)__A); 694 1.1 mrg } 695 1.1 mrg 696 1.1 mrg extern __inline __m256i 697 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 698 1.1 mrg _mm256_shrdv_epi32 (__m256i __A, __m256i __B, __m256i __C) 699 1.1 mrg { 700 1.1 mrg return (__m256i) __builtin_ia32_vpshrdv_v8si ((__v8si)__A, (__v8si) __B, 701 1.1 mrg (__v8si) __C); 702 1.1 mrg } 703 1.1 mrg 704 1.1 mrg extern __inline __m256i 705 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 706 1.1 mrg _mm256_mask_shrdv_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) 707 1.1 mrg { 708 1.1 mrg return (__m256i)__builtin_ia32_vpshrdv_v8si_mask ((__v8si)__A, (__v8si) __C, 709 1.1 mrg (__v8si) __D, (__mmask8)__B); 710 1.1 mrg } 711 1.1 mrg 712 1.1 mrg extern __inline __m256i 713 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 714 1.1 mrg _mm256_maskz_shrdv_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D) 715 1.1 mrg { 716 1.1 mrg return (__m256i)__builtin_ia32_vpshrdv_v8si_maskz ((__v8si)__B, (__v8si) __C, 717 1.1 mrg 
(__v8si) __D, (__mmask8)__A); 718 1.1 mrg } 719 1.1 mrg 720 1.1 mrg /* Unmasked: forwards __A, __B, __C as __v4di to __builtin_ia32_vpshrdv_v4di and returns the result as __m256i. */ extern __inline __m256i 721 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 722 1.1 mrg _mm256_shrdv_epi64 (__m256i __A, __m256i __B, __m256i __C) 723 1.1 mrg { 724 1.1 mrg return (__m256i) __builtin_ia32_vpshrdv_v4di ((__v4di)__A, (__v4di) __B, 725 1.1 mrg (__v4di) __C); 726 1.1 mrg } 727 1.1 mrg 728 1.1 mrg /* Mask variant: __A, __C, __D are the __v4di operands; the __mmask8 __B is moved to the last argument of the _mask builtin. */ extern __inline __m256i 729 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 730 1.1 mrg _mm256_mask_shrdv_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) 731 1.1 mrg { 732 1.1 mrg return (__m256i)__builtin_ia32_vpshrdv_v4di_mask ((__v4di)__A, (__v4di) __C, 733 1.1 mrg (__v4di) __D, (__mmask8)__B); 734 1.1 mrg } 735 1.1 mrg 736 1.1 mrg /* Maskz variant: __B, __C, __D are the __v4di operands; the __mmask8 __A is moved to the last argument of the _maskz builtin. */ extern __inline __m256i 737 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 738 1.1 mrg _mm256_maskz_shrdv_epi64 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D) 739 1.1 mrg { 740 1.1 mrg return (__m256i)__builtin_ia32_vpshrdv_v4di_maskz ((__v4di)__B, (__v4di) __C, 741 1.1 mrg (__v4di) __D, (__mmask8)__A); 742 1.1 mrg } 743 1.1 mrg 744 1.1 mrg /* 128-bit counterpart, unmasked: forwards __A, __B, __C as __v8hi to __builtin_ia32_vpshrdv_v8hi and returns the result as __m128i. */ extern __inline __m128i 745 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 746 1.1 mrg _mm_shrdv_epi16 (__m128i __A, __m128i __B, __m128i __C) 747 1.1 mrg { 748 1.1 mrg return (__m128i) __builtin_ia32_vpshrdv_v8hi ((__v8hi)__A, (__v8hi) __B, 749 1.1 mrg (__v8hi) __C); 750 1.1 mrg } 751 1.1 mrg 752 1.1 mrg /* Mask variant: __A, __C, __D are the __v8hi operands; the __mmask8 __B is moved to the last argument of the _mask builtin. */ extern __inline __m128i 753 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 754 1.1 mrg _mm_mask_shrdv_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) 755 1.1 mrg { 756 1.1 mrg return (__m128i)__builtin_ia32_vpshrdv_v8hi_mask ((__v8hi)__A, (__v8hi) __C, 757 1.1 mrg (__v8hi) __D, (__mmask8)__B); 758 1.1 mrg } 759 1.1 mrg 760 1.1 mrg /* Maskz variant: __B, __C, __D are the __v8hi operands; the __mmask8 __A is moved to the last argument of the _maskz builtin. */ extern __inline __m128i 761 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 762 1.1 mrg 
_mm_maskz_shrdv_epi16 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) 763 1.1 mrg { 764 1.1 mrg /* The mask arrives first in the wrapper signature but is passed last to the builtin. */ return (__m128i)__builtin_ia32_vpshrdv_v8hi_maskz ((__v8hi)__B, (__v8hi) __C, 765 1.1 mrg (__v8hi) __D, (__mmask8)__A); 766 1.1 mrg } 767 1.1 mrg 768 1.1 mrg /* Unmasked: forwards __A, __B, __C as __v4si to __builtin_ia32_vpshrdv_v4si and returns the result as __m128i. */ extern __inline __m128i 769 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 770 1.1 mrg _mm_shrdv_epi32 (__m128i __A, __m128i __B, __m128i __C) 771 1.1 mrg { 772 1.1 mrg return (__m128i) __builtin_ia32_vpshrdv_v4si ((__v4si)__A, (__v4si) __B, 773 1.1 mrg (__v4si) __C); 774 1.1 mrg } 775 1.1 mrg 776 1.1 mrg /* Mask variant: __A, __C, __D are the __v4si operands; the __mmask8 __B is moved to the last argument of the _mask builtin. */ extern __inline __m128i 777 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 778 1.1 mrg _mm_mask_shrdv_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) 779 1.1 mrg { 780 1.1 mrg return (__m128i)__builtin_ia32_vpshrdv_v4si_mask ((__v4si)__A, (__v4si) __C, 781 1.1 mrg (__v4si) __D, (__mmask8)__B); 782 1.1 mrg } 783 1.1 mrg 784 1.1 mrg /* Maskz variant: __B, __C, __D are the __v4si operands; the __mmask8 __A is moved to the last argument of the _maskz builtin. */ extern __inline __m128i 785 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 786 1.1 mrg _mm_maskz_shrdv_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) 787 1.1 mrg { 788 1.1 mrg return (__m128i)__builtin_ia32_vpshrdv_v4si_maskz ((__v4si)__B, (__v4si) __C, 789 1.1 mrg (__v4si) __D, (__mmask8)__A); 790 1.1 mrg } 791 1.1 mrg 792 1.1 mrg /* Unmasked: forwards __A, __B, __C as __v2di to __builtin_ia32_vpshrdv_v2di and returns the result as __m128i. */ extern __inline __m128i 793 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 794 1.1 mrg _mm_shrdv_epi64 (__m128i __A, __m128i __B, __m128i __C) 795 1.1 mrg { 796 1.1 mrg return (__m128i) __builtin_ia32_vpshrdv_v2di ((__v2di)__A, (__v2di) __B, 797 1.1 mrg (__v2di) __C); 798 1.1 mrg } 799 1.1 mrg 800 1.1 mrg /* Mask variant: __A, __C, __D are the __v2di operands; the __mmask8 __B is moved to the last argument of the _mask builtin. */ extern __inline __m128i 801 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 802 1.1 mrg _mm_mask_shrdv_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) 803 1.1 mrg { 804 1.1 mrg return (__m128i)__builtin_ia32_vpshrdv_v2di_mask ((__v2di)__A, (__v2di) __C, 805 1.1 mrg (__v2di) __D, 
(__mmask8)__B); 806 1.1 mrg } 807 1.1 mrg 808 1.1 mrg /* Maskz variant: __B, __C, __D are the __v2di operands; the __mmask8 __A is moved to the last argument of the _maskz builtin. */ extern __inline __m128i 809 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 810 1.1 mrg _mm_maskz_shrdv_epi64 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) 811 1.1 mrg { 812 1.1 mrg return (__m128i)__builtin_ia32_vpshrdv_v2di_maskz ((__v2di)__B, (__v2di) __C, 813 1.1 mrg (__v2di) __D, (__mmask8)__A); 814 1.1 mrg } 815 1.1 mrg 816 1.1 mrg /* Start of the vpshldv wrappers, which mirror the vpshrdv ones above in shape. Unmasked: forwards __A, __B, __C as __v16hi to __builtin_ia32_vpshldv_v16hi. */ extern __inline __m256i 817 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 818 1.1 mrg _mm256_shldv_epi16 (__m256i __A, __m256i __B, __m256i __C) 819 1.1 mrg { 820 1.1 mrg return (__m256i) __builtin_ia32_vpshldv_v16hi ((__v16hi)__A, (__v16hi) __B, 821 1.1 mrg (__v16hi) __C); 822 1.1 mrg } 823 1.1 mrg 824 1.1 mrg /* Mask variant: __A, __C, __D are the __v16hi operands; the __mmask16 __B is moved to the last argument of the _mask builtin. */ extern __inline __m256i 825 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 826 1.1 mrg _mm256_mask_shldv_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D) 827 1.1 mrg { 828 1.1 mrg return (__m256i)__builtin_ia32_vpshldv_v16hi_mask ((__v16hi)__A, 829 1.1 mrg (__v16hi) __C, (__v16hi) __D, (__mmask16)__B); 830 1.1 mrg } 831 1.1 mrg 832 1.1 mrg /* Maskz variant: __B, __C, __D are the __v16hi operands; the __mmask16 __A is moved to the last argument of the _maskz builtin. */ extern __inline __m256i 833 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 834 1.1 mrg _mm256_maskz_shldv_epi16 (__mmask16 __A, __m256i __B, __m256i __C, __m256i __D) 835 1.1 mrg { 836 1.1 mrg return (__m256i)__builtin_ia32_vpshldv_v16hi_maskz ((__v16hi)__B, 837 1.1 mrg (__v16hi) __C, (__v16hi) __D, (__mmask16)__A); 838 1.1 mrg } 839 1.1 mrg 840 1.1 mrg /* Unmasked: forwards __A, __B, __C as __v8si to __builtin_ia32_vpshldv_v8si and returns the result as __m256i. */ extern __inline __m256i 841 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 842 1.1 mrg _mm256_shldv_epi32 (__m256i __A, __m256i __B, __m256i __C) 843 1.1 mrg { 844 1.1 mrg return (__m256i) __builtin_ia32_vpshldv_v8si ((__v8si)__A, (__v8si) __B, 845 1.1 mrg (__v8si) __C); 846 1.1 mrg } 847 1.1 mrg 848 1.1 mrg /* Mask variant: __A, __C, __D are the __v8si operands; the __mmask8 __B is moved to the last argument of the _mask builtin. */ extern __inline __m256i 849 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 850 1.1 mrg 
_mm256_mask_shldv_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) 851 1.1 mrg { 852 1.1 mrg /* The mask is the second wrapper parameter but the last builtin argument. */ return (__m256i)__builtin_ia32_vpshldv_v8si_mask ((__v8si)__A, (__v8si) __C, 853 1.1 mrg (__v8si) __D, (__mmask8)__B) ; 854 1.1 mrg } 855 1.1 mrg 856 1.1 mrg /* Maskz variant: __B, __C, __D are the __v8si operands; the __mmask8 __A is moved to the last argument of the _maskz builtin. */ extern __inline __m256i 857 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 858 1.1 mrg _mm256_maskz_shldv_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D) 859 1.1 mrg { 860 1.1 mrg return (__m256i)__builtin_ia32_vpshldv_v8si_maskz ((__v8si)__B, (__v8si) __C, 861 1.1 mrg (__v8si) __D, (__mmask8)__A); 862 1.1 mrg } 863 1.1 mrg 864 1.1 mrg /* Unmasked: forwards __A, __B, __C as __v4di to __builtin_ia32_vpshldv_v4di and returns the result as __m256i. */ extern __inline __m256i 865 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 866 1.1 mrg _mm256_shldv_epi64 (__m256i __A, __m256i __B, __m256i __C) 867 1.1 mrg { 868 1.1 mrg return (__m256i) __builtin_ia32_vpshldv_v4di ((__v4di)__A, (__v4di) __B, 869 1.1 mrg (__v4di) __C); 870 1.1 mrg } 871 1.1 mrg 872 1.1 mrg /* Mask variant: __A, __C, __D are the __v4di operands; the __mmask8 __B is moved to the last argument of the _mask builtin. */ extern __inline __m256i 873 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 874 1.1 mrg _mm256_mask_shldv_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) 875 1.1 mrg { 876 1.1 mrg return (__m256i)__builtin_ia32_vpshldv_v4di_mask ((__v4di)__A, (__v4di) __C, 877 1.1 mrg (__v4di) __D, (__mmask8)__B); 878 1.1 mrg } 879 1.1 mrg 880 1.1 mrg /* Maskz variant: __B, __C, __D are the __v4di operands; the __mmask8 __A is moved to the last argument of the _maskz builtin. */ extern __inline __m256i 881 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 882 1.1 mrg _mm256_maskz_shldv_epi64 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D) 883 1.1 mrg { 884 1.1 mrg return (__m256i)__builtin_ia32_vpshldv_v4di_maskz ((__v4di)__B, (__v4di) __C, 885 1.1 mrg (__v4di) __D, (__mmask8)__A); 886 1.1 mrg } 887 1.1 mrg 888 1.1 mrg /* 128-bit counterpart, unmasked: forwards __A, __B, __C as __v8hi to __builtin_ia32_vpshldv_v8hi and returns the result as __m128i. */ extern __inline __m128i 889 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 890 1.1 mrg _mm_shldv_epi16 (__m128i __A, __m128i __B, __m128i __C) 891 1.1 mrg { 892 1.1 mrg return (__m128i) __builtin_ia32_vpshldv_v8hi ((__v8hi)__A, (__v8hi) 
__B, 893 1.1 mrg (__v8hi) __C); 894 1.1 mrg } 895 1.1 mrg 896 1.1 mrg /* Mask variant: __A, __C, __D are the __v8hi operands; the __mmask8 __B is moved to the last argument of the _mask builtin. */ extern __inline __m128i 897 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 898 1.1 mrg _mm_mask_shldv_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) 899 1.1 mrg { 900 1.1 mrg return (__m128i)__builtin_ia32_vpshldv_v8hi_mask ((__v8hi)__A, (__v8hi) __C, 901 1.1 mrg (__v8hi) __D, (__mmask8)__B); 902 1.1 mrg } 903 1.1 mrg 904 1.1 mrg /* Maskz variant: __B, __C, __D are the __v8hi operands; the __mmask8 __A is moved to the last argument of the _maskz builtin. */ extern __inline __m128i 905 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 906 1.1 mrg _mm_maskz_shldv_epi16 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) 907 1.1 mrg { 908 1.1 mrg return (__m128i)__builtin_ia32_vpshldv_v8hi_maskz ((__v8hi)__B, (__v8hi) __C, 909 1.1 mrg (__v8hi) __D, (__mmask8)__A); 910 1.1 mrg } 911 1.1 mrg 912 1.1 mrg /* Unmasked: forwards __A, __B, __C as __v4si to __builtin_ia32_vpshldv_v4si and returns the result as __m128i. */ extern __inline __m128i 913 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 914 1.1 mrg _mm_shldv_epi32 (__m128i __A, __m128i __B, __m128i __C) 915 1.1 mrg { 916 1.1 mrg return (__m128i) __builtin_ia32_vpshldv_v4si ((__v4si)__A, (__v4si) __B, 917 1.1 mrg (__v4si) __C); 918 1.1 mrg } 919 1.1 mrg 920 1.1 mrg /* Mask variant: __A, __C, __D are the __v4si operands; the __mmask8 __B is moved to the last argument of the _mask builtin. */ extern __inline __m128i 921 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 922 1.1 mrg _mm_mask_shldv_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) 923 1.1 mrg { 924 1.1 mrg return (__m128i)__builtin_ia32_vpshldv_v4si_mask ((__v4si)__A, (__v4si) __C, 925 1.1 mrg (__v4si) __D, (__mmask8)__B); 926 1.1 mrg } 927 1.1 mrg 928 1.1 mrg /* Maskz variant: __B, __C, __D are the __v4si operands; the __mmask8 __A is moved to the last argument of the _maskz builtin. */ extern __inline __m128i 929 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 930 1.1 mrg _mm_maskz_shldv_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) 931 1.1 mrg { 932 1.1 mrg return (__m128i)__builtin_ia32_vpshldv_v4si_maskz ((__v4si)__B, (__v4si) __C, 933 1.1 mrg (__v4si) __D, (__mmask8)__A); 934 1.1 mrg } 935 1.1 mrg 936 1.1 mrg /* Unmasked __v2di wrapper (_mm_shldv_epi64): forwards __A, __B, __C to __builtin_ia32_vpshldv_v2di and returns the result as __m128i. */ extern __inline __m128i 937 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) 938 1.1 mrg _mm_shldv_epi64 (__m128i __A, __m128i __B, __m128i __C) 939 1.1 mrg { 940 1.1 mrg /* Straight pass-through: argument order is unchanged, only the vector view is cast to __v2di. */ return (__m128i) __builtin_ia32_vpshldv_v2di ((__v2di)__A, (__v2di) __B, 941 1.1 mrg (__v2di) __C); 942 1.1 mrg } 943 1.1 mrg 944 1.1 mrg /* Mask variant: __A, __C, __D are the __v2di operands; the __mmask8 __B is moved to the last argument of the _mask builtin. */ extern __inline __m128i 945 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 946 1.1 mrg _mm_mask_shldv_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) 947 1.1 mrg { 948 1.1 mrg return (__m128i)__builtin_ia32_vpshldv_v2di_mask ((__v2di)__A, (__v2di) __C, 949 1.1 mrg (__v2di) __D, (__mmask8)__B); 950 1.1 mrg } 951 1.1 mrg 952 1.1 mrg /* Maskz variant: __B, __C, __D are the __v2di operands; the __mmask8 __A is moved to the last argument of the _maskz builtin. */ extern __inline __m128i 953 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 954 1.1 mrg _mm_maskz_shldv_epi64 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) 955 1.1 mrg { 956 1.1 mrg return (__m128i)__builtin_ia32_vpshldv_v2di_maskz ((__v2di)__B, (__v2di) __C, 957 1.1 mrg (__v2di) __D, (__mmask8)__A); 958 1.1 mrg } 959 1.1 mrg 960 1.1 mrg 961 1.1 mrg 962 1.1 mrg 963 1.1 mrg /* Pop the target options only if this header pushed them; the flag is defined next to the push near the top of the file. */ #ifdef __DISABLE_AVX512VBMI2VL__ 964 1.1 mrg #undef __DISABLE_AVX512VBMI2VL__ 965 1.1 mrg #pragma GCC pop_options 966 1.1 mrg #endif /* __DISABLE_AVX512VBMI2VL__ */ 967 1.1 mrg 968 1.1 mrg /* The __v32qi wrappers that follow are compiled with avx512bw enabled in addition to avx512vbmi2 and avx512vl. */ #if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__) || \ 969 1.1 mrg !defined(__AVX512BW__) 970 1.1 mrg #pragma GCC push_options 971 1.1 mrg #pragma GCC target("avx512vbmi2,avx512vl,avx512bw") 972 1.1 mrg #define __DISABLE_AVX512VBMI2VLBW__ 973 1.1 mrg #endif /* !__AVX512VL__ || !__AVX512VBMI2__ || !__AVX512BW__ */ 974 1.1 mrg 975 1.1 mrg /* Mask variant: the data __C is passed first, __A second and the __mmask32 __B last to __builtin_ia32_compressqi256_mask. */ extern __inline __m256i 976 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 977 1.1 mrg _mm256_mask_compress_epi8 (__m256i __A, __mmask32 __B, __m256i __C) 978 1.1 mrg { 979 1.1 mrg return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi)__C, 980 1.1 mrg (__v32qi)__A, (__mmask32)__B); 981 1.1 mrg } 982 1.1 mrg 983 1.1 mrg /* Maskz variant (_mm256_maskz_compress_epi8): reuses the _mask builtin with _mm256_setzero_si256 () in the second slot. */ extern __inline __m256i 984 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 985 
1.1 mrg _mm256_maskz_compress_epi8 (__mmask32 __A, __m256i __B) 986 1.1 mrg { 987 1.1 mrg /* Same builtin as the mask variant; a zero vector takes the second slot, __B is the data and __A the mask. */ return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __B, 988 1.1 mrg (__v32qi) _mm256_setzero_si256 (), (__mmask32) __A); 989 1.1 mrg } 990 1.1 mrg 991 1.1 mrg /* Store variant, returns nothing: __A is cast to __v32qi * and passed first (presumably the store destination -- confirm), followed by the data __C and the __mmask32 __B. */ extern __inline void 992 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 993 1.1 mrg _mm256_mask_compressstoreu_epi8 (void * __A, __mmask32 __B, __m256i __C) 994 1.1 mrg { 995 1.1 mrg __builtin_ia32_compressstoreuqi256_mask ((__v32qi *) __A, (__v32qi) __C, 996 1.1 mrg (__mmask32) __B); 997 1.1 mrg } 998 1.1 mrg 999 1.1 mrg /* Mask variant: the data __C is passed first, __A second and the __mmask32 __B last to __builtin_ia32_expandqi256_mask. */ extern __inline __m256i 1000 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1001 1.1 mrg _mm256_mask_expand_epi8 (__m256i __A, __mmask32 __B, __m256i __C) 1002 1.1 mrg { 1003 1.1 mrg return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __C, 1004 1.1 mrg (__v32qi) __A, 1005 1.1 mrg (__mmask32) __B); 1006 1.1 mrg } 1007 1.1 mrg 1008 1.1 mrg /* Maskz variant: calls the dedicated _maskz builtin with the data __B, _mm256_setzero_si256 () in the second slot and the __mmask32 __A last. */ extern __inline __m256i 1009 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1010 1.1 mrg _mm256_maskz_expand_epi8 (__mmask32 __A, __m256i __B) 1011 1.1 mrg { 1012 1.1 mrg return (__m256i) __builtin_ia32_expandqi256_maskz ((__v32qi) __B, 1013 1.1 mrg (__v32qi) _mm256_setzero_si256 (), (__mmask32) __A); 1014 1.1 mrg } 1015 1.1 mrg 1016 1.1 mrg /* Load variant: __C is cast to const __v32qi * and passed first, then __A and the __mmask32 __B, to __builtin_ia32_expandloadqi256_mask. */ extern __inline __m256i 1017 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1018 1.1 mrg _mm256_mask_expandloadu_epi8 (__m256i __A, __mmask32 __B, const void * __C) 1019 1.1 mrg { 1020 1.1 mrg return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *) __C, 1021 1.1 mrg (__v32qi) __A, (__mmask32) __B); 1022 1.1 mrg } 1023 1.1 mrg 1024 1.1 mrg /* Maskz load variant: __B is cast to const __v32qi * and passed first, then _mm256_setzero_si256 () and the __mmask32 __A, to the _maskz builtin. */ extern __inline __m256i 1025 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 1026 1.1 mrg _mm256_maskz_expandloadu_epi8 (__mmask32 __A, const void * __B) 1027 1.1 mrg { 1028 1.1 mrg return (__m256i) 
__builtin_ia32_expandloadqi256_maskz ((const __v32qi *) __B, 1029 1.1 mrg (__v32qi) _mm256_setzero_si256 (), (__mmask32) __A); 1030 1.1 mrg } 1031 1.1 mrg 1032 1.1 mrg /* Pop the avx512vbmi2,avx512vl,avx512bw target options only if this header pushed them. */ #ifdef __DISABLE_AVX512VBMI2VLBW__ 1033 1.1 mrg #undef __DISABLE_AVX512VBMI2VLBW__ 1034 1.1 mrg #pragma GCC pop_options 1035 1.1 mrg #endif /* __DISABLE_AVX512VBMI2VLBW__ */ 1036 1.1 mrg 1037 1.1 mrg #endif /* _AVX512VBMI2VLINTRIN_H_INCLUDED */ 1038