Home | History | Annotate | Line # | Download | only in i386
      1  1.1.1.4  mrg /* Copyright (C) 2013-2022 Free Software Foundation, Inc.
      2      1.1  mrg 
      3      1.1  mrg    This file is part of GCC.
      4      1.1  mrg 
      5      1.1  mrg    GCC is free software; you can redistribute it and/or modify
      6      1.1  mrg    it under the terms of the GNU General Public License as published by
      7      1.1  mrg    the Free Software Foundation; either version 3, or (at your option)
      8      1.1  mrg    any later version.
      9      1.1  mrg 
     10      1.1  mrg    GCC is distributed in the hope that it will be useful,
     11      1.1  mrg    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12      1.1  mrg    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13      1.1  mrg    GNU General Public License for more details.
     14      1.1  mrg 
     15      1.1  mrg    Under Section 7 of GPL version 3, you are granted additional
     16      1.1  mrg    permissions described in the GCC Runtime Library Exception, version
     17      1.1  mrg    3.1, as published by the Free Software Foundation.
     18      1.1  mrg 
     19      1.1  mrg    You should have received a copy of the GNU General Public License and
     20      1.1  mrg    a copy of the GCC Runtime Library Exception along with this program;
     21      1.1  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     22      1.1  mrg    <http://www.gnu.org/licenses/>.  */
     23      1.1  mrg 
     24      1.1  mrg #ifndef _IMMINTRIN_H_INCLUDED
     25      1.1  mrg #error "Never use <avx512vbmi2vlintrin.h> directly; include <immintrin.h> instead."
     26      1.1  mrg #endif
     27      1.1  mrg 
     28      1.1  mrg #ifndef _AVX512VBMI2VLINTRIN_H_INCLUDED
     29      1.1  mrg #define _AVX512VBMI2VLINTRIN_H_INCLUDED
     30      1.1  mrg 
     31      1.1  mrg #if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__)
     32      1.1  mrg #pragma GCC push_options
     33      1.1  mrg #pragma GCC target("avx512vbmi2,avx512vl")
     34      1.1  mrg #define __DISABLE_AVX512VBMI2VL__
#endif /* !__AVX512VL__ || !__AVX512VBMI2__ */
     36      1.1  mrg 
     37      1.1  mrg extern __inline __m128i
     38      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     39      1.1  mrg _mm_mask_compress_epi8 (__m128i __A, __mmask16 __B, __m128i __C)
     40      1.1  mrg {
     41      1.1  mrg   return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi)__C,
     42      1.1  mrg 						(__v16qi)__A, (__mmask16)__B);
     43      1.1  mrg }
     44      1.1  mrg 
     45      1.1  mrg extern __inline __m128i
     46      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     47      1.1  mrg _mm_maskz_compress_epi8 (__mmask16 __A, __m128i __B)
     48      1.1  mrg {
     49      1.1  mrg   return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __B,
     50      1.1  mrg 			(__v16qi) _mm_setzero_si128 (), (__mmask16) __A);
     51      1.1  mrg }
     52      1.1  mrg 
     53      1.1  mrg 
     54      1.1  mrg extern __inline void
     55      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     56      1.1  mrg _mm256_mask_compressstoreu_epi16 (void * __A, __mmask16 __B, __m256i __C)
     57      1.1  mrg {
     58      1.1  mrg   __builtin_ia32_compressstoreuhi256_mask ((__v16hi *) __A, (__v16hi) __C,
     59      1.1  mrg 							(__mmask16) __B);
     60      1.1  mrg }
     61      1.1  mrg 
     62      1.1  mrg extern __inline __m128i
     63      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     64      1.1  mrg _mm_mask_compress_epi16 (__m128i __A, __mmask8 __B, __m128i __C)
     65      1.1  mrg {
     66      1.1  mrg   return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi)__C, (__v8hi)__A,
     67      1.1  mrg 								(__mmask8)__B);
     68      1.1  mrg }
     69      1.1  mrg 
     70      1.1  mrg extern __inline __m128i
     71      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     72      1.1  mrg _mm_maskz_compress_epi16 (__mmask8 __A, __m128i __B)
     73      1.1  mrg {
     74      1.1  mrg   return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __B,
     75      1.1  mrg 				(__v8hi) _mm_setzero_si128 (), (__mmask8) __A);
     76      1.1  mrg }
     77      1.1  mrg 
     78      1.1  mrg extern __inline __m256i
     79      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     80      1.1  mrg _mm256_mask_compress_epi16 (__m256i __A, __mmask16 __B, __m256i __C)
     81      1.1  mrg {
     82      1.1  mrg   return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi)__C,
     83      1.1  mrg 						(__v16hi)__A, (__mmask16)__B);
     84      1.1  mrg }
     85      1.1  mrg 
     86      1.1  mrg extern __inline __m256i
     87      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     88      1.1  mrg _mm256_maskz_compress_epi16 (__mmask16 __A, __m256i __B)
     89      1.1  mrg {
     90      1.1  mrg   return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __B,
     91      1.1  mrg 			(__v16hi) _mm256_setzero_si256 (), (__mmask16) __A);
     92      1.1  mrg }
     93      1.1  mrg 
     94      1.1  mrg extern __inline void
     95      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     96      1.1  mrg _mm_mask_compressstoreu_epi8 (void * __A, __mmask16 __B, __m128i __C)
     97      1.1  mrg {
     98      1.1  mrg   __builtin_ia32_compressstoreuqi128_mask ((__v16qi *) __A, (__v16qi) __C,
     99      1.1  mrg 							(__mmask16) __B);
    100      1.1  mrg }
    101      1.1  mrg 
    102      1.1  mrg extern __inline void
    103      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    104      1.1  mrg _mm_mask_compressstoreu_epi16 (void * __A, __mmask8 __B, __m128i __C)
    105      1.1  mrg {
    106      1.1  mrg   __builtin_ia32_compressstoreuhi128_mask ((__v8hi *) __A, (__v8hi) __C,
    107      1.1  mrg 							(__mmask8) __B);
    108      1.1  mrg }
    109      1.1  mrg 
    110      1.1  mrg extern __inline __m128i
    111      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    112      1.1  mrg _mm_mask_expand_epi8 (__m128i __A, __mmask16 __B, __m128i __C)
    113      1.1  mrg {
    114      1.1  mrg   return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __C,
    115      1.1  mrg 						    (__v16qi) __A,
    116      1.1  mrg 						    (__mmask16) __B);
    117      1.1  mrg }
    118      1.1  mrg 
    119      1.1  mrg extern __inline __m128i
    120      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    121      1.1  mrg _mm_maskz_expand_epi8 (__mmask16 __A, __m128i __B)
    122      1.1  mrg {
    123      1.1  mrg   return (__m128i) __builtin_ia32_expandqi128_maskz ((__v16qi) __B,
    124      1.1  mrg 			(__v16qi) _mm_setzero_si128 (), (__mmask16) __A);
    125      1.1  mrg }
    126      1.1  mrg 
    127      1.1  mrg extern __inline __m128i
    128      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    129      1.1  mrg _mm_mask_expandloadu_epi8 (__m128i __A, __mmask16 __B, const void * __C)
    130      1.1  mrg {
    131      1.1  mrg   return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *) __C,
    132      1.1  mrg 					(__v16qi) __A, (__mmask16) __B);
    133      1.1  mrg }
    134      1.1  mrg 
    135      1.1  mrg extern __inline __m128i
    136      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    137      1.1  mrg _mm_maskz_expandloadu_epi8 (__mmask16 __A, const void * __B)
    138      1.1  mrg {
    139      1.1  mrg   return (__m128i) __builtin_ia32_expandloadqi128_maskz ((const __v16qi *) __B,
    140      1.1  mrg 			(__v16qi) _mm_setzero_si128 (), (__mmask16) __A);
    141      1.1  mrg }
    142      1.1  mrg 
    143      1.1  mrg extern __inline __m128i
    144      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    145      1.1  mrg _mm_mask_expand_epi16 (__m128i __A, __mmask8 __B, __m128i __C)
    146      1.1  mrg {
    147      1.1  mrg   return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __C,
    148      1.1  mrg 						    (__v8hi) __A,
    149      1.1  mrg 						    (__mmask8) __B);
    150      1.1  mrg }
    151      1.1  mrg 
    152      1.1  mrg extern __inline __m128i
    153      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    154      1.1  mrg _mm_maskz_expand_epi16 (__mmask8 __A, __m128i __B)
    155      1.1  mrg {
    156      1.1  mrg   return (__m128i) __builtin_ia32_expandhi128_maskz ((__v8hi) __B,
    157      1.1  mrg 				(__v8hi) _mm_setzero_si128 (), (__mmask8) __A);
    158      1.1  mrg }
    159      1.1  mrg 
    160      1.1  mrg extern __inline __m128i
    161      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    162      1.1  mrg _mm_mask_expandloadu_epi16 (__m128i __A, __mmask8 __B, const void * __C)
    163      1.1  mrg {
    164      1.1  mrg   return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *) __C,
    165      1.1  mrg 						(__v8hi) __A, (__mmask8) __B);
    166      1.1  mrg }
    167      1.1  mrg 
    168      1.1  mrg extern __inline __m128i
    169      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    170      1.1  mrg _mm_maskz_expandloadu_epi16 (__mmask8 __A, const void * __B)
    171      1.1  mrg {
    172      1.1  mrg   return (__m128i) __builtin_ia32_expandloadhi128_maskz ((const __v8hi *) __B,
    173      1.1  mrg 				(__v8hi) _mm_setzero_si128 (), (__mmask8) __A);
    174      1.1  mrg }
    175      1.1  mrg extern __inline __m256i
    176      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    177      1.1  mrg _mm256_mask_expand_epi16 (__m256i __A, __mmask16 __B, __m256i __C)
    178      1.1  mrg {
    179      1.1  mrg   return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __C,
    180      1.1  mrg 						    (__v16hi) __A,
    181      1.1  mrg 						    (__mmask16) __B);
    182      1.1  mrg }
    183      1.1  mrg 
    184      1.1  mrg extern __inline __m256i
    185      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    186      1.1  mrg _mm256_maskz_expand_epi16 (__mmask16 __A, __m256i __B)
    187      1.1  mrg {
    188      1.1  mrg   return (__m256i) __builtin_ia32_expandhi256_maskz ((__v16hi) __B,
    189      1.1  mrg 			(__v16hi) _mm256_setzero_si256 (), (__mmask16) __A);
    190      1.1  mrg }
    191      1.1  mrg 
    192      1.1  mrg extern __inline __m256i
    193      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    194      1.1  mrg _mm256_mask_expandloadu_epi16 (__m256i __A, __mmask16 __B, const void * __C)
    195      1.1  mrg {
    196      1.1  mrg   return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *) __C,
    197      1.1  mrg 					(__v16hi) __A, (__mmask16) __B);
    198      1.1  mrg }
    199      1.1  mrg 
    200      1.1  mrg extern __inline __m256i
    201      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    202      1.1  mrg _mm256_maskz_expandloadu_epi16 (__mmask16 __A, const void * __B)
    203      1.1  mrg {
    204      1.1  mrg   return (__m256i) __builtin_ia32_expandloadhi256_maskz ((const __v16hi *) __B,
    205      1.1  mrg 			(__v16hi) _mm256_setzero_si256 (), (__mmask16) __A);
    206      1.1  mrg }
    207      1.1  mrg 
    208      1.1  mrg #ifdef __OPTIMIZE__
    209      1.1  mrg extern __inline __m256i
    210      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    211      1.1  mrg _mm256_shrdi_epi16 (__m256i __A, __m256i __B, int __C)
    212      1.1  mrg {
    213      1.1  mrg   return (__m256i) __builtin_ia32_vpshrd_v16hi ((__v16hi)__A, (__v16hi) __B,
    214      1.1  mrg 									__C);
    215      1.1  mrg }
    216      1.1  mrg 
    217      1.1  mrg extern __inline __m256i
    218      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    219      1.1  mrg _mm256_mask_shrdi_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D,
    220      1.1  mrg 								int __E)
    221      1.1  mrg {
    222      1.1  mrg   return (__m256i)__builtin_ia32_vpshrd_v16hi_mask ((__v16hi)__C,
    223      1.1  mrg 			(__v16hi) __D, __E, (__v16hi) __A, (__mmask16)__B);
    224      1.1  mrg }
    225      1.1  mrg 
    226      1.1  mrg extern __inline __m256i
    227      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    228      1.1  mrg _mm256_maskz_shrdi_epi16 (__mmask16 __A, __m256i __B, __m256i __C, int __D)
    229      1.1  mrg {
    230      1.1  mrg   return (__m256i)__builtin_ia32_vpshrd_v16hi_mask ((__v16hi)__B,
    231      1.1  mrg 	(__v16hi) __C, __D, (__v16hi) _mm256_setzero_si256 (), (__mmask16)__A);
    232      1.1  mrg }
    233      1.1  mrg 
    234      1.1  mrg extern __inline __m256i
    235      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    236      1.1  mrg _mm256_mask_shrdi_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
    237      1.1  mrg 								int __E)
    238      1.1  mrg {
    239      1.1  mrg   return (__m256i)__builtin_ia32_vpshrd_v8si_mask ((__v8si)__C, (__v8si) __D,
    240      1.1  mrg 					__E, (__v8si) __A, (__mmask8)__B);
    241      1.1  mrg }
    242      1.1  mrg 
    243      1.1  mrg extern __inline __m256i
    244      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    245      1.1  mrg _mm256_maskz_shrdi_epi32 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
    246      1.1  mrg {
    247      1.1  mrg   return (__m256i)__builtin_ia32_vpshrd_v8si_mask ((__v8si)__B, (__v8si) __C,
    248      1.1  mrg 			__D, (__v8si) _mm256_setzero_si256 (), (__mmask8)__A);
    249      1.1  mrg }
    250      1.1  mrg 
    251      1.1  mrg extern __inline __m256i
    252      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    253      1.1  mrg _mm256_shrdi_epi32 (__m256i __A, __m256i __B, int __C)
    254      1.1  mrg {
    255      1.1  mrg   return (__m256i) __builtin_ia32_vpshrd_v8si ((__v8si)__A, (__v8si) __B, __C);
    256      1.1  mrg }
    257      1.1  mrg 
    258      1.1  mrg extern __inline __m256i
    259      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    260      1.1  mrg _mm256_mask_shrdi_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
    261      1.1  mrg 								int __E)
    262      1.1  mrg {
    263      1.1  mrg   return (__m256i)__builtin_ia32_vpshrd_v4di_mask ((__v4di)__C, (__v4di) __D,
    264      1.1  mrg 					__E, (__v4di) __A, (__mmask8)__B);
    265      1.1  mrg }
    266      1.1  mrg 
    267      1.1  mrg extern __inline __m256i
    268      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    269      1.1  mrg _mm256_maskz_shrdi_epi64 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
    270      1.1  mrg {
    271      1.1  mrg   return (__m256i)__builtin_ia32_vpshrd_v4di_mask ((__v4di)__B, (__v4di) __C,
    272      1.1  mrg 			__D, (__v4di) _mm256_setzero_si256 (), (__mmask8)__A);
    273      1.1  mrg }
    274      1.1  mrg 
    275      1.1  mrg extern __inline __m256i
    276      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    277      1.1  mrg _mm256_shrdi_epi64 (__m256i __A, __m256i __B, int __C)
    278      1.1  mrg {
    279      1.1  mrg   return (__m256i) __builtin_ia32_vpshrd_v4di ((__v4di)__A, (__v4di) __B, __C);
    280      1.1  mrg }
    281      1.1  mrg 
    282      1.1  mrg extern __inline __m128i
    283      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    284      1.1  mrg _mm_mask_shrdi_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
    285      1.1  mrg 								int __E)
    286      1.1  mrg {
    287      1.1  mrg   return (__m128i)__builtin_ia32_vpshrd_v8hi_mask ((__v8hi)__C, (__v8hi) __D,
    288      1.1  mrg 					__E, (__v8hi) __A, (__mmask8)__B);
    289      1.1  mrg }
    290      1.1  mrg 
    291      1.1  mrg extern __inline __m128i
    292      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    293      1.1  mrg _mm_maskz_shrdi_epi16 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
    294      1.1  mrg {
    295      1.1  mrg   return (__m128i)__builtin_ia32_vpshrd_v8hi_mask ((__v8hi)__B, (__v8hi) __C,
    296      1.1  mrg 			__D, (__v8hi) _mm_setzero_si128 (), (__mmask8)__A);
    297      1.1  mrg }
    298      1.1  mrg 
    299      1.1  mrg extern __inline __m128i
    300      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    301      1.1  mrg _mm_shrdi_epi16 (__m128i __A, __m128i __B, int __C)
    302      1.1  mrg {
    303      1.1  mrg   return (__m128i) __builtin_ia32_vpshrd_v8hi ((__v8hi)__A, (__v8hi) __B, __C);
    304      1.1  mrg }
    305      1.1  mrg 
    306      1.1  mrg extern __inline __m128i
    307      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    308      1.1  mrg _mm_mask_shrdi_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
    309      1.1  mrg 								int __E)
    310      1.1  mrg {
    311      1.1  mrg   return (__m128i)__builtin_ia32_vpshrd_v4si_mask ((__v4si)__C, (__v4si) __D,
    312      1.1  mrg 					__E, (__v4si) __A, (__mmask8)__B);
    313      1.1  mrg }
    314      1.1  mrg 
    315      1.1  mrg extern __inline __m128i
    316      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    317      1.1  mrg _mm_maskz_shrdi_epi32 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
    318      1.1  mrg {
    319      1.1  mrg   return (__m128i)__builtin_ia32_vpshrd_v4si_mask ((__v4si)__B, (__v4si) __C,
    320      1.1  mrg 			__D, (__v4si) _mm_setzero_si128 (), (__mmask8)__A);
    321      1.1  mrg }
    322      1.1  mrg 
    323      1.1  mrg extern __inline __m128i
    324      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    325      1.1  mrg _mm_shrdi_epi32 (__m128i __A, __m128i __B, int __C)
    326      1.1  mrg {
    327      1.1  mrg   return (__m128i) __builtin_ia32_vpshrd_v4si ((__v4si)__A, (__v4si) __B, __C);
    328      1.1  mrg }
    329      1.1  mrg 
    330      1.1  mrg extern __inline __m128i
    331      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    332      1.1  mrg _mm_mask_shrdi_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
    333      1.1  mrg 								int __E)
    334      1.1  mrg {
    335      1.1  mrg   return (__m128i)__builtin_ia32_vpshrd_v2di_mask ((__v2di)__C, (__v2di) __D,
    336      1.1  mrg 					__E, (__v2di) __A, (__mmask8)__B);
    337      1.1  mrg }
    338      1.1  mrg 
    339      1.1  mrg extern __inline __m128i
    340      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    341      1.1  mrg _mm_maskz_shrdi_epi64 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
    342      1.1  mrg {
    343      1.1  mrg   return (__m128i)__builtin_ia32_vpshrd_v2di_mask ((__v2di)__B, (__v2di) __C,
    344      1.1  mrg 			__D, (__v2di) _mm_setzero_si128 (), (__mmask8)__A);
    345      1.1  mrg }
    346      1.1  mrg 
    347      1.1  mrg extern __inline __m128i
    348      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    349      1.1  mrg _mm_shrdi_epi64 (__m128i __A, __m128i __B, int __C)
    350      1.1  mrg {
    351      1.1  mrg   return (__m128i) __builtin_ia32_vpshrd_v2di ((__v2di)__A, (__v2di) __B, __C);
    352      1.1  mrg }
    353      1.1  mrg 
    354      1.1  mrg extern __inline __m256i
    355      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    356      1.1  mrg _mm256_shldi_epi16 (__m256i __A, __m256i __B, int __C)
    357      1.1  mrg {
    358      1.1  mrg   return (__m256i) __builtin_ia32_vpshld_v16hi ((__v16hi)__A, (__v16hi) __B,
    359      1.1  mrg 									__C);
    360      1.1  mrg }
    361      1.1  mrg 
    362      1.1  mrg extern __inline __m256i
    363      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    364      1.1  mrg _mm256_mask_shldi_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D,
    365      1.1  mrg 								int __E)
    366      1.1  mrg {
    367      1.1  mrg   return (__m256i)__builtin_ia32_vpshld_v16hi_mask ((__v16hi)__C,
    368      1.1  mrg 			(__v16hi) __D, __E, (__v16hi) __A, (__mmask16)__B);
    369      1.1  mrg }
    370      1.1  mrg 
    371      1.1  mrg extern __inline __m256i
    372      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    373      1.1  mrg _mm256_maskz_shldi_epi16 (__mmask16 __A, __m256i __B, __m256i __C, int __D)
    374      1.1  mrg {
    375      1.1  mrg   return (__m256i)__builtin_ia32_vpshld_v16hi_mask ((__v16hi)__B,
    376      1.1  mrg 	(__v16hi) __C, __D, (__v16hi) _mm256_setzero_si256 (), (__mmask16)__A);
    377      1.1  mrg }
    378      1.1  mrg 
    379      1.1  mrg extern __inline __m256i
    380      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    381      1.1  mrg _mm256_mask_shldi_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
    382      1.1  mrg 								int __E)
    383      1.1  mrg {
    384      1.1  mrg   return (__m256i)__builtin_ia32_vpshld_v8si_mask ((__v8si)__C, (__v8si) __D,
    385      1.1  mrg 					__E, (__v8si) __A, (__mmask8)__B);
    386      1.1  mrg }
    387      1.1  mrg 
    388      1.1  mrg extern __inline __m256i
    389      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    390      1.1  mrg _mm256_maskz_shldi_epi32 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
    391      1.1  mrg {
    392      1.1  mrg   return (__m256i)__builtin_ia32_vpshld_v8si_mask ((__v8si)__B, (__v8si) __C,
    393      1.1  mrg 			__D, (__v8si) _mm256_setzero_si256 (), (__mmask8)__A);
    394      1.1  mrg }
    395      1.1  mrg 
    396      1.1  mrg extern __inline __m256i
    397      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    398      1.1  mrg _mm256_shldi_epi32 (__m256i __A, __m256i __B, int __C)
    399      1.1  mrg {
    400      1.1  mrg   return (__m256i) __builtin_ia32_vpshld_v8si ((__v8si)__A, (__v8si) __B, __C);
    401      1.1  mrg }
    402      1.1  mrg 
    403      1.1  mrg extern __inline __m256i
    404      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    405      1.1  mrg _mm256_mask_shldi_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
    406      1.1  mrg 								int __E)
    407      1.1  mrg {
    408      1.1  mrg   return (__m256i)__builtin_ia32_vpshld_v4di_mask ((__v4di)__C, (__v4di) __D,
    409      1.1  mrg 					__E, (__v4di) __A, (__mmask8)__B);
    410      1.1  mrg }
    411      1.1  mrg 
    412      1.1  mrg extern __inline __m256i
    413      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    414      1.1  mrg _mm256_maskz_shldi_epi64 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
    415      1.1  mrg {
    416      1.1  mrg   return (__m256i)__builtin_ia32_vpshld_v4di_mask ((__v4di)__B, (__v4di) __C,
    417      1.1  mrg 			__D, (__v4di) _mm256_setzero_si256 (), (__mmask8)__A);
    418      1.1  mrg }
    419      1.1  mrg 
    420      1.1  mrg extern __inline __m256i
    421      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    422      1.1  mrg _mm256_shldi_epi64 (__m256i __A, __m256i __B, int __C)
    423      1.1  mrg {
    424      1.1  mrg   return (__m256i) __builtin_ia32_vpshld_v4di ((__v4di)__A, (__v4di) __B, __C);
    425      1.1  mrg }
    426      1.1  mrg 
    427      1.1  mrg extern __inline __m128i
    428      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    429      1.1  mrg _mm_mask_shldi_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
    430      1.1  mrg 								int __E)
    431      1.1  mrg {
    432      1.1  mrg   return (__m128i)__builtin_ia32_vpshld_v8hi_mask ((__v8hi)__C, (__v8hi) __D,
    433      1.1  mrg 					__E, (__v8hi) __A, (__mmask8)__B);
    434      1.1  mrg }
    435      1.1  mrg 
    436      1.1  mrg extern __inline __m128i
    437      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    438      1.1  mrg _mm_maskz_shldi_epi16 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
    439      1.1  mrg {
    440      1.1  mrg   return (__m128i)__builtin_ia32_vpshld_v8hi_mask ((__v8hi)__B, (__v8hi) __C,
    441      1.1  mrg 			__D, (__v8hi) _mm_setzero_si128 (), (__mmask8)__A);
    442      1.1  mrg }
    443      1.1  mrg 
    444      1.1  mrg extern __inline __m128i
    445      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    446      1.1  mrg _mm_shldi_epi16 (__m128i __A, __m128i __B, int __C)
    447      1.1  mrg {
    448      1.1  mrg   return (__m128i) __builtin_ia32_vpshld_v8hi ((__v8hi)__A, (__v8hi) __B, __C);
    449      1.1  mrg }
    450      1.1  mrg 
    451      1.1  mrg extern __inline __m128i
    452      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    453      1.1  mrg _mm_mask_shldi_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
    454      1.1  mrg 								int __E)
    455      1.1  mrg {
    456      1.1  mrg   return (__m128i)__builtin_ia32_vpshld_v4si_mask ((__v4si)__C, (__v4si) __D,
    457      1.1  mrg 					__E, (__v4si) __A, (__mmask8)__B);
    458      1.1  mrg }
    459      1.1  mrg 
    460      1.1  mrg extern __inline __m128i
    461      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    462      1.1  mrg _mm_maskz_shldi_epi32 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
    463      1.1  mrg {
    464      1.1  mrg   return (__m128i)__builtin_ia32_vpshld_v4si_mask ((__v4si)__B, (__v4si) __C,
    465      1.1  mrg 			__D, (__v4si) _mm_setzero_si128 (), (__mmask8)__A);
    466      1.1  mrg }
    467      1.1  mrg 
    468      1.1  mrg extern __inline __m128i
    469      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    470      1.1  mrg _mm_shldi_epi32 (__m128i __A, __m128i __B, int __C)
    471      1.1  mrg {
    472      1.1  mrg   return (__m128i) __builtin_ia32_vpshld_v4si ((__v4si)__A, (__v4si) __B, __C);
    473      1.1  mrg }
    474      1.1  mrg 
    475      1.1  mrg extern __inline __m128i
    476      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    477      1.1  mrg _mm_mask_shldi_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
    478      1.1  mrg 								int __E)
    479      1.1  mrg {
    480      1.1  mrg   return (__m128i)__builtin_ia32_vpshld_v2di_mask ((__v2di)__C, (__v2di) __D,
    481      1.1  mrg 					__E, (__v2di) __A, (__mmask8)__B);
    482      1.1  mrg }
    483      1.1  mrg 
    484      1.1  mrg extern __inline __m128i
    485      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    486      1.1  mrg _mm_maskz_shldi_epi64 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
    487      1.1  mrg {
    488      1.1  mrg   return (__m128i)__builtin_ia32_vpshld_v2di_mask ((__v2di)__B, (__v2di) __C,
    489      1.1  mrg 			__D, (__v2di) _mm_setzero_si128 (), (__mmask8)__A);
    490      1.1  mrg }
    491      1.1  mrg 
    492      1.1  mrg extern __inline __m128i
    493      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    494      1.1  mrg _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
    495      1.1  mrg {
    496      1.1  mrg   return (__m128i) __builtin_ia32_vpshld_v2di ((__v2di)__A, (__v2di) __B, __C);
    497      1.1  mrg }
    498      1.1  mrg #else
/* Macro forms of the immediate-count funnel-shift intrinsics, used when
   not optimizing (the builtins require the count to reach them as a
   literal constant, which the inline-function forms only guarantee
   under __OPTIMIZE__).  Argument order matches the function forms.  */
#define _mm256_shrdi_epi16(A, B, C)					\
  ((__m256i) __builtin_ia32_vpshrd_v16hi ((__v16hi)(__m256i)(A),	\
					  (__v16hi)(__m256i)(B),	\
					  (int)(C)))
#define _mm256_mask_shrdi_epi16(A, B, C, D, E)				\
  ((__m256i) __builtin_ia32_vpshrd_v16hi_mask ((__v16hi)(__m256i)(C),	\
					       (__v16hi)(__m256i)(D),	\
					       (int)(E),		\
					       (__v16hi)(__m256i)(A),	\
					       (__mmask16)(B)))
#define _mm256_maskz_shrdi_epi16(A, B, C, D)				\
  ((__m256i)								\
   __builtin_ia32_vpshrd_v16hi_mask ((__v16hi)(__m256i)(B),		\
				     (__v16hi)(__m256i)(C),		\
				     (int)(D),				\
				     (__v16hi)(__m256i)_mm256_setzero_si256 (), \
				     (__mmask16)(A)))
#define _mm256_shrdi_epi32(A, B, C)					\
  ((__m256i) __builtin_ia32_vpshrd_v8si ((__v8si)(__m256i)(A),		\
					 (__v8si)(__m256i)(B),		\
					 (int)(C)))
#define _mm256_mask_shrdi_epi32(A, B, C, D, E)				\
  ((__m256i) __builtin_ia32_vpshrd_v8si_mask ((__v8si)(__m256i)(C),	\
					      (__v8si)(__m256i)(D),	\
					      (int)(E),			\
					      (__v8si)(__m256i)(A),	\
					      (__mmask8)(B)))
#define _mm256_maskz_shrdi_epi32(A, B, C, D)				\
  ((__m256i)								\
   __builtin_ia32_vpshrd_v8si_mask ((__v8si)(__m256i)(B),		\
				    (__v8si)(__m256i)(C),		\
				    (int)(D),				\
				    (__v8si)(__m256i)_mm256_setzero_si256 (), \
				    (__mmask8)(A)))
#define _mm256_shrdi_epi64(A, B, C)					\
  ((__m256i) __builtin_ia32_vpshrd_v4di ((__v4di)(__m256i)(A),		\
					 (__v4di)(__m256i)(B),		\
					 (int)(C)))
#define _mm256_mask_shrdi_epi64(A, B, C, D, E)				\
  ((__m256i) __builtin_ia32_vpshrd_v4di_mask ((__v4di)(__m256i)(C),	\
					      (__v4di)(__m256i)(D),	\
					      (int)(E),			\
					      (__v4di)(__m256i)(A),	\
					      (__mmask8)(B)))
#define _mm256_maskz_shrdi_epi64(A, B, C, D)				\
  ((__m256i)								\
   __builtin_ia32_vpshrd_v4di_mask ((__v4di)(__m256i)(B),		\
				    (__v4di)(__m256i)(C),		\
				    (int)(D),				\
				    (__v4di)(__m256i)_mm256_setzero_si256 (), \
				    (__mmask8)(A)))
#define _mm_shrdi_epi16(A, B, C)					\
  ((__m128i) __builtin_ia32_vpshrd_v8hi ((__v8hi)(__m128i)(A),		\
					 (__v8hi)(__m128i)(B),		\
					 (int)(C)))
#define _mm_mask_shrdi_epi16(A, B, C, D, E)				\
  ((__m128i) __builtin_ia32_vpshrd_v8hi_mask ((__v8hi)(__m128i)(C),	\
					      (__v8hi)(__m128i)(D),	\
					      (int)(E),			\
					      (__v8hi)(__m128i)(A),	\
					      (__mmask8)(B)))
#define _mm_maskz_shrdi_epi16(A, B, C, D)				\
  ((__m128i)								\
   __builtin_ia32_vpshrd_v8hi_mask ((__v8hi)(__m128i)(B),		\
				    (__v8hi)(__m128i)(C),		\
				    (int)(D),				\
				    (__v8hi)(__m128i)_mm_setzero_si128 (), \
				    (__mmask8)(A)))
#define _mm_shrdi_epi32(A, B, C)					\
  ((__m128i) __builtin_ia32_vpshrd_v4si ((__v4si)(__m128i)(A),		\
					 (__v4si)(__m128i)(B),		\
					 (int)(C)))
#define _mm_mask_shrdi_epi32(A, B, C, D, E)				\
  ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(C),	\
					      (__v4si)(__m128i)(D),	\
					      (int)(E),			\
					      (__v4si)(__m128i)(A),	\
					      (__mmask8)(B)))
#define _mm_maskz_shrdi_epi32(A, B, C, D)				\
  ((__m128i)								\
   __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(B),		\
				    (__v4si)(__m128i)(C),		\
				    (int)(D),				\
				    (__v4si)(__m128i)_mm_setzero_si128 (), \
				    (__mmask8)(A)))
#define _mm_shrdi_epi64(A, B, C)					\
  ((__m128i) __builtin_ia32_vpshrd_v2di ((__v2di)(__m128i)(A),		\
					 (__v2di)(__m128i)(B),		\
					 (int)(C)))
#define _mm_mask_shrdi_epi64(A, B, C, D, E)				\
  ((__m128i) __builtin_ia32_vpshrd_v2di_mask ((__v2di)(__m128i)(C),	\
					      (__v2di)(__m128i)(D),	\
					      (int)(E),			\
					      (__v2di)(__m128i)(A),	\
					      (__mmask8)(B)))
#define _mm_maskz_shrdi_epi64(A, B, C, D)				\
  ((__m128i)								\
   __builtin_ia32_vpshrd_v2di_mask ((__v2di)(__m128i)(B),		\
				    (__v2di)(__m128i)(C),		\
				    (int)(D),				\
				    (__v2di)(__m128i)_mm_setzero_si128 (), \
				    (__mmask8)(A)))
    585      1.1  mrg #define _mm256_shldi_epi16(A, B, C) \
    586      1.1  mrg   ((__m256i) __builtin_ia32_vpshld_v16hi ((__v16hi)(__m256i)(A), \
    587      1.1  mrg 					  (__v16hi)(__m256i)(B),(int)(C)))
    588      1.1  mrg #define _mm256_mask_shldi_epi16(A, B, C, D, E) \
    589      1.1  mrg   ((__m256i) __builtin_ia32_vpshld_v16hi_mask ((__v16hi)(__m256i)(C), \
    590      1.1  mrg 					       (__v16hi)(__m256i)(D), \
    591      1.1  mrg 					       (int)(E),		\
    592      1.1  mrg 					       (__v16hi)(__m256i)(A), \
    593      1.1  mrg 					       (__mmask16)(B)))
    594      1.1  mrg #define _mm256_maskz_shldi_epi16(A, B, C, D) \
    595      1.1  mrg   ((__m256i) \
    596      1.1  mrg    __builtin_ia32_vpshld_v16hi_mask ((__v16hi)(__m256i)(B),		\
    597      1.1  mrg 				     (__v16hi)(__m256i)(C),(int)(D),	\
    598      1.1  mrg 				     (__v16hi)(__m256i)_mm256_setzero_si256 (), \
    599      1.1  mrg 				     (__mmask16)(A)))
    600      1.1  mrg #define _mm256_shldi_epi32(A, B, C) \
    601      1.1  mrg   ((__m256i) __builtin_ia32_vpshld_v8si ((__v8si)(__m256i)(A), \
    602      1.1  mrg 					 (__v8si)(__m256i)(B),(int)(C)))
    603      1.1  mrg #define _mm256_mask_shldi_epi32(A, B, C, D, E) \
    604      1.1  mrg   ((__m256i) __builtin_ia32_vpshld_v8si_mask ((__v8si)(__m256i)(C), \
    605      1.1  mrg 					      (__v8si)(__m256i)(D), (int)(E), \
    606      1.1  mrg 					      (__v8si)(__m256i)(A), \
    607      1.1  mrg 					      (__mmask8)(B)))
    608      1.1  mrg #define _mm256_maskz_shldi_epi32(A, B, C, D) \
    609      1.1  mrg   ((__m256i) \
    610      1.1  mrg    __builtin_ia32_vpshld_v8si_mask ((__v8si)(__m256i)(B),		\
    611      1.1  mrg 				    (__v8si)(__m256i)(C),(int)(D),	\
    612      1.1  mrg 				    (__v8si)(__m256i)_mm256_setzero_si256 (), \
    613      1.1  mrg 				    (__mmask8)(A)))
    614      1.1  mrg #define _mm256_shldi_epi64(A, B, C) \
    615      1.1  mrg   ((__m256i) __builtin_ia32_vpshld_v4di ((__v4di)(__m256i)(A), \
    616      1.1  mrg 					 (__v4di)(__m256i)(B),(int)(C)))
    617      1.1  mrg #define _mm256_mask_shldi_epi64(A, B, C, D, E) \
    618      1.1  mrg   ((__m256i) __builtin_ia32_vpshld_v4di_mask ((__v4di)(__m256i)(C), \
    619      1.1  mrg 					      (__v4di)(__m256i)(D), (int)(E), \
    620      1.1  mrg 					      (__v4di)(__m256i)(A), \
    621      1.1  mrg 					      (__mmask8)(B)))
    622      1.1  mrg #define _mm256_maskz_shldi_epi64(A, B, C, D) \
    623      1.1  mrg   ((__m256i) \
    624      1.1  mrg    __builtin_ia32_vpshld_v4di_mask ((__v4di)(__m256i)(B),		\
    625      1.1  mrg 				    (__v4di)(__m256i)(C),(int)(D),	\
    626      1.1  mrg 				    (__v4di)(__m256i)_mm256_setzero_si256 (), \
    627      1.1  mrg 				    (__mmask8)(A)))
    628      1.1  mrg #define _mm_shldi_epi16(A, B, C) \
    629      1.1  mrg   ((__m128i) __builtin_ia32_vpshld_v8hi ((__v8hi)(__m128i)(A), \
    630      1.1  mrg 					 (__v8hi)(__m128i)(B),(int)(C)))
    631      1.1  mrg #define _mm_mask_shldi_epi16(A, B, C, D, E) \
    632      1.1  mrg   ((__m128i) __builtin_ia32_vpshld_v8hi_mask ((__v8hi)(__m128i)(C), \
    633      1.1  mrg 					      (__v8hi)(__m128i)(D), (int)(E), \
    634      1.1  mrg 					      (__v8hi)(__m128i)(A), \
    635      1.1  mrg 					      (__mmask8)(B)))
    636      1.1  mrg #define _mm_maskz_shldi_epi16(A, B, C, D) \
    637      1.1  mrg   ((__m128i) \
    638      1.1  mrg    __builtin_ia32_vpshld_v8hi_mask ((__v8hi)(__m128i)(B),		\
    639      1.1  mrg 				    (__v8hi)(__m128i)(C),(int)(D),	\
    640      1.1  mrg 				    (__v8hi)(__m128i)_mm_setzero_si128 (), \
    641      1.1  mrg 				    (__mmask8)(A)))
    642      1.1  mrg #define _mm_shldi_epi32(A, B, C) \
    643      1.1  mrg   ((__m128i) __builtin_ia32_vpshld_v4si ((__v4si)(__m128i)(A), \
    644      1.1  mrg 					 (__v4si)(__m128i)(B),(int)(C)))
    645      1.1  mrg #define _mm_mask_shldi_epi32(A, B, C, D, E) \
    646      1.1  mrg   ((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(C), \
    647      1.1  mrg 					      (__v4si)(__m128i)(D), (int)(E), \
    648      1.1  mrg 					      (__v4si)(__m128i)(A), \
    649      1.1  mrg 					      (__mmask8)(B)))
    650      1.1  mrg #define _mm_maskz_shldi_epi32(A, B, C, D) \
    651      1.1  mrg   ((__m128i) \
    652      1.1  mrg    __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(B),		\
    653      1.1  mrg 				    (__v4si)(__m128i)(C),(int)(D),	\
    654      1.1  mrg 				    (__v4si)(__m128i)_mm_setzero_si128 (), \
    655      1.1  mrg 				    (__mmask8)(A)))
    656      1.1  mrg #define _mm_shldi_epi64(A, B, C) \
    657      1.1  mrg   ((__m128i) __builtin_ia32_vpshld_v2di ((__v2di)(__m128i)(A), \
    658      1.1  mrg 					 (__v2di)(__m128i)(B),(int)(C)))
    659      1.1  mrg #define _mm_mask_shldi_epi64(A, B, C, D, E) \
    660      1.1  mrg   ((__m128i) __builtin_ia32_vpshld_v2di_mask ((__v2di)(__m128i)(C), \
    661      1.1  mrg 					      (__v2di)(__m128i)(D), (int)(E), \
    662      1.1  mrg 					      (__v2di)(__m128i)(A), \
    663      1.1  mrg 					      (__mmask8)(B)))
    664      1.1  mrg #define _mm_maskz_shldi_epi64(A, B, C, D) \
    665      1.1  mrg   ((__m128i) \
    666      1.1  mrg    __builtin_ia32_vpshld_v2di_mask ((__v2di)(__m128i)(B),		\
    667      1.1  mrg 				    (__v2di)(__m128i)(C),(int)(D),	\
    668      1.1  mrg 				    (__v2di)(__m128i)_mm_setzero_si128 (), \
    669      1.1  mrg 				    (__mmask8)(A)))
    670      1.1  mrg #endif
    671      1.1  mrg 
    672      1.1  mrg extern __inline __m256i
    673      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    674      1.1  mrg _mm256_shrdv_epi16 (__m256i __A, __m256i __B, __m256i __C)
    675      1.1  mrg {
    676      1.1  mrg   return (__m256i) __builtin_ia32_vpshrdv_v16hi ((__v16hi)__A, (__v16hi) __B,
    677      1.1  mrg 								(__v16hi) __C);
    678      1.1  mrg }
    679      1.1  mrg 
    680      1.1  mrg extern __inline __m256i
    681      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    682      1.1  mrg _mm256_mask_shrdv_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D)
    683      1.1  mrg {
    684      1.1  mrg   return (__m256i)__builtin_ia32_vpshrdv_v16hi_mask ((__v16hi)__A,
    685      1.1  mrg 				(__v16hi) __C, (__v16hi) __D, (__mmask16)__B);
    686      1.1  mrg }
    687      1.1  mrg 
    688      1.1  mrg extern __inline __m256i
    689      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    690      1.1  mrg _mm256_maskz_shrdv_epi16 (__mmask16 __A, __m256i __B, __m256i __C, __m256i __D)
    691      1.1  mrg {
    692      1.1  mrg   return (__m256i)__builtin_ia32_vpshrdv_v16hi_maskz ((__v16hi)__B,
    693      1.1  mrg 				(__v16hi) __C, (__v16hi) __D, (__mmask16)__A);
    694      1.1  mrg }
    695      1.1  mrg 
    696      1.1  mrg extern __inline __m256i
    697      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    698      1.1  mrg _mm256_shrdv_epi32 (__m256i __A, __m256i __B, __m256i __C)
    699      1.1  mrg {
    700      1.1  mrg   return (__m256i) __builtin_ia32_vpshrdv_v8si ((__v8si)__A, (__v8si) __B,
    701      1.1  mrg 								(__v8si) __C);
    702      1.1  mrg }
    703      1.1  mrg 
    704      1.1  mrg extern __inline __m256i
    705      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    706      1.1  mrg _mm256_mask_shrdv_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
    707      1.1  mrg {
    708      1.1  mrg   return (__m256i)__builtin_ia32_vpshrdv_v8si_mask ((__v8si)__A, (__v8si) __C,
    709      1.1  mrg 						(__v8si) __D, (__mmask8)__B);
    710      1.1  mrg }
    711      1.1  mrg 
    712      1.1  mrg extern __inline __m256i
    713      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    714      1.1  mrg _mm256_maskz_shrdv_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
    715      1.1  mrg {
    716      1.1  mrg   return (__m256i)__builtin_ia32_vpshrdv_v8si_maskz ((__v8si)__B, (__v8si) __C,
    717      1.1  mrg 						 (__v8si) __D, (__mmask8)__A);
    718      1.1  mrg }
    719      1.1  mrg 
    720      1.1  mrg extern __inline __m256i
    721      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    722      1.1  mrg _mm256_shrdv_epi64 (__m256i __A, __m256i __B, __m256i __C)
    723      1.1  mrg {
    724      1.1  mrg   return (__m256i) __builtin_ia32_vpshrdv_v4di ((__v4di)__A, (__v4di) __B,
    725      1.1  mrg 								(__v4di) __C);
    726      1.1  mrg }
    727      1.1  mrg 
    728      1.1  mrg extern __inline __m256i
    729      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    730      1.1  mrg _mm256_mask_shrdv_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
    731      1.1  mrg {
    732      1.1  mrg   return (__m256i)__builtin_ia32_vpshrdv_v4di_mask ((__v4di)__A, (__v4di) __C,
    733      1.1  mrg 						(__v4di) __D, (__mmask8)__B);
    734      1.1  mrg }
    735      1.1  mrg 
    736      1.1  mrg extern __inline __m256i
    737      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    738      1.1  mrg _mm256_maskz_shrdv_epi64 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
    739      1.1  mrg {
    740      1.1  mrg   return (__m256i)__builtin_ia32_vpshrdv_v4di_maskz ((__v4di)__B, (__v4di) __C,
    741      1.1  mrg 						 (__v4di) __D, (__mmask8)__A);
    742      1.1  mrg }
    743      1.1  mrg 
    744      1.1  mrg extern __inline __m128i
    745      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    746      1.1  mrg _mm_shrdv_epi16 (__m128i __A, __m128i __B, __m128i __C)
    747      1.1  mrg {
    748      1.1  mrg   return (__m128i) __builtin_ia32_vpshrdv_v8hi ((__v8hi)__A, (__v8hi) __B,
    749      1.1  mrg 								(__v8hi) __C);
    750      1.1  mrg }
    751      1.1  mrg 
    752      1.1  mrg extern __inline __m128i
    753      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    754      1.1  mrg _mm_mask_shrdv_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
    755      1.1  mrg {
    756      1.1  mrg   return (__m128i)__builtin_ia32_vpshrdv_v8hi_mask ((__v8hi)__A, (__v8hi) __C,
    757      1.1  mrg 						(__v8hi) __D, (__mmask8)__B);
    758      1.1  mrg }
    759      1.1  mrg 
    760      1.1  mrg extern __inline __m128i
    761      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    762      1.1  mrg _mm_maskz_shrdv_epi16 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
    763      1.1  mrg {
    764      1.1  mrg   return (__m128i)__builtin_ia32_vpshrdv_v8hi_maskz ((__v8hi)__B, (__v8hi) __C,
    765      1.1  mrg 						 (__v8hi) __D, (__mmask8)__A);
    766      1.1  mrg }
    767      1.1  mrg 
    768      1.1  mrg extern __inline __m128i
    769      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    770      1.1  mrg _mm_shrdv_epi32 (__m128i __A, __m128i __B, __m128i __C)
    771      1.1  mrg {
    772      1.1  mrg   return (__m128i) __builtin_ia32_vpshrdv_v4si ((__v4si)__A, (__v4si) __B,
    773      1.1  mrg 								(__v4si) __C);
    774      1.1  mrg }
    775      1.1  mrg 
    776      1.1  mrg extern __inline __m128i
    777      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    778      1.1  mrg _mm_mask_shrdv_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
    779      1.1  mrg {
    780      1.1  mrg   return (__m128i)__builtin_ia32_vpshrdv_v4si_mask ((__v4si)__A, (__v4si) __C,
    781      1.1  mrg 						(__v4si) __D, (__mmask8)__B);
    782      1.1  mrg }
    783      1.1  mrg 
    784      1.1  mrg extern __inline __m128i
    785      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    786      1.1  mrg _mm_maskz_shrdv_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
    787      1.1  mrg {
    788      1.1  mrg   return (__m128i)__builtin_ia32_vpshrdv_v4si_maskz ((__v4si)__B, (__v4si) __C,
    789      1.1  mrg 						 (__v4si) __D, (__mmask8)__A);
    790      1.1  mrg }
    791      1.1  mrg 
    792      1.1  mrg extern __inline __m128i
    793      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    794      1.1  mrg _mm_shrdv_epi64 (__m128i __A, __m128i __B, __m128i __C)
    795      1.1  mrg {
    796      1.1  mrg   return (__m128i) __builtin_ia32_vpshrdv_v2di ((__v2di)__A, (__v2di) __B,
    797      1.1  mrg 								(__v2di) __C);
    798      1.1  mrg }
    799      1.1  mrg 
    800      1.1  mrg extern __inline __m128i
    801      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    802      1.1  mrg _mm_mask_shrdv_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
    803      1.1  mrg {
    804      1.1  mrg   return (__m128i)__builtin_ia32_vpshrdv_v2di_mask ((__v2di)__A, (__v2di) __C,
    805      1.1  mrg 						(__v2di) __D, (__mmask8)__B);
    806      1.1  mrg }
    807      1.1  mrg 
    808      1.1  mrg extern __inline __m128i
    809      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    810      1.1  mrg _mm_maskz_shrdv_epi64 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
    811      1.1  mrg {
    812      1.1  mrg   return (__m128i)__builtin_ia32_vpshrdv_v2di_maskz ((__v2di)__B, (__v2di) __C,
    813      1.1  mrg 						 (__v2di) __D, (__mmask8)__A);
    814      1.1  mrg }
    815      1.1  mrg 
    816      1.1  mrg extern __inline __m256i
    817      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    818      1.1  mrg _mm256_shldv_epi16 (__m256i __A, __m256i __B, __m256i __C)
    819      1.1  mrg {
    820      1.1  mrg   return (__m256i) __builtin_ia32_vpshldv_v16hi ((__v16hi)__A, (__v16hi) __B,
    821      1.1  mrg 								(__v16hi) __C);
    822      1.1  mrg }
    823      1.1  mrg 
    824      1.1  mrg extern __inline __m256i
    825      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    826      1.1  mrg _mm256_mask_shldv_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D)
    827      1.1  mrg {
    828      1.1  mrg   return (__m256i)__builtin_ia32_vpshldv_v16hi_mask ((__v16hi)__A,
    829      1.1  mrg 				(__v16hi) __C, (__v16hi) __D, (__mmask16)__B);
    830      1.1  mrg }
    831      1.1  mrg 
    832      1.1  mrg extern __inline __m256i
    833      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    834      1.1  mrg _mm256_maskz_shldv_epi16 (__mmask16 __A, __m256i __B, __m256i __C, __m256i __D)
    835      1.1  mrg {
    836      1.1  mrg   return (__m256i)__builtin_ia32_vpshldv_v16hi_maskz ((__v16hi)__B,
    837      1.1  mrg 				(__v16hi) __C, (__v16hi) __D, (__mmask16)__A);
    838      1.1  mrg }
    839      1.1  mrg 
    840      1.1  mrg extern __inline __m256i
    841      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    842      1.1  mrg _mm256_shldv_epi32 (__m256i __A, __m256i __B, __m256i __C)
    843      1.1  mrg {
    844      1.1  mrg   return (__m256i) __builtin_ia32_vpshldv_v8si ((__v8si)__A, (__v8si) __B,
    845      1.1  mrg 								(__v8si) __C);
    846      1.1  mrg }
    847      1.1  mrg 
    848      1.1  mrg extern __inline __m256i
    849      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    850      1.1  mrg _mm256_mask_shldv_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
    851      1.1  mrg {
    852      1.1  mrg   return (__m256i)__builtin_ia32_vpshldv_v8si_mask ((__v8si)__A, (__v8si) __C,
    853      1.1  mrg 						(__v8si) __D, (__mmask8)__B) ;
    854      1.1  mrg }
    855      1.1  mrg 
    856      1.1  mrg extern __inline __m256i
    857      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    858      1.1  mrg _mm256_maskz_shldv_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
    859      1.1  mrg {
    860      1.1  mrg   return (__m256i)__builtin_ia32_vpshldv_v8si_maskz ((__v8si)__B, (__v8si) __C,
    861      1.1  mrg 						(__v8si) __D, (__mmask8)__A);
    862      1.1  mrg }
    863      1.1  mrg 
    864      1.1  mrg extern __inline __m256i
    865      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    866      1.1  mrg _mm256_shldv_epi64 (__m256i __A, __m256i __B, __m256i __C)
    867      1.1  mrg {
    868      1.1  mrg   return (__m256i) __builtin_ia32_vpshldv_v4di ((__v4di)__A, (__v4di) __B,
    869      1.1  mrg 								(__v4di) __C);
    870      1.1  mrg }
    871      1.1  mrg 
    872      1.1  mrg extern __inline __m256i
    873      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    874      1.1  mrg _mm256_mask_shldv_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
    875      1.1  mrg {
    876      1.1  mrg   return (__m256i)__builtin_ia32_vpshldv_v4di_mask ((__v4di)__A, (__v4di) __C,
    877      1.1  mrg 						(__v4di) __D, (__mmask8)__B);
    878      1.1  mrg }
    879      1.1  mrg 
    880      1.1  mrg extern __inline __m256i
    881      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    882      1.1  mrg _mm256_maskz_shldv_epi64 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
    883      1.1  mrg {
    884      1.1  mrg   return (__m256i)__builtin_ia32_vpshldv_v4di_maskz ((__v4di)__B, (__v4di) __C,
    885      1.1  mrg 						 (__v4di) __D, (__mmask8)__A);
    886      1.1  mrg }
    887      1.1  mrg 
    888      1.1  mrg extern __inline __m128i
    889      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    890      1.1  mrg _mm_shldv_epi16 (__m128i __A, __m128i __B, __m128i __C)
    891      1.1  mrg {
    892      1.1  mrg   return (__m128i) __builtin_ia32_vpshldv_v8hi ((__v8hi)__A, (__v8hi) __B,
    893      1.1  mrg 								(__v8hi) __C);
    894      1.1  mrg }
    895      1.1  mrg 
    896      1.1  mrg extern __inline __m128i
    897      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    898      1.1  mrg _mm_mask_shldv_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
    899      1.1  mrg {
    900      1.1  mrg   return (__m128i)__builtin_ia32_vpshldv_v8hi_mask ((__v8hi)__A, (__v8hi) __C,
    901      1.1  mrg 						(__v8hi) __D, (__mmask8)__B);
    902      1.1  mrg }
    903      1.1  mrg 
    904      1.1  mrg extern __inline __m128i
    905      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    906      1.1  mrg _mm_maskz_shldv_epi16 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
    907      1.1  mrg {
    908      1.1  mrg   return (__m128i)__builtin_ia32_vpshldv_v8hi_maskz ((__v8hi)__B, (__v8hi) __C,
    909      1.1  mrg 						 (__v8hi) __D, (__mmask8)__A);
    910      1.1  mrg }
    911      1.1  mrg 
    912      1.1  mrg extern __inline __m128i
    913      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    914      1.1  mrg _mm_shldv_epi32 (__m128i __A, __m128i __B, __m128i __C)
    915      1.1  mrg {
    916      1.1  mrg   return (__m128i) __builtin_ia32_vpshldv_v4si ((__v4si)__A, (__v4si) __B,
    917      1.1  mrg 								(__v4si) __C);
    918      1.1  mrg }
    919      1.1  mrg 
    920      1.1  mrg extern __inline __m128i
    921      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    922      1.1  mrg _mm_mask_shldv_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
    923      1.1  mrg {
    924      1.1  mrg   return (__m128i)__builtin_ia32_vpshldv_v4si_mask ((__v4si)__A, (__v4si) __C,
    925      1.1  mrg 						(__v4si) __D, (__mmask8)__B);
    926      1.1  mrg }
    927      1.1  mrg 
    928      1.1  mrg extern __inline __m128i
    929      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    930      1.1  mrg _mm_maskz_shldv_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
    931      1.1  mrg {
    932      1.1  mrg   return (__m128i)__builtin_ia32_vpshldv_v4si_maskz ((__v4si)__B, (__v4si) __C,
    933      1.1  mrg 						 (__v4si) __D, (__mmask8)__A);
    934      1.1  mrg }
    935      1.1  mrg 
    936      1.1  mrg extern __inline __m128i
    937      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    938      1.1  mrg _mm_shldv_epi64 (__m128i __A, __m128i __B, __m128i __C)
    939      1.1  mrg {
    940      1.1  mrg   return (__m128i) __builtin_ia32_vpshldv_v2di ((__v2di)__A, (__v2di) __B,
    941      1.1  mrg 								(__v2di) __C);
    942      1.1  mrg }
    943      1.1  mrg 
    944      1.1  mrg extern __inline __m128i
    945      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    946      1.1  mrg _mm_mask_shldv_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
    947      1.1  mrg {
    948      1.1  mrg   return (__m128i)__builtin_ia32_vpshldv_v2di_mask ((__v2di)__A, (__v2di) __C,
    949      1.1  mrg 						(__v2di) __D, (__mmask8)__B);
    950      1.1  mrg }
    951      1.1  mrg 
    952      1.1  mrg extern __inline __m128i
    953      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    954      1.1  mrg _mm_maskz_shldv_epi64 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
    955      1.1  mrg {
    956      1.1  mrg   return (__m128i)__builtin_ia32_vpshldv_v2di_maskz ((__v2di)__B, (__v2di) __C,
    957      1.1  mrg 						(__v2di) __D, (__mmask8)__A);
    958      1.1  mrg }
    959      1.1  mrg 
    960      1.1  mrg 
    961      1.1  mrg 
    962      1.1  mrg 
    963      1.1  mrg #ifdef __DISABLE_AVX512VBMI2VL__
    964      1.1  mrg #undef __DISABLE_AVX512VBMI2VL__
    965      1.1  mrg #pragma GCC pop_options
    966      1.1  mrg #endif /* __DISABLE_AVX512VBMIVL__ */
    967      1.1  mrg 
    968      1.1  mrg #if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__) || \
    969      1.1  mrg     !defined(__AVX512BW__)
    970      1.1  mrg #pragma GCC push_options
    971      1.1  mrg #pragma GCC target("avx512vbmi2,avx512vl,avx512bw")
    972      1.1  mrg #define __DISABLE_AVX512VBMI2VLBW__
    973      1.1  mrg #endif /* __AVX512VBMIVLBW__ */
    974      1.1  mrg 
    975      1.1  mrg extern __inline __m256i
    976      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    977      1.1  mrg _mm256_mask_compress_epi8 (__m256i __A, __mmask32 __B, __m256i __C)
    978      1.1  mrg {
    979      1.1  mrg   return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi)__C,
    980      1.1  mrg 						(__v32qi)__A, (__mmask32)__B);
    981      1.1  mrg }
    982      1.1  mrg 
    983      1.1  mrg extern __inline __m256i
    984      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    985      1.1  mrg _mm256_maskz_compress_epi8 (__mmask32 __A, __m256i __B)
    986      1.1  mrg {
    987      1.1  mrg   return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __B,
    988      1.1  mrg 			(__v32qi) _mm256_setzero_si256 (), (__mmask32) __A);
    989      1.1  mrg }
    990      1.1  mrg 
    991      1.1  mrg extern __inline void
    992      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    993      1.1  mrg _mm256_mask_compressstoreu_epi8 (void * __A, __mmask32 __B, __m256i __C)
    994      1.1  mrg {
    995      1.1  mrg   __builtin_ia32_compressstoreuqi256_mask ((__v32qi *) __A, (__v32qi) __C,
    996      1.1  mrg 							(__mmask32) __B);
    997      1.1  mrg }
    998      1.1  mrg 
    999      1.1  mrg extern __inline __m256i
   1000      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1001      1.1  mrg _mm256_mask_expand_epi8 (__m256i __A, __mmask32 __B, __m256i __C)
   1002      1.1  mrg {
   1003      1.1  mrg   return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __C,
   1004      1.1  mrg 						    (__v32qi) __A,
   1005      1.1  mrg 						    (__mmask32) __B);
   1006      1.1  mrg }
   1007      1.1  mrg 
   1008      1.1  mrg extern __inline __m256i
   1009      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1010      1.1  mrg _mm256_maskz_expand_epi8 (__mmask32 __A, __m256i __B)
   1011      1.1  mrg {
   1012      1.1  mrg   return (__m256i) __builtin_ia32_expandqi256_maskz ((__v32qi) __B,
   1013      1.1  mrg 			(__v32qi) _mm256_setzero_si256 (), (__mmask32) __A);
   1014      1.1  mrg }
   1015      1.1  mrg 
   1016      1.1  mrg extern __inline __m256i
   1017      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1018      1.1  mrg _mm256_mask_expandloadu_epi8 (__m256i __A, __mmask32 __B, const void * __C)
   1019      1.1  mrg {
   1020      1.1  mrg   return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *) __C,
   1021      1.1  mrg 					(__v32qi) __A, (__mmask32) __B);
   1022      1.1  mrg }
   1023      1.1  mrg 
   1024      1.1  mrg extern __inline __m256i
   1025      1.1  mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
   1026      1.1  mrg _mm256_maskz_expandloadu_epi8 (__mmask32 __A, const void * __B)
   1027      1.1  mrg {
   1028      1.1  mrg   return (__m256i) __builtin_ia32_expandloadqi256_maskz ((const __v32qi *) __B,
   1029      1.1  mrg 			(__v32qi) _mm256_setzero_si256 (), (__mmask32) __A);
   1030      1.1  mrg }
   1031      1.1  mrg 
   1032      1.1  mrg #ifdef __DISABLE_AVX512VBMI2VLBW__
   1033      1.1  mrg #undef __DISABLE_AVX512VBMI2VLBW__
   1034      1.1  mrg #pragma GCC pop_options
   1035      1.1  mrg #endif /* __DISABLE_AVX512VBMIVLBW__ */
   1036      1.1  mrg 
   1037      1.1  mrg #endif /* _AVX512VBMIVLINTRIN_H_INCLUDED */
   1038