1 1.1.1.4 mrg /* Copyright (C) 2013-2022 Free Software Foundation, Inc. 2 1.1 mrg 3 1.1 mrg This file is part of GCC. 4 1.1 mrg 5 1.1 mrg GCC is free software; you can redistribute it and/or modify 6 1.1 mrg it under the terms of the GNU General Public License as published by 7 1.1 mrg the Free Software Foundation; either version 3, or (at your option) 8 1.1 mrg any later version. 9 1.1 mrg 10 1.1 mrg GCC is distributed in the hope that it will be useful, 11 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of 12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 1.1 mrg GNU General Public License for more details. 14 1.1 mrg 15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional 16 1.1 mrg permissions described in the GCC Runtime Library Exception, version 17 1.1 mrg 3.1, as published by the Free Software Foundation. 18 1.1 mrg 19 1.1 mrg You should have received a copy of the GNU General Public License and 20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program; 21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 1.1 mrg <http://www.gnu.org/licenses/>. */ 23 1.1 mrg 24 1.1 mrg #ifndef _IMMINTRIN_H_INCLUDED 25 1.1 mrg #error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead." 26 1.1 mrg #endif 27 1.1 mrg 28 1.1 mrg #ifndef __AVX512VBMI2INTRIN_H_INCLUDED 29 1.1 mrg #define __AVX512VBMI2INTRIN_H_INCLUDED 30 1.1 mrg 31 1.1 mrg #if !defined(__AVX512VBMI2__) 32 1.1 mrg #pragma GCC push_options 33 1.1 mrg #pragma GCC target("avx512vbmi2") 34 1.1 mrg #define __DISABLE_AVX512VBMI2__ 35 1.1 mrg #endif /* __AVX512VBMI2__ */ 36 1.1 mrg 37 1.1 mrg #ifdef __OPTIMIZE__ 38 1.1 mrg extern __inline __m512i 39 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 40 1.1 mrg _mm512_shrdi_epi16 (__m512i __A, __m512i __B, int __C) 41 1.1 mrg { 42 1.1 mrg return (__m512i) __builtin_ia32_vpshrd_v32hi ((__v32hi)__A, (__v32hi) __B, 43 1.1 mrg __C); 44 1.1 mrg } 45 1.1 mrg 46 1.1 mrg extern __inline __m512i 47 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 48 1.1 mrg _mm512_shrdi_epi32 (__m512i __A, __m512i __B, int __C) 49 1.1 mrg { 50 1.1 mrg return (__m512i) __builtin_ia32_vpshrd_v16si ((__v16si)__A, (__v16si) __B, 51 1.1 mrg __C); 52 1.1 mrg } 53 1.1 mrg 54 1.1 mrg extern __inline __m512i 55 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 56 1.1 mrg _mm512_mask_shrdi_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D, 57 1.1 mrg int __E) 58 1.1 mrg { 59 1.1 mrg return (__m512i)__builtin_ia32_vpshrd_v16si_mask ((__v16si)__C, 60 1.1 mrg (__v16si) __D, __E, (__v16si) __A, (__mmask16)__B); 61 1.1 mrg } 62 1.1 mrg 63 1.1 mrg extern __inline __m512i 64 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 65 1.1 mrg _mm512_maskz_shrdi_epi32 (__mmask16 __A, __m512i __B, __m512i __C, int __D) 66 1.1 mrg { 67 1.1 mrg return (__m512i)__builtin_ia32_vpshrd_v16si_mask ((__v16si)__B, 68 1.1 mrg (__v16si) __C, __D, (__v16si) _mm512_setzero_si512 (), (__mmask16)__A); 69 1.1 mrg } 70 1.1 mrg 71 1.1 mrg extern __inline __m512i 72 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 73 1.1 mrg _mm512_shrdi_epi64 (__m512i __A, __m512i __B, int __C) 74 1.1 mrg { 75 1.1 mrg return (__m512i) __builtin_ia32_vpshrd_v8di ((__v8di)__A, (__v8di) __B, __C); 76 1.1 mrg } 77 1.1 mrg 78 1.1 mrg extern __inline __m512i 79 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 80 1.1 mrg _mm512_mask_shrdi_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D, 81 1.1 mrg int __E) 82 1.1 mrg { 83 1.1 mrg return (__m512i)__builtin_ia32_vpshrd_v8di_mask ((__v8di)__C, (__v8di) __D, 84 1.1 mrg __E, (__v8di) __A, (__mmask8)__B); 85 1.1 mrg } 86 1.1 mrg 87 1.1 mrg extern __inline __m512i 88 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 89 1.1 mrg _mm512_maskz_shrdi_epi64 (__mmask8 __A, __m512i __B, __m512i __C, int __D) 90 1.1 mrg { 91 1.1 mrg return (__m512i)__builtin_ia32_vpshrd_v8di_mask ((__v8di)__B, (__v8di) __C, 92 1.1 mrg __D, (__v8di) _mm512_setzero_si512 (), (__mmask8)__A); 93 1.1 mrg } 94 1.1 mrg 95 1.1 mrg extern __inline __m512i 96 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 97 1.1 mrg _mm512_shldi_epi16 (__m512i __A, __m512i __B, int __C) 98 1.1 mrg { 99 1.1 mrg return (__m512i) __builtin_ia32_vpshld_v32hi ((__v32hi)__A, (__v32hi) __B, 100 1.1 mrg __C); 101 1.1 mrg } 102 1.1 mrg 103 1.1 mrg extern __inline __m512i 104 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 105 1.1 mrg _mm512_shldi_epi32 (__m512i __A, __m512i __B, int __C) 106 1.1 mrg { 107 1.1 mrg return (__m512i) __builtin_ia32_vpshld_v16si ((__v16si)__A, (__v16si) __B, 108 1.1 mrg __C); 109 1.1 mrg } 110 1.1 mrg 111 1.1 mrg extern __inline __m512i 112 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 113 1.1 mrg _mm512_mask_shldi_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D, 114 1.1 mrg int __E) 115 1.1 mrg { 116 1.1 mrg return (__m512i)__builtin_ia32_vpshld_v16si_mask ((__v16si)__C, 117 1.1 mrg (__v16si) __D, __E, (__v16si) __A, (__mmask16)__B); 118 1.1 mrg } 119 1.1 mrg 120 1.1 mrg extern __inline __m512i 121 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 122 1.1 mrg _mm512_maskz_shldi_epi32 (__mmask16 __A, __m512i __B, __m512i __C, int __D) 123 1.1 mrg { 124 1.1 mrg return (__m512i)__builtin_ia32_vpshld_v16si_mask ((__v16si)__B, 125 1.1 mrg (__v16si) __C, __D, (__v16si) _mm512_setzero_si512 (), (__mmask16)__A); 126 1.1 mrg } 127 1.1 mrg 128 1.1 mrg extern __inline __m512i 129 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 130 1.1 mrg _mm512_shldi_epi64 (__m512i __A, __m512i __B, int __C) 131 1.1 mrg { 132 1.1 mrg return (__m512i) __builtin_ia32_vpshld_v8di ((__v8di)__A, (__v8di) __B, __C); 133 1.1 mrg } 134 1.1 mrg 135 1.1 mrg extern __inline __m512i 136 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 137 1.1 mrg _mm512_mask_shldi_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D, 138 1.1 mrg int __E) 139 1.1 mrg { 140 1.1 mrg return (__m512i)__builtin_ia32_vpshld_v8di_mask ((__v8di)__C, (__v8di) __D, 141 1.1 mrg __E, (__v8di) __A, (__mmask8)__B); 142 1.1 mrg } 143 1.1 mrg 144 1.1 mrg extern __inline __m512i 145 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 146 1.1 mrg _mm512_maskz_shldi_epi64 (__mmask8 __A, __m512i __B, __m512i __C, int __D) 147 1.1 mrg { 148 1.1 mrg return (__m512i)__builtin_ia32_vpshld_v8di_mask ((__v8di)__B, (__v8di) __C, 149 1.1 mrg __D, (__v8di) _mm512_setzero_si512 (), (__mmask8)__A); 150 1.1 mrg } 151 1.1 mrg #else 152 1.1 mrg #define _mm512_shrdi_epi16(A, B, C) \ 153 1.1 mrg ((__m512i) __builtin_ia32_vpshrd_v32hi ((__v32hi)(__m512i)(A), \ 154 1.1 mrg (__v32hi)(__m512i)(B),(int)(C))) 155 1.1 mrg #define _mm512_shrdi_epi32(A, B, C) \ 156 1.1 mrg ((__m512i) __builtin_ia32_vpshrd_v16si ((__v16si)(__m512i)(A), \ 157 1.1 mrg (__v16si)(__m512i)(B),(int)(C))) 158 1.1 mrg #define _mm512_mask_shrdi_epi32(A, B, C, D, E) \ 159 1.1 mrg ((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(C), \ 160 1.1 mrg (__v16si)(__m512i)(D), \ 161 1.1 mrg (int)(E), \ 162 1.1 mrg (__v16si)(__m512i)(A), \ 163 1.1 mrg (__mmask16)(B))) 164 1.1 mrg #define _mm512_maskz_shrdi_epi32(A, B, C, D) \ 165 1.1 mrg ((__m512i) \ 166 1.1 mrg __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(B), \ 167 1.1 mrg (__v16si)(__m512i)(C),(int)(D), \ 168 1.1 mrg (__v16si)(__m512i)_mm512_setzero_si512 (), \ 169 1.1 mrg (__mmask16)(A))) 170 1.1 mrg #define _mm512_shrdi_epi64(A, B, C) \ 171 1.1 mrg ((__m512i) __builtin_ia32_vpshrd_v8di ((__v8di)(__m512i)(A), \ 172 1.1 mrg (__v8di)(__m512i)(B),(int)(C))) 173 1.1 mrg #define _mm512_mask_shrdi_epi64(A, B, C, D, E) \ 174 1.1 mrg ((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(C), \ 175 1.1 mrg (__v8di)(__m512i)(D), (int)(E), \ 176 1.1 mrg (__v8di)(__m512i)(A), \ 177 1.1 mrg (__mmask8)(B))) 178 1.1 mrg #define _mm512_maskz_shrdi_epi64(A, B, C, D) \ 179 1.1 mrg ((__m512i) \ 180 1.1 mrg __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(B), \ 181 1.1 mrg (__v8di)(__m512i)(C),(int)(D), \ 182 1.1 mrg (__v8di)(__m512i)_mm512_setzero_si512 (), \ 183 1.1 mrg (__mmask8)(A))) 184 1.1 mrg #define _mm512_shldi_epi16(A, B, C) \ 185 1.1 mrg ((__m512i) __builtin_ia32_vpshld_v32hi ((__v32hi)(__m512i)(A), \ 186 1.1 mrg (__v32hi)(__m512i)(B),(int)(C))) 187 1.1 mrg #define _mm512_shldi_epi32(A, B, C) \ 188 1.1 mrg ((__m512i) __builtin_ia32_vpshld_v16si ((__v16si)(__m512i)(A), \ 189 1.1 mrg (__v16si)(__m512i)(B),(int)(C))) 190 1.1 mrg #define _mm512_mask_shldi_epi32(A, B, C, D, E) \ 191 1.1 mrg ((__m512i) __builtin_ia32_vpshld_v16si_mask ((__v16si)(__m512i)(C), \ 192 1.1 mrg (__v16si)(__m512i)(D), \ 193 1.1 mrg (int)(E), \ 194 1.1 mrg (__v16si)(__m512i)(A), \ 195 1.1 mrg (__mmask16)(B))) 196 1.1 mrg #define _mm512_maskz_shldi_epi32(A, B, C, D) \ 197 1.1 mrg ((__m512i) \ 198 1.1 mrg __builtin_ia32_vpshld_v16si_mask ((__v16si)(__m512i)(B), \ 199 1.1 mrg (__v16si)(__m512i)(C),(int)(D), \ 200 1.1 mrg (__v16si)(__m512i)_mm512_setzero_si512 (), \ 201 1.1 mrg (__mmask16)(A))) 202 1.1 mrg #define _mm512_shldi_epi64(A, B, C) \ 203 1.1 mrg ((__m512i) __builtin_ia32_vpshld_v8di ((__v8di)(__m512i)(A), \ 204 1.1 mrg (__v8di)(__m512i)(B), (int)(C))) 205 1.1 mrg #define _mm512_mask_shldi_epi64(A, B, C, D, E) \ 206 1.1 mrg ((__m512i) __builtin_ia32_vpshld_v8di_mask ((__v8di)(__m512i)(C), \ 207 1.1 mrg (__v8di)(__m512i)(D), (int)(E), \ 208 1.1 mrg (__v8di)(__m512i)(A), \ 209 1.1 mrg (__mmask8)(B))) 210 1.1 mrg #define _mm512_maskz_shldi_epi64(A, B, C, D) \ 211 1.1 mrg ((__m512i) \ 212 1.1 mrg __builtin_ia32_vpshld_v8di_mask ((__v8di)(__m512i)(B), \ 213 1.1 mrg (__v8di)(__m512i)(C),(int)(D), \ 214 1.1 mrg (__v8di)(__m512i)_mm512_setzero_si512 (), \ 215 1.1 mrg (__mmask8)(A))) 216 1.1 mrg #endif 217 1.1 mrg 218 1.1 mrg extern __inline __m512i 219 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 220 1.1 mrg _mm512_shrdv_epi16 (__m512i __A, __m512i __B, __m512i __C) 221 1.1 mrg { 222 1.1 mrg return (__m512i) __builtin_ia32_vpshrdv_v32hi ((__v32hi)__A, (__v32hi) __B, 223 1.1 mrg (__v32hi) __C); 224 1.1 mrg } 225 1.1 mrg 226 1.1 mrg extern __inline __m512i 227 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 228 1.1 mrg _mm512_shrdv_epi32 (__m512i __A, __m512i __B, __m512i __C) 229 1.1 mrg { 230 1.1 mrg return (__m512i) __builtin_ia32_vpshrdv_v16si ((__v16si)__A, (__v16si) __B, 231 1.1 mrg (__v16si) __C); 232 1.1 mrg } 233 1.1 mrg 234 1.1 mrg extern __inline __m512i 235 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 236 1.1 mrg _mm512_mask_shrdv_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D) 237 1.1 mrg { 238 1.1 mrg return (__m512i)__builtin_ia32_vpshrdv_v16si_mask ((__v16si)__A, 239 1.1 mrg (__v16si) __C, (__v16si) __D, (__mmask16)__B); 240 1.1 mrg } 241 1.1 mrg 242 1.1 mrg extern __inline __m512i 243 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 244 1.1 mrg _mm512_maskz_shrdv_epi32 (__mmask16 __A, __m512i __B, __m512i __C, __m512i __D) 245 1.1 mrg { 246 1.1 mrg return (__m512i)__builtin_ia32_vpshrdv_v16si_maskz ((__v16si)__B, 247 1.1 mrg (__v16si) __C, (__v16si) __D, (__mmask16)__A); 248 1.1 mrg } 249 1.1 mrg 250 1.1 mrg extern __inline __m512i 251 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 252 1.1 mrg _mm512_shrdv_epi64 (__m512i __A, __m512i __B, __m512i __C) 253 1.1 mrg { 254 1.1 mrg return (__m512i) __builtin_ia32_vpshrdv_v8di ((__v8di)__A, (__v8di) __B, 255 1.1 mrg (__v8di) __C); 256 1.1 mrg } 257 1.1 mrg 258 1.1 mrg extern __inline __m512i 259 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 260 1.1 mrg _mm512_mask_shrdv_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D) 261 1.1 mrg { 262 1.1 mrg return (__m512i)__builtin_ia32_vpshrdv_v8di_mask ((__v8di)__A, (__v8di) __C, 263 1.1 mrg (__v8di) __D, (__mmask8)__B); 264 1.1 mrg } 265 1.1 mrg 266 1.1 mrg extern __inline __m512i 267 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 268 1.1 mrg _mm512_maskz_shrdv_epi64 (__mmask8 __A, __m512i __B, __m512i __C, __m512i __D) 269 1.1 mrg { 270 1.1 mrg return (__m512i)__builtin_ia32_vpshrdv_v8di_maskz ((__v8di)__B, (__v8di) __C, 271 1.1 mrg (__v8di) __D, (__mmask8)__A); 272 1.1 mrg } 273 1.1 mrg extern __inline __m512i 274 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 275 1.1 mrg _mm512_shldv_epi16 (__m512i __A, __m512i __B, __m512i __C) 276 1.1 mrg { 277 1.1 mrg return (__m512i) __builtin_ia32_vpshldv_v32hi ((__v32hi)__A, (__v32hi) __B, 278 1.1 mrg (__v32hi) __C); 279 1.1 mrg } 280 1.1 mrg 281 1.1 mrg extern __inline __m512i 282 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 283 1.1 mrg _mm512_shldv_epi32 (__m512i __A, __m512i __B, __m512i __C) 284 1.1 mrg { 285 1.1 mrg return (__m512i) __builtin_ia32_vpshldv_v16si ((__v16si)__A, (__v16si) __B, 286 1.1 mrg (__v16si) __C); 287 1.1 mrg } 288 1.1 mrg 289 1.1 mrg extern __inline __m512i 290 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 291 1.1 mrg _mm512_mask_shldv_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D) 292 1.1 mrg { 293 1.1 mrg return (__m512i)__builtin_ia32_vpshldv_v16si_mask ((__v16si)__A, 294 1.1 mrg (__v16si) __C, (__v16si) __D, (__mmask16)__B); 295 1.1 mrg } 296 1.1 mrg 297 1.1 mrg extern __inline __m512i 298 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 299 1.1 mrg _mm512_maskz_shldv_epi32 (__mmask16 __A, __m512i __B, __m512i __C, __m512i __D) 300 1.1 mrg { 301 1.1 mrg return (__m512i)__builtin_ia32_vpshldv_v16si_maskz ((__v16si)__B, 302 1.1 mrg (__v16si) __C, (__v16si) __D, (__mmask16)__A); 303 1.1 mrg } 304 1.1 mrg 305 1.1 mrg extern __inline __m512i 306 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 307 1.1 mrg _mm512_shldv_epi64 (__m512i __A, __m512i __B, __m512i __C) 308 1.1 mrg { 309 1.1 mrg return (__m512i) __builtin_ia32_vpshldv_v8di ((__v8di)__A, (__v8di) __B, 310 1.1 mrg (__v8di) __C); 311 1.1 mrg } 312 1.1 mrg 313 1.1 mrg extern __inline __m512i 314 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 315 1.1 mrg _mm512_mask_shldv_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D) 316 1.1 mrg { 317 1.1 mrg return (__m512i)__builtin_ia32_vpshldv_v8di_mask ((__v8di)__A, (__v8di) __C, 318 1.1 mrg (__v8di) __D, (__mmask8)__B); 319 1.1 mrg } 320 1.1 mrg 321 1.1 mrg extern __inline __m512i 322 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 323 1.1 mrg _mm512_maskz_shldv_epi64 (__mmask8 __A, __m512i __B, __m512i __C, __m512i __D) 324 1.1 mrg { 325 1.1 mrg return (__m512i)__builtin_ia32_vpshldv_v8di_maskz ((__v8di)__B, (__v8di) __C, 326 1.1 mrg (__v8di) __D, (__mmask8)__A); 327 1.1 mrg } 328 1.1 mrg 329 1.1 mrg #ifdef __DISABLE_AVX512VBMI2__ 330 1.1 mrg #undef __DISABLE_AVX512VBMI2__ 331 1.1 mrg 332 1.1 mrg #pragma GCC pop_options 333 1.1 mrg #endif /* __DISABLE_AVX512VBMI2__ */ 334 1.1 mrg 335 1.1 mrg #if !defined(__AVX512VBMI2__) || !defined(__AVX512BW__) 336 1.1 mrg #pragma GCC push_options 337 1.1 mrg #pragma GCC target("avx512vbmi2,avx512bw") 338 1.1 mrg #define __DISABLE_AVX512VBMI2BW__ 339 1.1 mrg #endif /* __AVX512VBMI2BW__ */ 340 1.1 mrg 341 1.1 mrg extern __inline __m512i 342 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 343 1.1 mrg _mm512_mask_compress_epi8 (__m512i __A, __mmask64 __B, __m512i __C) 344 1.1 mrg { 345 1.1 mrg return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi)__C, 346 1.1 mrg (__v64qi)__A, (__mmask64)__B); 347 1.1 mrg } 348 1.1 mrg 349 1.1 mrg 350 1.1 mrg extern __inline __m512i 351 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 352 1.1 mrg _mm512_maskz_compress_epi8 (__mmask64 __A, __m512i __B) 353 1.1 mrg { 354 1.1 mrg return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi)__B, 355 1.1 mrg (__v64qi)_mm512_setzero_si512 (), (__mmask64)__A); 356 1.1 mrg } 357 1.1 mrg 358 1.1 mrg 359 1.1 mrg extern __inline void 360 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 361 1.1 mrg _mm512_mask_compressstoreu_epi8 (void * __A, __mmask64 __B, __m512i __C) 362 1.1 mrg { 363 1.1 mrg __builtin_ia32_compressstoreuqi512_mask ((__v64qi *) __A, (__v64qi) __C, 364 1.1 mrg (__mmask64) __B); 365 1.1 mrg } 366 1.1 mrg 367 1.1 mrg extern __inline __m512i 368 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 369 1.1 mrg _mm512_mask_compress_epi16 (__m512i __A, __mmask32 __B, __m512i __C) 370 1.1 mrg { 371 1.1 mrg return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi)__C, 372 1.1 mrg (__v32hi)__A, (__mmask32)__B); 373 1.1 mrg } 374 1.1 mrg 375 1.1 mrg extern __inline __m512i 376 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 377 1.1 mrg _mm512_maskz_compress_epi16 (__mmask32 __A, __m512i __B) 378 1.1 mrg { 379 1.1 mrg return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi)__B, 380 1.1 mrg (__v32hi)_mm512_setzero_si512 (), (__mmask32)__A); 381 1.1 mrg } 382 1.1 mrg 383 1.1 mrg extern __inline void 384 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 385 1.1 mrg _mm512_mask_compressstoreu_epi16 (void * __A, __mmask32 __B, __m512i __C) 386 1.1 mrg { 387 1.1 mrg __builtin_ia32_compressstoreuhi512_mask ((__v32hi *) __A, (__v32hi) __C, 388 1.1 mrg (__mmask32) __B); 389 1.1 mrg } 390 1.1 mrg 391 1.1 mrg extern __inline __m512i 392 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 393 1.1 mrg _mm512_mask_expand_epi8 (__m512i __A, __mmask64 __B, __m512i __C) 394 1.1 mrg { 395 1.1 mrg return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __C, 396 1.1 mrg (__v64qi) __A, 397 1.1 mrg (__mmask64) __B); 398 1.1 mrg } 399 1.1 mrg 400 1.1 mrg extern __inline __m512i 401 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 402 1.1 mrg _mm512_maskz_expand_epi8 (__mmask64 __A, __m512i __B) 403 1.1 mrg { 404 1.1 mrg return (__m512i) __builtin_ia32_expandqi512_maskz ((__v64qi) __B, 405 1.1 mrg (__v64qi) _mm512_setzero_si512 (), (__mmask64) __A); 406 1.1 mrg } 407 1.1 mrg 408 1.1 mrg extern __inline __m512i 409 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 410 1.1 mrg _mm512_mask_expandloadu_epi8 (__m512i __A, __mmask64 __B, const void * __C) 411 1.1 mrg { 412 1.1 mrg return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *) __C, 413 1.1 mrg (__v64qi) __A, (__mmask64) __B); 414 1.1 mrg } 415 1.1 mrg 416 1.1 mrg extern __inline __m512i 417 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 418 1.1 mrg _mm512_maskz_expandloadu_epi8 (__mmask64 __A, const void * __B) 419 1.1 mrg { 420 1.1 mrg return (__m512i) __builtin_ia32_expandloadqi512_maskz ((const __v64qi *) __B, 421 1.1 mrg (__v64qi) _mm512_setzero_si512 (), (__mmask64) __A); 422 1.1 mrg } 423 1.1 mrg 424 1.1 mrg extern __inline __m512i 425 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 426 1.1 mrg _mm512_mask_expand_epi16 (__m512i __A, __mmask32 __B, __m512i __C) 427 1.1 mrg { 428 1.1 mrg return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __C, 429 1.1 mrg (__v32hi) __A, 430 1.1 mrg (__mmask32) __B); 431 1.1 mrg } 432 1.1 mrg 433 1.1 mrg extern __inline __m512i 434 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 435 1.1 mrg _mm512_maskz_expand_epi16 (__mmask32 __A, __m512i __B) 436 1.1 mrg { 437 1.1 mrg return (__m512i) __builtin_ia32_expandhi512_maskz ((__v32hi) __B, 438 1.1 mrg (__v32hi) _mm512_setzero_si512 (), (__mmask32) __A); 439 1.1 mrg } 440 1.1 mrg 441 1.1 mrg extern __inline __m512i 442 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 443 1.1 mrg _mm512_mask_expandloadu_epi16 (__m512i __A, __mmask32 __B, const void * __C) 444 1.1 mrg { 445 1.1 mrg return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *) __C, 446 1.1 mrg (__v32hi) __A, (__mmask32) __B); 447 1.1 mrg } 448 1.1 mrg 449 1.1 mrg extern __inline __m512i 450 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 451 1.1 mrg _mm512_maskz_expandloadu_epi16 (__mmask32 __A, const void * __B) 452 1.1 mrg { 453 1.1 mrg return (__m512i) __builtin_ia32_expandloadhi512_maskz ((const __v32hi *) __B, 454 1.1 mrg (__v32hi) _mm512_setzero_si512 (), (__mmask32) __A); 455 1.1 mrg } 456 1.1 mrg 457 1.1 mrg #ifdef __OPTIMIZE__ 458 1.1 mrg extern __inline __m512i 459 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 460 1.1 mrg _mm512_mask_shrdi_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D, 461 1.1 mrg int __E) 462 1.1 mrg { 463 1.1 mrg return (__m512i)__builtin_ia32_vpshrd_v32hi_mask ((__v32hi)__C, 464 1.1 mrg (__v32hi) __D, __E, (__v32hi) __A, (__mmask32)__B); 465 1.1 mrg } 466 1.1 mrg 467 1.1 mrg extern __inline __m512i 468 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 469 1.1 mrg _mm512_maskz_shrdi_epi16 (__mmask32 __A, __m512i __B, __m512i __C, int __D) 470 1.1 mrg { 471 1.1 mrg return (__m512i)__builtin_ia32_vpshrd_v32hi_mask ((__v32hi)__B, 472 1.1 mrg (__v32hi) __C, __D, (__v32hi) _mm512_setzero_si512 (), (__mmask32)__A); 473 1.1 mrg } 474 1.1 mrg 475 1.1 mrg extern __inline __m512i 476 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 477 1.1 mrg _mm512_mask_shldi_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D, 478 1.1 mrg int __E) 479 1.1 mrg { 480 1.1 mrg return (__m512i)__builtin_ia32_vpshld_v32hi_mask ((__v32hi)__C, 481 1.1 mrg (__v32hi) __D, __E, (__v32hi) __A, (__mmask32)__B); 482 1.1 mrg } 483 1.1 mrg 484 1.1 mrg extern __inline __m512i 485 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 486 1.1 mrg _mm512_maskz_shldi_epi16 (__mmask32 __A, __m512i __B, __m512i __C, int __D) 487 1.1 mrg { 488 1.1 mrg return (__m512i)__builtin_ia32_vpshld_v32hi_mask ((__v32hi)__B, 489 1.1 mrg (__v32hi) __C, __D, (__v32hi) _mm512_setzero_si512 (), (__mmask32)__A); 490 1.1 mrg } 491 1.1 mrg 492 1.1 mrg #else 493 1.1 mrg #define _mm512_mask_shrdi_epi16(A, B, C, D, E) \ 494 1.1 mrg ((__m512i) __builtin_ia32_vpshrd_v32hi_mask ((__v32hi)(__m512i)(C), \ 495 1.1 mrg (__v32hi)(__m512i)(D), \ 496 1.1 mrg (int)(E), \ 497 1.1 mrg (__v32hi)(__m512i)(A), \ 498 1.1 mrg (__mmask32)(B))) 499 1.1 mrg #define _mm512_maskz_shrdi_epi16(A, B, C, D) \ 500 1.1 mrg ((__m512i) \ 501 1.1 mrg __builtin_ia32_vpshrd_v32hi_mask ((__v32hi)(__m512i)(B), \ 502 1.1 mrg (__v32hi)(__m512i)(C),(int)(D), \ 503 1.1 mrg (__v32hi)(__m512i)_mm512_setzero_si512 (), \ 504 1.1 mrg (__mmask32)(A))) 505 1.1 mrg #define _mm512_mask_shldi_epi16(A, B, C, D, E) \ 506 1.1 mrg ((__m512i) __builtin_ia32_vpshld_v32hi_mask ((__v32hi)(__m512i)(C), \ 507 1.1 mrg (__v32hi)(__m512i)(D), \ 508 1.1 mrg (int)(E), \ 509 1.1 mrg (__v32hi)(__m512i)(A), \ 510 1.1 mrg (__mmask32)(B))) 511 1.1 mrg #define _mm512_maskz_shldi_epi16(A, B, C, D) \ 512 1.1 mrg ((__m512i) \ 513 1.1 mrg __builtin_ia32_vpshld_v32hi_mask ((__v32hi)(__m512i)(B), \ 514 1.1 mrg (__v32hi)(__m512i)(C),(int)(D), \ 515 1.1 mrg (__v32hi)(__m512i)_mm512_setzero_si512 (), \ 516 1.1 mrg (__mmask32)(A))) 517 1.1 mrg #endif 518 1.1 mrg 519 1.1 mrg extern __inline __m512i 520 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 521 1.1 mrg _mm512_mask_shrdv_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D) 522 1.1 mrg { 523 1.1 mrg return (__m512i)__builtin_ia32_vpshrdv_v32hi_mask ((__v32hi)__A, 524 1.1 mrg (__v32hi) __C, (__v32hi) __D, (__mmask32)__B); 525 1.1 mrg } 526 1.1 mrg 527 1.1 mrg extern __inline __m512i 528 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 529 1.1 mrg _mm512_maskz_shrdv_epi16 (__mmask32 __A, __m512i __B, __m512i __C, __m512i __D) 530 1.1 mrg { 531 1.1 mrg return (__m512i)__builtin_ia32_vpshrdv_v32hi_maskz ((__v32hi)__B, 532 1.1 mrg (__v32hi) __C, (__v32hi) __D, (__mmask32)__A); 533 1.1 mrg } 534 1.1 mrg 535 1.1 mrg extern __inline __m512i 536 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 537 1.1 mrg _mm512_mask_shldv_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D) 538 1.1 mrg { 539 1.1 mrg return (__m512i)__builtin_ia32_vpshldv_v32hi_mask ((__v32hi)__A, 540 1.1 mrg (__v32hi) __C, (__v32hi) __D, (__mmask32)__B); 541 1.1 mrg } 542 1.1 mrg 543 1.1 mrg extern __inline __m512i 544 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 545 1.1 mrg _mm512_maskz_shldv_epi16 (__mmask32 __A, __m512i __B, __m512i __C, __m512i __D) 546 1.1 mrg { 547 1.1 mrg return (__m512i)__builtin_ia32_vpshldv_v32hi_maskz ((__v32hi)__B, 548 1.1 mrg (__v32hi) __C, (__v32hi) __D, (__mmask32)__A); 549 1.1 mrg } 550 1.1 mrg 551 1.1 mrg #ifdef __DISABLE_AVX512VBMI2BW__ 552 1.1 mrg #undef __DISABLE_AVX512VBMI2BW__ 553 1.1 mrg 554 1.1 mrg #pragma GCC pop_options 555 1.1 mrg #endif /* __DISABLE_AVX512VBMI2BW__ */ 556 1.1 mrg 557 1.1 mrg #endif /* __AVX512VBMI2INTRIN_H_INCLUDED */ 558