1 1.1.1.4 mrg /* Copyright (C) 2013-2022 Free Software Foundation, Inc. 2 1.1 mrg 3 1.1 mrg This file is part of GCC. 4 1.1 mrg 5 1.1 mrg GCC is free software; you can redistribute it and/or modify 6 1.1 mrg it under the terms of the GNU General Public License as published by 7 1.1 mrg the Free Software Foundation; either version 3, or (at your option) 8 1.1 mrg any later version. 9 1.1 mrg 10 1.1 mrg GCC is distributed in the hope that it will be useful, 11 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of 12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 1.1 mrg GNU General Public License for more details. 14 1.1 mrg 15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional 16 1.1 mrg permissions described in the GCC Runtime Library Exception, version 17 1.1 mrg 3.1, as published by the Free Software Foundation. 18 1.1 mrg 19 1.1 mrg You should have received a copy of the GNU General Public License and 20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program; 21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 1.1 mrg <http://www.gnu.org/licenses/>. */ 23 1.1 mrg 24 1.1 mrg #ifndef _IMMINTRIN_H_INCLUDED 25 1.1 mrg #error "Never use <avx512vnnivlintrin.h> directly; include <immintrin.h> instead." 26 1.1 mrg #endif 27 1.1 mrg 28 1.1 mrg #ifndef _AVX512VNNIVLINTRIN_H_INCLUDED 29 1.1 mrg #define _AVX512VNNIVLINTRIN_H_INCLUDED 30 1.1 mrg 31 1.1 mrg #if !defined(__AVX512VL__) || !defined(__AVX512VNNI__) 32 1.1 mrg #pragma GCC push_options 33 1.1 mrg #pragma GCC target("avx512vnni,avx512vl") 34 1.1 mrg #define __DISABLE_AVX512VNNIVL__ 35 1.1 mrg #endif /* __AVX512VNNIVL__ */ 36 1.1 mrg 37 1.1.1.4 mrg #define _mm256_dpbusd_epi32(A, B, C) \ 38 1.1.1.4 mrg ((__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si) (A), \ 39 1.1.1.4 mrg (__v8si) (B), \ 40 1.1.1.4 mrg (__v8si) (C))) 41 1.1 mrg 42 1.1 mrg extern __inline __m256i 43 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 44 1.1 mrg _mm256_mask_dpbusd_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) 45 1.1 mrg { 46 1.1 mrg return (__m256i)__builtin_ia32_vpdpbusd_v8si_mask ((__v8si)__A, (__v8si) __C, 47 1.1 mrg (__v8si) __D, (__mmask8)__B); 48 1.1 mrg } 49 1.1 mrg 50 1.1 mrg extern __inline __m256i 51 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 52 1.1 mrg _mm256_maskz_dpbusd_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D) 53 1.1 mrg { 54 1.1 mrg return (__m256i)__builtin_ia32_vpdpbusd_v8si_maskz ((__v8si)__B, 55 1.1 mrg (__v8si) __C, (__v8si) __D, (__mmask8)__A); 56 1.1 mrg } 57 1.1 mrg 58 1.1.1.4 mrg #define _mm_dpbusd_epi32(A, B, C) \ 59 1.1.1.4 mrg ((__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si) (A), \ 60 1.1.1.4 mrg (__v4si) (B), \ 61 1.1.1.4 mrg (__v4si) (C))) 62 1.1 mrg 63 1.1 mrg extern __inline __m128i 64 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 65 1.1 mrg _mm_mask_dpbusd_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) 66 1.1 mrg { 67 1.1 mrg return (__m128i)__builtin_ia32_vpdpbusd_v4si_mask ((__v4si)__A, (__v4si) __C, 68 1.1 mrg (__v4si) __D, (__mmask8)__B); 69 1.1 mrg } 70 1.1 mrg 71 1.1 mrg extern __inline __m128i 72 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 73 1.1 mrg _mm_maskz_dpbusd_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) 74 1.1 mrg { 75 1.1 mrg return (__m128i)__builtin_ia32_vpdpbusd_v4si_maskz ((__v4si)__B, 76 1.1 mrg (__v4si) __C, (__v4si) __D, (__mmask8)__A); 77 1.1 mrg } 78 1.1 mrg 79 1.1.1.4 mrg #define _mm256_dpbusds_epi32(A, B, C) \ 80 1.1.1.4 mrg ((__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si) (A), \ 81 1.1.1.4 mrg (__v8si) (B), \ 82 1.1.1.4 mrg (__v8si) (C))) 83 1.1 mrg 84 1.1 mrg extern __inline __m256i 85 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 86 1.1 mrg _mm256_mask_dpbusds_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) 87 1.1 mrg { 88 1.1 mrg return (__m256i)__builtin_ia32_vpdpbusds_v8si_mask ((__v8si)__A, 89 1.1 mrg (__v8si) __C, (__v8si) __D, (__mmask8)__B); 90 1.1 mrg } 91 1.1 mrg 92 1.1 mrg extern __inline __m256i 93 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 94 1.1 mrg _mm256_maskz_dpbusds_epi32 (__mmask8 __A, __m256i __B, __m256i __C, 95 1.1 mrg __m256i __D) 96 1.1 mrg { 97 1.1 mrg return (__m256i)__builtin_ia32_vpdpbusds_v8si_maskz ((__v8si)__B, 98 1.1 mrg (__v8si) __C, (__v8si) __D, (__mmask8)__A); 99 1.1 mrg } 100 1.1 mrg 101 1.1.1.4 mrg #define _mm_dpbusds_epi32(A, B, C) \ 102 1.1.1.4 mrg ((__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si) (A), \ 103 1.1.1.4 mrg (__v4si) (B), \ 104 1.1.1.4 mrg (__v4si) (C))) 105 1.1 mrg 106 1.1 mrg extern __inline __m128i 107 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 108 1.1 mrg _mm_mask_dpbusds_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) 109 1.1 mrg { 110 1.1 mrg return (__m128i)__builtin_ia32_vpdpbusds_v4si_mask ((__v4si)__A, 111 1.1 mrg (__v4si) __C, (__v4si) __D, (__mmask8)__B); 112 1.1 mrg } 113 1.1 mrg 114 1.1 mrg extern __inline __m128i 115 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 116 1.1 mrg _mm_maskz_dpbusds_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) 117 1.1 mrg { 118 1.1 mrg return (__m128i)__builtin_ia32_vpdpbusds_v4si_maskz ((__v4si)__B, 119 1.1 mrg (__v4si) __C, (__v4si) __D, (__mmask8)__A); 120 1.1 mrg } 121 1.1 mrg 122 1.1.1.4 mrg #define _mm256_dpwssd_epi32(A, B, C) \ 123 1.1.1.4 mrg ((__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si) (A), \ 124 1.1.1.4 mrg (__v8si) (B), \ 125 1.1.1.4 mrg (__v8si) (C))) 126 1.1 mrg 127 1.1 mrg extern __inline __m256i 128 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 129 1.1 mrg _mm256_mask_dpwssd_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) 130 1.1 mrg { 131 1.1 mrg return (__m256i)__builtin_ia32_vpdpwssd_v8si_mask ((__v8si)__A, (__v8si) __C, 132 1.1 mrg (__v8si) __D, (__mmask8)__B); 133 1.1 mrg } 134 1.1 mrg 135 1.1 mrg extern __inline __m256i 136 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 137 1.1 mrg _mm256_maskz_dpwssd_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D) 138 1.1 mrg { 139 1.1 mrg return (__m256i)__builtin_ia32_vpdpwssd_v8si_maskz ((__v8si)__B, 140 1.1 mrg (__v8si) __C, (__v8si) __D, (__mmask8)__A); 141 1.1 mrg } 142 1.1 mrg 143 1.1.1.4 mrg #define _mm_dpwssd_epi32(A, B, C) \ 144 1.1.1.4 mrg ((__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si) (A), \ 145 1.1.1.4 mrg (__v4si) (B), \ 146 1.1.1.4 mrg (__v4si) (C))) 147 1.1 mrg 148 1.1 mrg extern __inline __m128i 149 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 150 1.1 mrg _mm_mask_dpwssd_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) 151 1.1 mrg { 152 1.1 mrg return (__m128i)__builtin_ia32_vpdpwssd_v4si_mask ((__v4si)__A, (__v4si) __C, 153 1.1 mrg (__v4si) __D, (__mmask8)__B); 154 1.1 mrg } 155 1.1 mrg 156 1.1 mrg extern __inline __m128i 157 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 158 1.1 mrg _mm_maskz_dpwssd_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) 159 1.1 mrg { 160 1.1 mrg return (__m128i)__builtin_ia32_vpdpwssd_v4si_maskz ((__v4si)__B, 161 1.1 mrg (__v4si) __C, (__v4si) __D, (__mmask8)__A); 162 1.1 mrg } 163 1.1 mrg 164 1.1.1.4 mrg #define _mm256_dpwssds_epi32(A, B, C) \ 165 1.1.1.4 mrg ((__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si) (A), \ 166 1.1.1.4 mrg (__v8si) (B), \ 167 1.1.1.4 mrg (__v8si) (C))) 168 1.1 mrg 169 1.1 mrg extern __inline __m256i 170 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 171 1.1 mrg _mm256_mask_dpwssds_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) 172 1.1 mrg { 173 1.1 mrg return (__m256i)__builtin_ia32_vpdpwssds_v8si_mask ((__v8si)__A, 174 1.1 mrg (__v8si) __C, (__v8si) __D, (__mmask8)__B); 175 1.1 mrg } 176 1.1 mrg 177 1.1 mrg extern __inline __m256i 178 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 179 1.1 mrg _mm256_maskz_dpwssds_epi32 (__mmask8 __A, __m256i __B, __m256i __C, 180 1.1 mrg __m256i __D) 181 1.1 mrg { 182 1.1 mrg return (__m256i)__builtin_ia32_vpdpwssds_v8si_maskz ((__v8si)__B, 183 1.1 mrg (__v8si) __C, (__v8si) __D, (__mmask8)__A); 184 1.1 mrg } 185 1.1 mrg 186 1.1.1.4 mrg #define _mm_dpwssds_epi32(A, B, C) \ 187 1.1.1.4 mrg ((__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si) (A), \ 188 1.1.1.4 mrg (__v4si) (B), \ 189 1.1.1.4 mrg (__v4si) (C))) 190 1.1 mrg 191 1.1 mrg extern __inline __m128i 192 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 193 1.1 mrg _mm_mask_dpwssds_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) 194 1.1 mrg { 195 1.1 mrg return (__m128i)__builtin_ia32_vpdpwssds_v4si_mask ((__v4si)__A, 196 1.1 mrg (__v4si) __C, (__v4si) __D, (__mmask8)__B); 197 1.1 mrg } 198 1.1 mrg 199 1.1 mrg extern __inline __m128i 200 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 201 1.1 mrg _mm_maskz_dpwssds_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) 202 1.1 mrg { 203 1.1 mrg return (__m128i)__builtin_ia32_vpdpwssds_v4si_maskz ((__v4si)__B, 204 1.1 mrg (__v4si) __C, (__v4si) __D, (__mmask8)__A); 205 1.1 mrg } 206 1.1 mrg #ifdef __DISABLE_AVX512VNNIVL__ 207 1.1 mrg #undef __DISABLE_AVX512VNNIVL__ 208 1.1 mrg #pragma GCC pop_options 209 1.1 mrg #endif /* __DISABLE_AVX512VNNIVL__ */ 210 1.1 mrg #endif /* __DISABLE_AVX512VNNIVL__ */ 211