avx512bf16vlintrin.h revision 1.1 1 1.1 mrg /* Copyright (C) 2019-2020 Free Software Foundation, Inc.
2 1.1 mrg
3 1.1 mrg This file is part of GCC.
4 1.1 mrg
5 1.1 mrg GCC is free software; you can redistribute it and/or modify
6 1.1 mrg it under the terms of the GNU General Public License as published by
7 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
8 1.1 mrg any later version.
9 1.1 mrg
10 1.1 mrg GCC is distributed in the hope that it will be useful,
11 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 1.1 mrg GNU General Public License for more details.
14 1.1 mrg
15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
16 1.1 mrg permissions described in the GCC Runtime Library Exception, version
17 1.1 mrg 3.1, as published by the Free Software Foundation.
18 1.1 mrg
19 1.1 mrg You should have received a copy of the GNU General Public License and
20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 1.1 mrg <http://www.gnu.org/licenses/>. */
23 1.1 mrg
24 1.1 mrg #ifndef _IMMINTRIN_H_INCLUDED
25 1.1 mrg #error "Never use <avx512bf16vlintrin.h> directly; include <immintrin.h> instead."
26 1.1 mrg #endif
27 1.1 mrg
28 1.1 mrg #ifndef _AVX512BF16VLINTRIN_H_INCLUDED
29 1.1 mrg #define _AVX512BF16VLINTRIN_H_INCLUDED
30 1.1 mrg
/* Unless both ISA macros are already defined, save the current target
   options and switch both features on for the definitions that follow.
   __DISABLE_AVX512BF16VL__ records that the options were pushed so that
   they can be popped again at the end of the header.  */
#if !(defined (__AVX512VL__) && defined (__AVX512BF16__))
#pragma GCC push_options
#pragma GCC target("avx512bf16,avx512vl")
#define __DISABLE_AVX512BF16VL__
#endif /* !__AVX512VL__ || !__AVX512BF16__ */
36 1.1 mrg
/* Internal vector types used by the intrinsic implementations:
   16 resp. 8 lanes of short (32 resp. 16 bytes).  */
typedef short __v16bh __attribute__ ((__vector_size__ (16 * sizeof (short))));
typedef short __v8bh __attribute__ ((__vector_size__ (8 * sizeof (short))));

/* User-visible vector types.  The Intel API lets these alias other vector
   types and their scalar components, hence __may_alias__.  */
typedef short __m256bh
  __attribute__ ((__may_alias__, __vector_size__ (16 * sizeof (short))));
typedef short __m128bh
  __attribute__ ((__may_alias__, __vector_size__ (8 * sizeof (short))));
45 1.1 mrg
46 1.1 mrg /* vcvtne2ps2bf16 */
47 1.1 mrg
48 1.1 mrg extern __inline __m256bh
49 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
50 1.1 mrg _mm256_cvtne2ps_pbh (__m256 __A, __m256 __B)
51 1.1 mrg {
52 1.1 mrg return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi(__A, __B);
53 1.1 mrg }
54 1.1 mrg
55 1.1 mrg extern __inline __m256bh
56 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
57 1.1 mrg _mm256_mask_cvtne2ps_pbh (__m256bh __A, __mmask16 __B, __m256 __C, __m256 __D)
58 1.1 mrg {
59 1.1 mrg return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_mask(__C, __D, __A, __B);
60 1.1 mrg }
61 1.1 mrg
62 1.1 mrg extern __inline __m256bh
63 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
64 1.1 mrg _mm256_maskz_cvtne2ps_pbh (__mmask16 __A, __m256 __B, __m256 __C)
65 1.1 mrg {
66 1.1 mrg return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_maskz(__B, __C, __A);
67 1.1 mrg }
68 1.1 mrg
69 1.1 mrg extern __inline __m128bh
70 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
71 1.1 mrg _mm_cvtne2ps_pbh (__m128 __A, __m128 __B)
72 1.1 mrg {
73 1.1 mrg return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi(__A, __B);
74 1.1 mrg }
75 1.1 mrg
76 1.1 mrg extern __inline __m128bh
77 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
78 1.1 mrg _mm_mask_cvtne2ps_pbh (__m128bh __A, __mmask8 __B, __m128 __C, __m128 __D)
79 1.1 mrg {
80 1.1 mrg return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_mask(__C, __D, __A, __B);
81 1.1 mrg }
82 1.1 mrg
83 1.1 mrg extern __inline __m128bh
84 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
85 1.1 mrg _mm_maskz_cvtne2ps_pbh (__mmask8 __A, __m128 __B, __m128 __C)
86 1.1 mrg {
87 1.1 mrg return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_maskz(__B, __C, __A);
88 1.1 mrg }
89 1.1 mrg
90 1.1 mrg /* vcvtneps2bf16 */
91 1.1 mrg
92 1.1 mrg extern __inline __m128bh
93 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
94 1.1 mrg _mm256_cvtneps_pbh (__m256 __A)
95 1.1 mrg {
96 1.1 mrg return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf(__A);
97 1.1 mrg }
98 1.1 mrg
99 1.1 mrg extern __inline __m128bh
100 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
101 1.1 mrg _mm256_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m256 __C)
102 1.1 mrg {
103 1.1 mrg return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_mask(__C, __A, __B);
104 1.1 mrg }
105 1.1 mrg
106 1.1 mrg extern __inline __m128bh
107 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
108 1.1 mrg _mm256_maskz_cvtneps_pbh (__mmask8 __A, __m256 __B)
109 1.1 mrg {
110 1.1 mrg return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_maskz(__B, __A);
111 1.1 mrg }
112 1.1 mrg
113 1.1 mrg extern __inline __m128bh
114 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
115 1.1 mrg _mm_cvtneps_pbh (__m128 __A)
116 1.1 mrg {
117 1.1 mrg return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf(__A);
118 1.1 mrg }
119 1.1 mrg
120 1.1 mrg extern __inline __m128bh
121 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
122 1.1 mrg _mm_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m128 __C)
123 1.1 mrg {
124 1.1 mrg return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_mask(__C, __A, __B);
125 1.1 mrg }
126 1.1 mrg
127 1.1 mrg extern __inline __m128bh
128 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
129 1.1 mrg _mm_maskz_cvtneps_pbh (__mmask8 __A, __m128 __B)
130 1.1 mrg {
131 1.1 mrg return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_maskz(__B, __A);
132 1.1 mrg }
133 1.1 mrg
134 1.1 mrg /* vdpbf16ps */
135 1.1 mrg
136 1.1 mrg extern __inline __m256
137 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
138 1.1 mrg _mm256_dpbf16_ps (__m256 __A, __m256bh __B, __m256bh __C)
139 1.1 mrg {
140 1.1 mrg return (__m256)__builtin_ia32_dpbf16ps_v8sf(__A, __B, __C);
141 1.1 mrg }
142 1.1 mrg
143 1.1 mrg extern __inline __m256
144 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
145 1.1 mrg _mm256_mask_dpbf16_ps (__m256 __A, __mmask8 __B, __m256bh __C, __m256bh __D)
146 1.1 mrg {
147 1.1 mrg return (__m256)__builtin_ia32_dpbf16ps_v8sf_mask(__A, __C, __D, __B);
148 1.1 mrg }
149 1.1 mrg
150 1.1 mrg extern __inline __m256
151 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
152 1.1 mrg _mm256_maskz_dpbf16_ps (__mmask8 __A, __m256 __B, __m256bh __C, __m256bh __D)
153 1.1 mrg {
154 1.1 mrg return (__m256)__builtin_ia32_dpbf16ps_v8sf_maskz(__B, __C, __D, __A);
155 1.1 mrg }
156 1.1 mrg
157 1.1 mrg extern __inline __m128
158 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
159 1.1 mrg _mm_dpbf16_ps (__m128 __A, __m128bh __B, __m128bh __C)
160 1.1 mrg {
161 1.1 mrg return (__m128)__builtin_ia32_dpbf16ps_v4sf(__A, __B, __C);
162 1.1 mrg }
163 1.1 mrg
164 1.1 mrg extern __inline __m128
165 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
166 1.1 mrg _mm_mask_dpbf16_ps (__m128 __A, __mmask8 __B, __m128bh __C, __m128bh __D)
167 1.1 mrg {
168 1.1 mrg return (__m128)__builtin_ia32_dpbf16ps_v4sf_mask(__A, __C, __D, __B);
169 1.1 mrg }
170 1.1 mrg
171 1.1 mrg extern __inline __m128
172 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
173 1.1 mrg _mm_maskz_dpbf16_ps (__mmask8 __A, __m128 __B, __m128bh __C, __m128bh __D)
174 1.1 mrg {
175 1.1 mrg return (__m128)__builtin_ia32_dpbf16ps_v4sf_maskz(__B, __C, __D, __A);
176 1.1 mrg }
177 1.1 mrg
178 1.1 mrg #ifdef __DISABLE_AVX512BF16VL__
179 1.1 mrg #undef __DISABLE_AVX512BF16VL__
180 1.1 mrg #pragma GCC pop_options
181 1.1 mrg #endif /* __DISABLE_AVX512BF16VL__ */
182 1.1 mrg
183 1.1 mrg #endif /* _AVX512BF16VLINTRIN_H_INCLUDED */
184