fmaintrin.h revision 1.10 1 1.10 mrg /* Copyright (C) 2011-2022 Free Software Foundation, Inc.
2 1.1 mrg
3 1.1 mrg This file is part of GCC.
4 1.1 mrg
5 1.1 mrg GCC is free software; you can redistribute it and/or modify
6 1.1 mrg it under the terms of the GNU General Public License as published by
7 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
8 1.1 mrg any later version.
9 1.1 mrg
10 1.1 mrg GCC is distributed in the hope that it will be useful,
11 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 1.1 mrg GNU General Public License for more details.
14 1.1 mrg
15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
16 1.1 mrg permissions described in the GCC Runtime Library Exception, version
17 1.1 mrg 3.1, as published by the Free Software Foundation.
18 1.1 mrg
19 1.1 mrg You should have received a copy of the GNU General Public License and
20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 1.1 mrg <http://www.gnu.org/licenses/>. */
23 1.1 mrg
24 1.1 mrg #ifndef _IMMINTRIN_H_INCLUDED
25 1.1 mrg # error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
26 1.1 mrg #endif
27 1.1 mrg
28 1.1 mrg #ifndef _FMAINTRIN_H_INCLUDED
29 1.1 mrg #define _FMAINTRIN_H_INCLUDED
30 1.1 mrg
/* When __FMA__ is not defined, save the current GCC options, switch the
   "fma" target on, and define __DISABLE_FMA__ as a marker that this
   was done.  */
#if !defined (__FMA__)
#pragma GCC push_options
#pragma GCC target("fma")
#define __DISABLE_FMA__
#endif /* !defined (__FMA__) */
36 1.1 mrg
37 1.1 mrg extern __inline __m128d
38 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
39 1.1 mrg _mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C)
40 1.1 mrg {
41 1.1 mrg return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B,
42 1.1 mrg (__v2df)__C);
43 1.1 mrg }
44 1.1 mrg
45 1.1 mrg extern __inline __m256d
46 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
47 1.1 mrg _mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C)
48 1.1 mrg {
49 1.1 mrg return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B,
50 1.1 mrg (__v4df)__C);
51 1.1 mrg }
52 1.1 mrg
53 1.1 mrg extern __inline __m128
54 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
55 1.1 mrg _mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C)
56 1.1 mrg {
57 1.1 mrg return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B,
58 1.1 mrg (__v4sf)__C);
59 1.1 mrg }
60 1.1 mrg
61 1.1 mrg extern __inline __m256
62 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
63 1.1 mrg _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
64 1.1 mrg {
65 1.1 mrg return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B,
66 1.1 mrg (__v8sf)__C);
67 1.1 mrg }
68 1.1 mrg
69 1.1 mrg extern __inline __m128d
70 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
71 1.1 mrg _mm_fmadd_sd (__m128d __A, __m128d __B, __m128d __C)
72 1.1 mrg {
73 1.1 mrg return (__m128d) __builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B,
74 1.1 mrg (__v2df)__C);
75 1.1 mrg }
76 1.1 mrg
77 1.1 mrg extern __inline __m128
78 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
79 1.1 mrg _mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C)
80 1.1 mrg {
81 1.1 mrg return (__m128) __builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B,
82 1.1 mrg (__v4sf)__C);
83 1.1 mrg }
84 1.1 mrg
85 1.1 mrg extern __inline __m128d
86 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
87 1.1 mrg _mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C)
88 1.1 mrg {
89 1.8 mrg return (__m128d)__builtin_ia32_vfmsubpd ((__v2df)__A, (__v2df)__B,
90 1.8 mrg (__v2df)__C);
91 1.1 mrg }
92 1.1 mrg
93 1.1 mrg extern __inline __m256d
94 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
95 1.1 mrg _mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C)
96 1.1 mrg {
97 1.8 mrg return (__m256d)__builtin_ia32_vfmsubpd256 ((__v4df)__A, (__v4df)__B,
98 1.8 mrg (__v4df)__C);
99 1.1 mrg }
100 1.1 mrg
101 1.1 mrg extern __inline __m128
102 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
103 1.1 mrg _mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C)
104 1.1 mrg {
105 1.8 mrg return (__m128)__builtin_ia32_vfmsubps ((__v4sf)__A, (__v4sf)__B,
106 1.8 mrg (__v4sf)__C);
107 1.1 mrg }
108 1.1 mrg
109 1.1 mrg extern __inline __m256
110 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
111 1.1 mrg _mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C)
112 1.1 mrg {
113 1.8 mrg return (__m256)__builtin_ia32_vfmsubps256 ((__v8sf)__A, (__v8sf)__B,
114 1.8 mrg (__v8sf)__C);
115 1.1 mrg }
116 1.1 mrg
117 1.1 mrg extern __inline __m128d
118 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
119 1.1 mrg _mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C)
120 1.1 mrg {
121 1.8 mrg return (__m128d)__builtin_ia32_vfmsubsd3 ((__v2df)__A, (__v2df)__B,
122 1.8 mrg (__v2df)__C);
123 1.1 mrg }
124 1.1 mrg
125 1.1 mrg extern __inline __m128
126 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
127 1.1 mrg _mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C)
128 1.1 mrg {
129 1.8 mrg return (__m128)__builtin_ia32_vfmsubss3 ((__v4sf)__A, (__v4sf)__B,
130 1.8 mrg (__v4sf)__C);
131 1.1 mrg }
132 1.1 mrg
133 1.1 mrg extern __inline __m128d
134 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
135 1.1 mrg _mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C)
136 1.1 mrg {
137 1.8 mrg return (__m128d)__builtin_ia32_vfnmaddpd ((__v2df)__A, (__v2df)__B,
138 1.8 mrg (__v2df)__C);
139 1.1 mrg }
140 1.1 mrg
141 1.1 mrg extern __inline __m256d
142 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
143 1.1 mrg _mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C)
144 1.1 mrg {
145 1.8 mrg return (__m256d)__builtin_ia32_vfnmaddpd256 ((__v4df)__A, (__v4df)__B,
146 1.8 mrg (__v4df)__C);
147 1.1 mrg }
148 1.1 mrg
149 1.1 mrg extern __inline __m128
150 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
151 1.1 mrg _mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C)
152 1.1 mrg {
153 1.8 mrg return (__m128)__builtin_ia32_vfnmaddps ((__v4sf)__A, (__v4sf)__B,
154 1.8 mrg (__v4sf)__C);
155 1.1 mrg }
156 1.1 mrg
157 1.1 mrg extern __inline __m256
158 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
159 1.1 mrg _mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C)
160 1.1 mrg {
161 1.8 mrg return (__m256)__builtin_ia32_vfnmaddps256 ((__v8sf)__A, (__v8sf)__B,
162 1.8 mrg (__v8sf)__C);
163 1.1 mrg }
164 1.1 mrg
165 1.1 mrg extern __inline __m128d
166 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
167 1.1 mrg _mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C)
168 1.1 mrg {
169 1.8 mrg return (__m128d)__builtin_ia32_vfnmaddsd3 ((__v2df)__A, (__v2df)__B,
170 1.8 mrg (__v2df)__C);
171 1.1 mrg }
172 1.1 mrg
173 1.1 mrg extern __inline __m128
174 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
175 1.1 mrg _mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C)
176 1.1 mrg {
177 1.8 mrg return (__m128)__builtin_ia32_vfnmaddss3 ((__v4sf)__A, (__v4sf)__B,
178 1.8 mrg (__v4sf)__C);
179 1.1 mrg }
180 1.1 mrg
181 1.1 mrg extern __inline __m128d
182 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
183 1.1 mrg _mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C)
184 1.1 mrg {
185 1.8 mrg return (__m128d)__builtin_ia32_vfnmsubpd ((__v2df)__A, (__v2df)__B,
186 1.8 mrg (__v2df)__C);
187 1.1 mrg }
188 1.1 mrg
189 1.1 mrg extern __inline __m256d
190 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
191 1.1 mrg _mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C)
192 1.1 mrg {
193 1.8 mrg return (__m256d)__builtin_ia32_vfnmsubpd256 ((__v4df)__A, (__v4df)__B,
194 1.8 mrg (__v4df)__C);
195 1.1 mrg }
196 1.1 mrg
197 1.1 mrg extern __inline __m128
198 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
199 1.1 mrg _mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C)
200 1.1 mrg {
201 1.8 mrg return (__m128)__builtin_ia32_vfnmsubps ((__v4sf)__A, (__v4sf)__B,
202 1.8 mrg (__v4sf)__C);
203 1.1 mrg }
204 1.1 mrg
205 1.1 mrg extern __inline __m256
206 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
207 1.1 mrg _mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C)
208 1.1 mrg {
209 1.8 mrg return (__m256)__builtin_ia32_vfnmsubps256 ((__v8sf)__A, (__v8sf)__B,
210 1.8 mrg (__v8sf)__C);
211 1.1 mrg }
212 1.1 mrg
213 1.1 mrg extern __inline __m128d
214 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
215 1.1 mrg _mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C)
216 1.1 mrg {
217 1.8 mrg return (__m128d)__builtin_ia32_vfnmsubsd3 ((__v2df)__A, (__v2df)__B,
218 1.8 mrg (__v2df)__C);
219 1.1 mrg }
220 1.1 mrg
221 1.1 mrg extern __inline __m128
222 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
223 1.1 mrg _mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C)
224 1.1 mrg {
225 1.8 mrg return (__m128)__builtin_ia32_vfnmsubss3 ((__v4sf)__A, (__v4sf)__B,
226 1.8 mrg (__v4sf)__C);
227 1.1 mrg }
228 1.1 mrg
229 1.1 mrg extern __inline __m128d
230 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
231 1.1 mrg _mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C)
232 1.1 mrg {
233 1.1 mrg return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B,
234 1.1 mrg (__v2df)__C);
235 1.1 mrg }
236 1.1 mrg
237 1.1 mrg extern __inline __m256d
238 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
239 1.1 mrg _mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C)
240 1.1 mrg {
241 1.1 mrg return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A,
242 1.1 mrg (__v4df)__B,
243 1.1 mrg (__v4df)__C);
244 1.1 mrg }
245 1.1 mrg
246 1.1 mrg extern __inline __m128
247 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
248 1.1 mrg _mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C)
249 1.1 mrg {
250 1.1 mrg return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B,
251 1.1 mrg (__v4sf)__C);
252 1.1 mrg }
253 1.1 mrg
254 1.1 mrg extern __inline __m256
255 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
256 1.1 mrg _mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C)
257 1.1 mrg {
258 1.1 mrg return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A,
259 1.1 mrg (__v8sf)__B,
260 1.1 mrg (__v8sf)__C);
261 1.1 mrg }
262 1.1 mrg
263 1.1 mrg extern __inline __m128d
264 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
265 1.1 mrg _mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C)
266 1.1 mrg {
267 1.1 mrg return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B,
268 1.1 mrg -(__v2df)__C);
269 1.1 mrg }
270 1.1 mrg
271 1.1 mrg extern __inline __m256d
272 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
273 1.1 mrg _mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C)
274 1.1 mrg {
275 1.1 mrg return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A,
276 1.1 mrg (__v4df)__B,
277 1.1 mrg -(__v4df)__C);
278 1.1 mrg }
279 1.1 mrg
280 1.1 mrg extern __inline __m128
281 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
282 1.1 mrg _mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C)
283 1.1 mrg {
284 1.1 mrg return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B,
285 1.1 mrg -(__v4sf)__C);
286 1.1 mrg }
287 1.1 mrg
288 1.1 mrg extern __inline __m256
289 1.1 mrg __attribute__((__gnu_inline__, __always_inline__, __artificial__))
290 1.1 mrg _mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C)
291 1.1 mrg {
292 1.1 mrg return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A,
293 1.1 mrg (__v8sf)__B,
294 1.1 mrg -(__v8sf)__C);
295 1.1 mrg }
296 1.1 mrg
/* If the __DISABLE_FMA__ marker is defined, remove it and pop the most
   recently pushed GCC options.  */
#if defined (__DISABLE_FMA__)
#undef __DISABLE_FMA__
#pragma GCC pop_options
#endif /* defined (__DISABLE_FMA__) */
301 1.1 mrg
302 1.1 mrg #endif
303