/* Copyright (C) 2011-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This header is only meant to be pulled in through <immintrin.h>;
   refuse any direct inclusion.  */
#ifndef _IMMINTRIN_H_INCLUDED
# error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef _FMAINTRIN_H_INCLUDED
#define _FMAINTRIN_H_INCLUDED

/* When FMA is not already enabled for the translation unit, switch it
   on for the definitions below only.  __DISABLE_FMA__ records that the
   saved options must be restored at the end of this header.  */
#ifndef __FMA__
#pragma GCC push_options
#pragma GCC target("fma")
#define __DISABLE_FMA__
#endif /* __FMA__ */

/* Every intrinsic below is a thin always-inline wrapper: it casts its
   three operands to the matching internal vector type, forwards them
   to one __builtin_ia32_vf* builtin and casts the result back.

   Naming: _mm_ wrappers take 128-bit vectors, _mm256_ wrappers take
   256-bit vectors; _pd/_ps are the packed double/single forms and
   _sd/_ss are the scalar double/single forms.  */

/* fmadd family.  The Intel Intrinsics Guide documents these as
   A * B + C (for _sd/_ss on the lowest element only, with the
   remaining elements taken from A).  */

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B,
                                           (__v2df)__C);
}

extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B,
                                              (__v4df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B,
                                          (__v4sf)__C);
}

extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
{
  return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B,
                                             (__v8sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_sd (__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d) __builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B,
                                             (__v2df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128) __builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B,
                                            (__v4sf)__C);
}

/* fmsub family.  Documented by the Intel Intrinsics Guide as
   A * B - C.  */

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d)__builtin_ia32_vfmsubpd ((__v2df)__A, (__v2df)__B,
                                           (__v2df)__C);
}

extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d)__builtin_ia32_vfmsubpd256 ((__v4df)__A, (__v4df)__B,
                                              (__v4df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128)__builtin_ia32_vfmsubps ((__v4sf)__A, (__v4sf)__B,
                                          (__v4sf)__C);
}

extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C)
{
  return (__m256)__builtin_ia32_vfmsubps256 ((__v8sf)__A, (__v8sf)__B,
                                             (__v8sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d)__builtin_ia32_vfmsubsd3 ((__v2df)__A, (__v2df)__B,
                                            (__v2df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128)__builtin_ia32_vfmsubss3 ((__v4sf)__A, (__v4sf)__B,
                                           (__v4sf)__C);
}

/* fnmadd family.  Documented by the Intel Intrinsics Guide as
   -(A * B) + C.  */

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d)__builtin_ia32_vfnmaddpd ((__v2df)__A, (__v2df)__B,
                                            (__v2df)__C);
}

extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d)__builtin_ia32_vfnmaddpd256 ((__v4df)__A, (__v4df)__B,
                                               (__v4df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128)__builtin_ia32_vfnmaddps ((__v4sf)__A, (__v4sf)__B,
                                           (__v4sf)__C);
}

extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C)
{
  return (__m256)__builtin_ia32_vfnmaddps256 ((__v8sf)__A, (__v8sf)__B,
                                              (__v8sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d)__builtin_ia32_vfnmaddsd3 ((__v2df)__A, (__v2df)__B,
                                             (__v2df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128)__builtin_ia32_vfnmaddss3 ((__v4sf)__A, (__v4sf)__B,
                                            (__v4sf)__C);
}

/* fnmsub family.  Documented by the Intel Intrinsics Guide as
   -(A * B) - C.  */

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d)__builtin_ia32_vfnmsubpd ((__v2df)__A, (__v2df)__B,
                                            (__v2df)__C);
}

extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d)__builtin_ia32_vfnmsubpd256 ((__v4df)__A, (__v4df)__B,
                                               (__v4df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128)__builtin_ia32_vfnmsubps ((__v4sf)__A, (__v4sf)__B,
                                           (__v4sf)__C);
}

extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C)
{
  return (__m256)__builtin_ia32_vfnmsubps256 ((__v8sf)__A, (__v8sf)__B,
                                              (__v8sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d)__builtin_ia32_vfnmsubsd3 ((__v2df)__A, (__v2df)__B,
                                             (__v2df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128)__builtin_ia32_vfnmsubss3 ((__v4sf)__A, (__v4sf)__B,
                                            (__v4sf)__C);
}

/* fmaddsub family (packed forms only).  Documented by the Intel
   Intrinsics Guide as A * B with C alternately subtracted from and
   added to successive elements.  */

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B,
                                              (__v2df)__C);
}

extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A,
                                                 (__v4df)__B,
                                                 (__v4df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B,
                                             (__v4sf)__C);
}

extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C)
{
  return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A,
                                                (__v8sf)__B,
                                                (__v8sf)__C);
}

/* fmsubadd family (packed forms only).  There is no separate builtin
   used here: each wrapper reuses the corresponding fmaddsub builtin
   and passes -C, which swaps the add and subtract lanes.  */

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B,
                                              -(__v2df)__C);
}

extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A,
                                                 (__v4df)__B,
                                                 -(__v4df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B,
                                             -(__v4sf)__C);
}

extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C)
{
  return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A,
                                                (__v8sf)__B,
                                                -(__v8sf)__C);
}

/* Restore the caller's target options if this header had to enable
   FMA itself.  */
#ifdef __DISABLE_FMA__
#undef __DISABLE_FMA__
#pragma GCC pop_options
#endif /* __DISABLE_FMA__ */

#endif