1 1.7 mrg /* Copyright (C) 2014-2022 Free Software Foundation, Inc. 2 1.1 mrg 3 1.1 mrg This file is part of GCC. 4 1.1 mrg 5 1.1 mrg GCC is free software; you can redistribute it and/or modify 6 1.1 mrg it under the terms of the GNU General Public License as published by 7 1.1 mrg the Free Software Foundation; either version 3, or (at your option) 8 1.1 mrg any later version. 9 1.1 mrg 10 1.1 mrg GCC is distributed in the hope that it will be useful, 11 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of 12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 1.1 mrg GNU General Public License for more details. 14 1.1 mrg 15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional 16 1.1 mrg permissions described in the GCC Runtime Library Exception, version 17 1.1 mrg 3.1, as published by the Free Software Foundation. 18 1.1 mrg 19 1.1 mrg You should have received a copy of the GNU General Public License and 20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program; 21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 1.1 mrg <http://www.gnu.org/licenses/>. */ 23 1.1 mrg 24 1.1 mrg #ifndef _IMMINTRIN_H_INCLUDED 25 1.1 mrg #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead." 26 1.1 mrg #endif 27 1.1 mrg 28 1.1 mrg #ifndef _AVX512DQINTRIN_H_INCLUDED 29 1.1 mrg #define _AVX512DQINTRIN_H_INCLUDED 30 1.1 mrg 31 1.1 mrg #ifndef __AVX512DQ__ 32 1.1 mrg #pragma GCC push_options 33 1.1 mrg #pragma GCC target("avx512dq") 34 1.1 mrg #define __DISABLE_AVX512DQ__ 35 1.1 mrg #endif /* __AVX512DQ__ */ 36 1.1 mrg 37 1.3 mrg extern __inline unsigned char 38 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 39 1.3 mrg _ktest_mask8_u8 (__mmask8 __A, __mmask8 __B, unsigned char *__CF) 40 1.3 mrg { 41 1.3 mrg *__CF = (unsigned char) __builtin_ia32_ktestcqi (__A, __B); 42 1.3 mrg return (unsigned char) __builtin_ia32_ktestzqi (__A, __B); 43 1.3 mrg } 44 1.3 mrg 45 1.3 mrg extern __inline unsigned char 46 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 47 1.3 mrg _ktestz_mask8_u8 (__mmask8 __A, __mmask8 __B) 48 1.3 mrg { 49 1.3 mrg return (unsigned char) __builtin_ia32_ktestzqi (__A, __B); 50 1.3 mrg } 51 1.3 mrg 52 1.3 mrg extern __inline unsigned char 53 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 54 1.3 mrg _ktestc_mask8_u8 (__mmask8 __A, __mmask8 __B) 55 1.3 mrg { 56 1.3 mrg return (unsigned char) __builtin_ia32_ktestcqi (__A, __B); 57 1.3 mrg } 58 1.3 mrg 59 1.3 mrg extern __inline unsigned char 60 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 61 1.3 mrg _ktest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF) 62 1.3 mrg { 63 1.3 mrg *__CF = (unsigned char) __builtin_ia32_ktestchi (__A, __B); 64 1.3 mrg return (unsigned char) __builtin_ia32_ktestzhi (__A, __B); 65 1.3 mrg } 66 1.3 mrg 67 1.3 mrg extern __inline unsigned char 68 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 69 1.3 mrg _ktestz_mask16_u8 (__mmask16 __A, __mmask16 __B) 70 1.3 mrg { 71 1.3 mrg return (unsigned char) __builtin_ia32_ktestzhi (__A, __B); 72 1.3 mrg } 73 1.3 mrg 74 1.3 mrg extern __inline unsigned char 75 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 76 1.3 mrg _ktestc_mask16_u8 (__mmask16 __A, __mmask16 __B) 77 1.3 mrg { 78 1.3 mrg return (unsigned char) __builtin_ia32_ktestchi (__A, __B); 79 1.3 mrg } 80 1.3 mrg 81 1.3 mrg extern __inline unsigned char 82 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 83 1.3 mrg _kortest_mask8_u8 (__mmask8 __A, __mmask8 __B, unsigned char *__CF) 84 1.3 mrg { 85 1.3 mrg *__CF = (unsigned char) __builtin_ia32_kortestcqi (__A, __B); 86 1.3 mrg return (unsigned char) __builtin_ia32_kortestzqi (__A, __B); 87 1.3 mrg } 88 1.3 mrg 89 1.3 mrg extern __inline unsigned char 90 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 91 1.3 mrg _kortestz_mask8_u8 (__mmask8 __A, __mmask8 __B) 92 1.3 mrg { 93 1.3 mrg return (unsigned char) __builtin_ia32_kortestzqi (__A, __B); 94 1.3 mrg } 95 1.3 mrg 96 1.3 mrg extern __inline unsigned char 97 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 98 1.3 mrg _kortestc_mask8_u8 (__mmask8 __A, __mmask8 __B) 99 1.3 mrg { 100 1.3 mrg return (unsigned char) __builtin_ia32_kortestcqi (__A, __B); 101 1.3 mrg } 102 1.3 mrg 103 1.3 mrg extern __inline __mmask8 104 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 105 1.3 mrg _kadd_mask8 (__mmask8 __A, __mmask8 __B) 106 1.3 mrg { 107 1.3 mrg return (__mmask8) __builtin_ia32_kaddqi ((__mmask8) __A, (__mmask8) __B); 108 1.3 mrg } 109 1.3 mrg 110 1.3 mrg extern __inline __mmask16 111 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 112 1.3 mrg _kadd_mask16 (__mmask16 __A, __mmask16 __B) 113 1.3 mrg { 114 1.3 mrg return (__mmask16) __builtin_ia32_kaddhi ((__mmask16) __A, (__mmask16) __B); 115 1.3 mrg } 116 1.3 mrg 117 1.3 mrg extern __inline unsigned int 118 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 119 1.3 mrg _cvtmask8_u32 (__mmask8 __A) 120 1.3 mrg { 121 1.3 mrg return (unsigned int) __builtin_ia32_kmovb ((__mmask8 ) __A); 122 1.3 mrg } 123 1.3 mrg 124 1.3 mrg extern __inline __mmask8 125 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 126 1.3 mrg _cvtu32_mask8 (unsigned int __A) 127 1.3 mrg { 128 1.3 mrg return (__mmask8) __builtin_ia32_kmovb ((__mmask8) __A); 129 1.3 mrg } 130 1.3 mrg 131 1.3 mrg extern __inline __mmask8 132 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 133 1.3 mrg _load_mask8 (__mmask8 *__A) 134 1.3 mrg { 135 1.3 mrg return (__mmask8) __builtin_ia32_kmovb (*(__mmask8 *) __A); 136 1.3 mrg } 137 1.3 mrg 138 1.3 mrg extern __inline void 139 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 140 1.3 mrg _store_mask8 (__mmask8 *__A, __mmask8 __B) 141 1.3 mrg { 142 1.3 mrg *(__mmask8 *) __A = __builtin_ia32_kmovb (__B); 143 1.3 mrg } 144 1.3 mrg 145 1.3 mrg extern __inline __mmask8 146 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 147 1.3 mrg _knot_mask8 (__mmask8 __A) 148 1.3 mrg { 149 1.3 mrg return (__mmask8) __builtin_ia32_knotqi ((__mmask8) __A); 150 1.3 mrg } 151 1.3 mrg 152 1.3 mrg extern __inline __mmask8 153 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 154 1.3 mrg _kor_mask8 (__mmask8 __A, __mmask8 __B) 155 1.3 mrg { 156 1.3 mrg return (__mmask8) __builtin_ia32_korqi ((__mmask8) __A, (__mmask8) __B); 157 1.3 mrg } 158 1.3 mrg 159 1.3 mrg extern __inline __mmask8 160 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 161 1.3 mrg _kxnor_mask8 (__mmask8 __A, __mmask8 __B) 162 1.3 mrg { 163 1.3 mrg return (__mmask8) __builtin_ia32_kxnorqi ((__mmask8) __A, (__mmask8) __B); 164 1.3 mrg } 165 1.3 mrg 166 1.3 mrg extern __inline __mmask8 167 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 168 1.3 mrg _kxor_mask8 (__mmask8 __A, __mmask8 __B) 169 1.3 mrg { 170 1.3 mrg return (__mmask8) __builtin_ia32_kxorqi ((__mmask8) __A, (__mmask8) __B); 171 1.3 mrg } 172 1.3 mrg 173 1.3 mrg extern __inline __mmask8 174 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 175 1.3 mrg _kand_mask8 (__mmask8 __A, __mmask8 __B) 176 1.3 mrg { 177 1.3 mrg return (__mmask8) __builtin_ia32_kandqi ((__mmask8) __A, (__mmask8) __B); 178 1.3 mrg } 179 1.3 mrg 180 1.3 mrg extern __inline __mmask8 181 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 182 1.3 mrg _kandn_mask8 (__mmask8 __A, __mmask8 __B) 183 1.3 mrg { 184 1.3 mrg return (__mmask8) __builtin_ia32_kandnqi ((__mmask8) __A, (__mmask8) __B); 185 1.3 mrg } 186 1.3 mrg 187 1.1 mrg extern __inline __m512d 188 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 189 1.1 mrg _mm512_broadcast_f64x2 (__m128d __A) 190 1.1 mrg { 191 1.3 mrg return (__m512d) 192 1.3 mrg __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A, 193 1.3 mrg _mm512_undefined_pd (), 194 1.3 mrg (__mmask8) -1); 195 1.1 mrg } 196 1.1 mrg 197 1.1 mrg extern __inline __m512d 198 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 199 1.1 mrg _mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A) 200 1.1 mrg { 201 1.1 mrg return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) 202 1.1 mrg __A, 203 1.1 mrg (__v8df) 204 1.1 mrg __O, __M); 205 1.1 mrg } 206 1.1 mrg 207 1.1 mrg extern __inline __m512d 208 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 209 1.1 mrg _mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) 210 1.1 mrg { 211 1.1 mrg return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) 212 1.1 mrg __A, 213 1.1 mrg (__v8df) 214 1.1 mrg _mm512_setzero_ps (), 215 1.1 mrg __M); 216 1.1 mrg } 217 1.1 mrg 218 1.1 mrg extern __inline __m512i 219 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 220 1.1 mrg _mm512_broadcast_i64x2 (__m128i __A) 221 1.1 mrg { 222 1.3 mrg return (__m512i) 223 1.3 mrg __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A, 224 1.3 mrg _mm512_undefined_epi32 (), 225 1.3 mrg (__mmask8) -1); 226 1.1 mrg } 227 1.1 mrg 228 1.1 mrg extern __inline __m512i 229 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 230 1.1 mrg _mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A) 231 1.1 mrg { 232 1.1 mrg return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) 233 1.1 mrg __A, 234 1.1 mrg (__v8di) 235 1.1 mrg __O, __M); 236 1.1 mrg } 237 1.1 mrg 238 1.1 mrg extern __inline __m512i 239 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 240 1.1 mrg _mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) 241 1.1 mrg { 242 1.1 mrg return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) 243 1.1 mrg __A, 244 1.1 mrg (__v8di) 245 1.1 mrg _mm512_setzero_si512 (), 246 1.1 mrg __M); 247 1.1 mrg } 248 1.1 mrg 249 1.1 mrg extern __inline __m512 250 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 251 1.1 mrg _mm512_broadcast_f32x2 (__m128 __A) 252 1.1 mrg { 253 1.3 mrg return (__m512) 254 1.3 mrg __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, 255 1.3 mrg (__v16sf)_mm512_undefined_ps (), 256 1.3 mrg (__mmask16) -1); 257 1.1 mrg } 258 1.1 mrg 259 1.1 mrg extern __inline __m512 260 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 261 1.1 mrg _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A) 262 1.1 mrg { 263 1.1 mrg return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, 264 1.1 mrg (__v16sf) 265 1.1 mrg __O, __M); 266 1.1 mrg } 267 1.1 mrg 268 1.1 mrg extern __inline __m512 269 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 270 1.1 mrg _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A) 271 1.1 mrg { 272 1.1 mrg return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, 273 1.1 mrg (__v16sf) 274 1.1 mrg _mm512_setzero_ps (), 275 1.1 mrg __M); 276 1.1 mrg } 277 1.1 mrg 278 1.1 mrg extern __inline __m512i 279 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 280 1.1 mrg _mm512_broadcast_i32x2 (__m128i __A) 281 1.1 mrg { 282 1.3 mrg return (__m512i) 283 1.3 mrg __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A, 284 1.3 mrg (__v16si) 285 1.3 mrg _mm512_undefined_epi32 (), 286 1.3 mrg (__mmask16) -1); 287 1.1 mrg } 288 1.1 mrg 289 1.1 mrg extern __inline __m512i 290 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 291 1.1 mrg _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A) 292 1.1 mrg { 293 1.1 mrg return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) 294 1.1 mrg __A, 295 1.1 mrg (__v16si) 296 1.1 mrg __O, __M); 297 1.1 mrg } 298 1.1 mrg 299 1.1 mrg extern __inline __m512i 300 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 301 1.1 mrg _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) 302 1.1 mrg { 303 1.1 mrg return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) 304 1.1 mrg __A, 305 1.1 mrg (__v16si) 306 1.1 mrg _mm512_setzero_si512 (), 307 1.1 mrg __M); 308 1.1 mrg } 309 1.1 mrg 310 1.1 mrg extern __inline __m512 311 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 312 1.1 mrg _mm512_broadcast_f32x8 (__m256 __A) 313 1.1 mrg { 314 1.3 mrg return (__m512) 315 1.3 mrg __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, 316 1.3 mrg _mm512_undefined_ps (), 317 1.3 mrg (__mmask16) -1); 318 1.1 mrg } 319 1.1 mrg 320 1.1 mrg extern __inline __m512 321 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 322 1.1 mrg _mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A) 323 1.1 mrg { 324 1.1 mrg return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, 325 1.1 mrg (__v16sf)__O, 326 1.1 mrg __M); 327 1.1 mrg } 328 1.1 mrg 329 1.1 mrg extern __inline __m512 330 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 331 1.1 mrg _mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A) 332 1.1 mrg { 333 1.1 mrg return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, 334 1.1 mrg (__v16sf) 335 1.1 mrg _mm512_setzero_ps (), 336 1.1 mrg __M); 337 1.1 mrg } 338 1.1 mrg 339 1.1 mrg extern __inline __m512i 340 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 341 1.1 mrg _mm512_broadcast_i32x8 (__m256i __A) 342 1.1 mrg { 343 1.3 mrg return (__m512i) 344 1.3 mrg __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A, 345 1.3 mrg (__v16si) 346 1.3 mrg _mm512_undefined_epi32 (), 347 1.3 mrg (__mmask16) -1); 348 1.1 mrg } 349 1.1 mrg 350 1.1 mrg extern __inline __m512i 351 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 352 1.1 mrg _mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A) 353 1.1 mrg { 354 1.1 mrg return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) 355 1.1 mrg __A, 356 1.1 mrg (__v16si)__O, 357 1.1 mrg __M); 358 1.1 mrg } 359 1.1 mrg 360 1.1 mrg extern __inline __m512i 361 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 362 1.1 mrg _mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A) 363 1.1 mrg { 364 1.1 mrg return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) 365 1.1 mrg __A, 366 1.1 mrg (__v16si) 367 1.1 mrg _mm512_setzero_si512 (), 368 1.1 mrg __M); 369 1.1 mrg } 370 1.1 mrg 371 1.1 mrg extern __inline __m512i 372 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 373 1.1 mrg _mm512_mullo_epi64 (__m512i __A, __m512i __B) 374 1.1 mrg { 375 1.1 mrg return (__m512i) ((__v8du) __A * (__v8du) __B); 376 1.1 mrg } 377 1.1 mrg 378 1.1 mrg extern __inline __m512i 379 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 380 1.1 mrg _mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, 381 1.1 mrg __m512i __B) 382 1.1 mrg { 383 1.1 mrg return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A, 384 1.1 mrg (__v8di) __B, 385 1.1 mrg (__v8di) __W, 386 1.1 mrg (__mmask8) __U); 387 1.1 mrg } 388 1.1 mrg 389 1.1 mrg extern __inline __m512i 390 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 391 1.1 mrg _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 392 1.1 mrg { 393 1.1 mrg return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A, 394 1.1 mrg (__v8di) __B, 395 1.1 mrg (__v8di) 396 1.1 mrg _mm512_setzero_si512 (), 397 1.1 mrg (__mmask8) __U); 398 1.1 mrg } 399 1.1 mrg 400 1.1 mrg extern __inline __m512d 401 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 402 1.1 mrg _mm512_xor_pd (__m512d __A, __m512d __B) 403 1.1 mrg { 404 1.1 mrg return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, 405 1.1 mrg (__v8df) __B, 406 1.1 mrg (__v8df) 407 1.1 mrg _mm512_setzero_pd (), 408 1.1 mrg (__mmask8) -1); 409 1.1 mrg } 410 1.1 mrg 411 1.1 mrg extern __inline __m512d 412 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 413 1.1 mrg _mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, 414 1.1 mrg __m512d __B) 415 1.1 mrg { 416 1.1 mrg return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, 417 1.1 mrg (__v8df) __B, 418 1.1 mrg (__v8df) __W, 419 1.1 mrg (__mmask8) __U); 420 1.1 mrg } 421 1.1 mrg 422 1.1 mrg extern __inline __m512d 423 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 424 1.1 mrg _mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) 425 1.1 mrg { 426 1.1 mrg return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, 427 1.1 mrg (__v8df) __B, 428 1.1 mrg (__v8df) 429 1.1 mrg _mm512_setzero_pd (), 430 1.1 mrg (__mmask8) __U); 431 1.1 mrg } 432 1.1 mrg 433 1.1 mrg extern __inline __m512 434 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 435 1.1 mrg _mm512_xor_ps (__m512 __A, __m512 __B) 436 1.1 mrg { 437 1.1 mrg return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, 438 1.1 mrg (__v16sf) __B, 439 1.1 mrg (__v16sf) 440 1.1 mrg _mm512_setzero_ps (), 441 1.1 mrg (__mmask16) -1); 442 1.1 mrg } 443 1.1 mrg 444 1.1 mrg extern __inline __m512 445 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 446 1.1 mrg _mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 447 1.1 mrg { 448 1.1 mrg return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, 449 1.1 mrg (__v16sf) __B, 450 1.1 mrg (__v16sf) __W, 451 1.1 mrg (__mmask16) __U); 452 1.1 mrg } 453 1.1 mrg 454 1.1 mrg extern __inline __m512 455 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 456 1.1 mrg _mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) 457 1.1 mrg { 458 1.1 mrg return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, 459 1.1 mrg (__v16sf) __B, 460 1.1 mrg (__v16sf) 461 1.1 mrg _mm512_setzero_ps (), 462 1.1 mrg (__mmask16) __U); 463 1.1 mrg } 464 1.1 mrg 465 1.1 mrg extern __inline __m512d 466 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 467 1.1 mrg _mm512_or_pd (__m512d __A, __m512d __B) 468 1.1 mrg { 469 1.1 mrg return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, 470 1.1 mrg (__v8df) __B, 471 1.1 mrg (__v8df) 472 1.1 mrg _mm512_setzero_pd (), 473 1.1 mrg (__mmask8) -1); 474 1.1 mrg } 475 1.1 mrg 476 1.1 mrg extern __inline __m512d 477 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 478 1.1 mrg _mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 479 1.1 mrg { 480 1.1 mrg return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, 481 1.1 mrg (__v8df) __B, 482 1.1 mrg (__v8df) __W, 483 1.1 mrg (__mmask8) __U); 484 1.1 mrg } 485 1.1 mrg 486 1.1 mrg extern __inline __m512d 487 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 488 1.1 mrg _mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) 489 1.1 mrg { 490 1.1 mrg return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, 491 1.1 mrg (__v8df) __B, 492 1.1 mrg (__v8df) 493 1.1 mrg _mm512_setzero_pd (), 494 1.1 mrg (__mmask8) __U); 495 1.1 mrg } 496 1.1 mrg 497 1.1 mrg extern __inline __m512 498 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 499 1.1 mrg _mm512_or_ps (__m512 __A, __m512 __B) 500 1.1 mrg { 501 1.1 mrg return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, 502 1.1 mrg (__v16sf) __B, 503 1.1 mrg (__v16sf) 504 1.1 mrg _mm512_setzero_ps (), 505 1.1 mrg (__mmask16) -1); 506 1.1 mrg } 507 1.1 mrg 508 1.1 mrg extern __inline __m512 509 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 510 1.1 mrg _mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 511 1.1 mrg { 512 1.1 mrg return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, 513 1.1 mrg (__v16sf) __B, 514 1.1 mrg (__v16sf) __W, 515 1.1 mrg (__mmask16) __U); 516 1.1 mrg } 517 1.1 mrg 518 1.1 mrg extern __inline __m512 519 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 520 1.1 mrg _mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) 521 1.1 mrg { 522 1.1 mrg return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, 523 1.1 mrg (__v16sf) __B, 524 1.1 mrg (__v16sf) 525 1.1 mrg _mm512_setzero_ps (), 526 1.1 mrg (__mmask16) __U); 527 1.1 mrg } 528 1.1 mrg 529 1.1 mrg extern __inline __m512d 530 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 531 1.1 mrg _mm512_and_pd (__m512d __A, __m512d __B) 532 1.1 mrg { 533 1.1 mrg return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, 534 1.1 mrg (__v8df) __B, 535 1.1 mrg (__v8df) 536 1.1 mrg _mm512_setzero_pd (), 537 1.1 mrg (__mmask8) -1); 538 1.1 mrg } 539 1.1 mrg 540 1.1 mrg extern __inline __m512d 541 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 542 1.1 mrg _mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, 543 1.1 mrg __m512d __B) 544 1.1 mrg { 545 1.1 mrg return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, 546 1.1 mrg (__v8df) __B, 547 1.1 mrg (__v8df) __W, 548 1.1 mrg (__mmask8) __U); 549 1.1 mrg } 550 1.1 mrg 551 1.1 mrg extern __inline __m512d 552 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 553 1.1 mrg _mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) 554 1.1 mrg { 555 1.1 mrg return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, 556 1.1 mrg (__v8df) __B, 557 1.1 mrg (__v8df) 558 1.1 mrg _mm512_setzero_pd (), 559 1.1 mrg (__mmask8) __U); 560 1.1 mrg } 561 1.1 mrg 562 1.1 mrg extern __inline __m512 563 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 564 1.1 mrg _mm512_and_ps (__m512 __A, __m512 __B) 565 1.1 mrg { 566 1.1 mrg return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, 567 1.1 mrg (__v16sf) __B, 568 1.1 mrg (__v16sf) 569 1.1 mrg _mm512_setzero_ps (), 570 1.1 mrg (__mmask16) -1); 571 1.1 mrg } 572 1.1 mrg 573 1.1 mrg extern __inline __m512 574 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 575 1.1 mrg _mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 576 1.1 mrg { 577 1.1 mrg return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, 578 1.1 mrg (__v16sf) __B, 579 1.1 mrg (__v16sf) __W, 580 1.1 mrg (__mmask16) __U); 581 1.1 mrg } 582 1.1 mrg 583 1.1 mrg extern __inline __m512 584 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 585 1.1 mrg _mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) 586 1.1 mrg { 587 1.1 mrg return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, 588 1.1 mrg (__v16sf) __B, 589 1.1 mrg (__v16sf) 590 1.1 mrg _mm512_setzero_ps (), 591 1.1 mrg (__mmask16) __U); 592 1.1 mrg } 593 1.1 mrg 594 1.1 mrg extern __inline __m512d 595 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 596 1.1 mrg _mm512_andnot_pd (__m512d __A, __m512d __B) 597 1.1 mrg { 598 1.1 mrg return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, 599 1.1 mrg (__v8df) __B, 600 1.1 mrg (__v8df) 601 1.1 mrg _mm512_setzero_pd (), 602 1.1 mrg (__mmask8) -1); 603 1.1 mrg } 604 1.1 mrg 605 1.1 mrg extern __inline __m512d 606 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 607 1.1 mrg _mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, 608 1.1 mrg __m512d __B) 609 1.1 mrg { 610 1.1 mrg return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, 611 1.1 mrg (__v8df) __B, 612 1.1 mrg (__v8df) __W, 613 1.1 mrg (__mmask8) __U); 614 1.1 mrg } 615 1.1 mrg 616 1.1 mrg extern __inline __m512d 617 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 618 1.1 mrg _mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) 619 1.1 mrg { 620 1.1 mrg return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, 621 1.1 mrg (__v8df) __B, 622 1.1 mrg (__v8df) 623 1.1 mrg _mm512_setzero_pd (), 624 1.1 mrg (__mmask8) __U); 625 1.1 mrg } 626 1.1 mrg 627 1.1 mrg extern __inline __m512 628 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 629 1.1 mrg _mm512_andnot_ps (__m512 __A, __m512 __B) 630 1.1 mrg { 631 1.1 mrg return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, 632 1.1 mrg (__v16sf) __B, 633 1.1 mrg (__v16sf) 634 1.1 mrg _mm512_setzero_ps (), 635 1.1 mrg (__mmask16) -1); 636 1.1 mrg } 637 1.1 mrg 638 1.1 mrg extern __inline __m512 639 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 640 1.1 mrg _mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, 641 1.1 mrg __m512 __B) 642 1.1 mrg { 643 1.1 mrg return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, 644 1.1 mrg (__v16sf) __B, 645 1.1 mrg (__v16sf) __W, 646 1.1 mrg (__mmask16) __U); 647 1.1 mrg } 648 1.1 mrg 649 1.1 mrg extern __inline __m512 650 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 651 1.1 mrg _mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) 652 1.1 mrg { 653 1.1 mrg return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, 654 1.1 mrg (__v16sf) __B, 655 1.1 mrg (__v16sf) 656 1.1 mrg _mm512_setzero_ps (), 657 1.1 mrg (__mmask16) __U); 658 1.1 mrg } 659 1.1 mrg 660 1.1 mrg extern __inline __mmask16 661 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 662 1.1 mrg _mm512_movepi32_mask (__m512i __A) 663 1.1 mrg { 664 1.1 mrg return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A); 665 1.1 mrg } 666 1.1 mrg 667 1.1 mrg extern __inline __mmask8 668 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 669 1.1 mrg _mm512_movepi64_mask (__m512i __A) 670 1.1 mrg { 671 1.1 mrg return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A); 672 1.1 mrg } 673 1.1 mrg 674 1.1 mrg extern __inline __m512i 675 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 676 1.1 mrg _mm512_movm_epi32 (__mmask16 __A) 677 1.1 mrg { 678 1.1 mrg return (__m512i) __builtin_ia32_cvtmask2d512 (__A); 679 1.1 mrg } 680 1.1 mrg 681 1.1 mrg extern __inline __m512i 682 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 683 1.1 mrg _mm512_movm_epi64 (__mmask8 __A) 684 1.1 mrg { 685 1.1 mrg return (__m512i) __builtin_ia32_cvtmask2q512 (__A); 686 1.1 mrg } 687 1.1 mrg 688 1.1 mrg extern __inline __m512i 689 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 690 1.1 mrg _mm512_cvttpd_epi64 (__m512d __A) 691 1.1 mrg { 692 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 693 1.1 mrg (__v8di) 694 1.1 mrg _mm512_setzero_si512 (), 695 1.1 mrg (__mmask8) -1, 696 1.1 mrg _MM_FROUND_CUR_DIRECTION); 697 1.1 mrg } 698 1.1 mrg 699 1.1 mrg extern __inline __m512i 700 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 701 1.1 mrg _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) 702 1.1 mrg { 703 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 704 1.1 mrg (__v8di) __W, 705 1.1 mrg (__mmask8) __U, 706 1.1 mrg _MM_FROUND_CUR_DIRECTION); 707 1.1 mrg } 708 1.1 mrg 709 1.1 mrg extern __inline __m512i 710 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 711 1.1 mrg _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) 712 1.1 mrg { 713 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 714 1.1 mrg (__v8di) 715 1.1 mrg _mm512_setzero_si512 (), 716 1.1 mrg (__mmask8) __U, 717 1.1 mrg _MM_FROUND_CUR_DIRECTION); 718 1.1 mrg } 719 1.1 mrg 720 1.1 mrg extern __inline __m512i 721 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 722 1.1 mrg _mm512_cvttpd_epu64 (__m512d __A) 723 1.1 mrg { 724 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 725 1.1 mrg (__v8di) 726 1.1 mrg _mm512_setzero_si512 (), 727 1.1 mrg (__mmask8) -1, 728 1.1 mrg _MM_FROUND_CUR_DIRECTION); 729 1.1 mrg } 730 1.1 mrg 731 1.1 mrg extern __inline __m512i 732 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 733 1.1 mrg _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) 734 1.1 mrg { 735 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 736 1.1 mrg (__v8di) __W, 737 1.1 mrg (__mmask8) __U, 738 1.1 mrg _MM_FROUND_CUR_DIRECTION); 739 1.1 mrg } 740 1.1 mrg 741 1.1 mrg extern __inline __m512i 742 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 743 1.1 mrg _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) 744 1.1 mrg { 745 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 746 1.1 mrg (__v8di) 747 1.1 mrg _mm512_setzero_si512 (), 748 1.1 mrg (__mmask8) __U, 749 1.1 mrg _MM_FROUND_CUR_DIRECTION); 750 1.1 mrg } 751 1.1 mrg 752 1.1 mrg extern __inline __m512i 753 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 754 1.1 mrg _mm512_cvttps_epi64 (__m256 __A) 755 1.1 mrg { 756 1.1 mrg return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 757 1.1 mrg (__v8di) 758 1.1 mrg _mm512_setzero_si512 (), 759 1.1 mrg (__mmask8) -1, 760 1.1 mrg _MM_FROUND_CUR_DIRECTION); 761 1.1 mrg } 762 1.1 mrg 763 1.1 mrg extern __inline __m512i 764 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 765 1.1 mrg _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) 766 1.1 mrg { 767 1.1 mrg return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 768 1.1 mrg (__v8di) __W, 769 1.1 mrg (__mmask8) __U, 770 1.1 mrg _MM_FROUND_CUR_DIRECTION); 771 1.1 mrg } 772 1.1 mrg 773 1.1 mrg extern __inline __m512i 774 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 775 1.1 mrg _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) 776 1.1 mrg { 777 1.1 mrg return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 778 1.1 mrg (__v8di) 779 1.1 mrg _mm512_setzero_si512 (), 780 1.1 mrg (__mmask8) __U, 781 1.1 mrg _MM_FROUND_CUR_DIRECTION); 782 1.1 mrg } 783 1.1 mrg 784 1.1 mrg extern __inline __m512i 785 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 786 1.1 mrg _mm512_cvttps_epu64 (__m256 __A) 787 1.1 mrg { 788 1.1 mrg return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 789 1.1 mrg (__v8di) 790 1.1 mrg _mm512_setzero_si512 (), 791 1.1 mrg (__mmask8) -1, 792 1.1 mrg _MM_FROUND_CUR_DIRECTION); 793 1.1 mrg } 794 1.1 mrg 795 1.1 mrg extern __inline __m512i 796 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 797 1.1 mrg _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) 798 1.1 mrg { 799 1.1 mrg return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 800 1.1 mrg (__v8di) __W, 801 1.1 mrg (__mmask8) __U, 802 1.1 mrg _MM_FROUND_CUR_DIRECTION); 803 1.1 mrg } 804 1.1 mrg 805 1.1 mrg extern __inline __m512i 806 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 807 1.1 mrg _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) 808 1.1 mrg { 809 1.1 mrg return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 810 1.1 mrg (__v8di) 811 1.1 mrg _mm512_setzero_si512 (), 812 1.1 mrg (__mmask8) __U, 813 1.1 mrg _MM_FROUND_CUR_DIRECTION); 814 1.1 mrg } 815 1.1 mrg 816 1.1 mrg extern __inline __m512i 817 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 818 1.1 mrg _mm512_cvtpd_epi64 (__m512d __A) 819 1.1 mrg { 820 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 821 1.1 mrg (__v8di) 822 1.1 mrg _mm512_setzero_si512 (), 823 1.1 mrg (__mmask8) -1, 824 1.1 mrg _MM_FROUND_CUR_DIRECTION); 825 1.1 mrg } 826 1.1 mrg 827 1.1 mrg extern __inline __m512i 828 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 829 1.1 mrg _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) 830 1.1 mrg { 831 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 832 1.1 mrg (__v8di) __W, 833 1.1 mrg (__mmask8) __U, 834 1.1 mrg _MM_FROUND_CUR_DIRECTION); 835 1.1 mrg } 836 1.1 mrg 837 1.1 mrg extern __inline __m512i 838 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 839 1.1 mrg _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) 840 1.1 mrg { 841 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 842 1.1 mrg (__v8di) 843 1.1 mrg _mm512_setzero_si512 (), 844 1.1 mrg (__mmask8) __U, 845 1.1 mrg _MM_FROUND_CUR_DIRECTION); 846 1.1 mrg } 847 1.1 mrg 848 1.1 mrg extern __inline __m512i 849 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 850 1.1 mrg _mm512_cvtpd_epu64 (__m512d __A) 851 1.1 mrg { 852 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 853 1.1 mrg (__v8di) 854 1.1 mrg _mm512_setzero_si512 (), 855 1.1 mrg (__mmask8) -1, 856 1.1 mrg _MM_FROUND_CUR_DIRECTION); 857 1.1 mrg } 858 1.1 mrg 859 1.1 mrg extern __inline __m512i 860 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 861 1.1 mrg _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) 862 1.1 mrg { 863 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 864 1.1 mrg (__v8di) __W, 865 1.1 mrg (__mmask8) __U, 866 1.1 mrg _MM_FROUND_CUR_DIRECTION); 867 1.1 mrg } 868 1.1 mrg 869 1.1 mrg extern __inline __m512i 870 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 871 1.1 mrg _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) 872 1.1 mrg { 873 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 874 1.1 mrg (__v8di) 875 1.1 mrg _mm512_setzero_si512 (), 876 1.1 mrg (__mmask8) __U, 877 1.1 mrg _MM_FROUND_CUR_DIRECTION); 878 1.1 mrg } 879 1.1 mrg 880 1.1 mrg extern __inline __m512i 881 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 882 1.1 mrg _mm512_cvtps_epi64 (__m256 __A) 883 1.1 mrg { 884 1.1 mrg return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 885 1.1 mrg (__v8di) 886 1.1 mrg _mm512_setzero_si512 (), 887 1.1 mrg (__mmask8) -1, 888 1.1 mrg _MM_FROUND_CUR_DIRECTION); 889 1.1 mrg } 890 1.1 mrg 891 1.1 mrg extern __inline __m512i 892 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 893 1.1 mrg _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) 894 1.1 mrg { 895 1.1 mrg return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 896 1.1 mrg (__v8di) __W, 897 1.1 mrg (__mmask8) __U, 898 1.1 mrg _MM_FROUND_CUR_DIRECTION); 899 1.1 mrg } 900 1.1 mrg 901 1.1 mrg extern __inline __m512i 902 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 903 1.1 mrg _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) 904 1.1 mrg { 905 1.1 mrg return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 906 1.1 mrg (__v8di) 907 1.1 mrg _mm512_setzero_si512 (), 908 1.1 mrg (__mmask8) __U, 909 1.1 mrg _MM_FROUND_CUR_DIRECTION); 910 1.1 mrg } 911 1.1 mrg 912 1.1 mrg extern __inline __m512i 913 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 914 1.1 mrg _mm512_cvtps_epu64 (__m256 __A) 915 1.1 mrg { 916 1.1 mrg return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 917 1.1 mrg (__v8di) 918 1.1 mrg _mm512_setzero_si512 (), 919 1.1 mrg (__mmask8) -1, 920 1.1 mrg _MM_FROUND_CUR_DIRECTION); 921 1.1 mrg } 922 1.1 mrg 923 1.1 mrg extern __inline __m512i 924 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 925 1.1 mrg _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) 926 1.1 mrg { 927 1.1 mrg return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 928 1.1 mrg (__v8di) __W, 929 1.1 mrg (__mmask8) __U, 930 1.1 mrg _MM_FROUND_CUR_DIRECTION); 931 1.1 mrg } 932 1.1 mrg 933 1.1 mrg extern __inline __m512i 934 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 935 1.1 mrg _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) 936 1.1 mrg { 937 1.1 mrg return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 938 1.1 mrg (__v8di) 939 1.1 mrg _mm512_setzero_si512 (), 940 1.1 mrg (__mmask8) __U, 941 1.1 mrg _MM_FROUND_CUR_DIRECTION); 942 1.1 mrg } 943 1.1 mrg 944 1.1 mrg extern __inline __m256 945 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 946 1.1 mrg _mm512_cvtepi64_ps (__m512i __A) 947 1.1 mrg { 948 1.1 mrg return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 949 1.1 mrg (__v8sf) 950 1.1 mrg _mm256_setzero_ps (), 951 1.1 mrg (__mmask8) -1, 952 1.1 mrg _MM_FROUND_CUR_DIRECTION); 953 1.1 mrg } 954 1.1 mrg 955 1.1 mrg extern __inline __m256 956 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 957 1.1 mrg _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) 958 1.1 mrg { 959 1.1 mrg return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 960 1.1 mrg (__v8sf) __W, 961 1.1 mrg (__mmask8) __U, 962 1.1 mrg _MM_FROUND_CUR_DIRECTION); 963 1.1 mrg } 964 1.1 mrg 965 1.1 mrg extern __inline __m256 966 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 967 1.1 mrg _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) 968 1.1 mrg { 969 1.1 mrg return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 970 1.1 mrg (__v8sf) 971 1.1 mrg _mm256_setzero_ps (), 972 1.1 mrg (__mmask8) __U, 973 1.1 mrg _MM_FROUND_CUR_DIRECTION); 974 1.1 mrg } 975 1.1 mrg 976 1.1 mrg extern __inline __m256 977 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 978 1.1 mrg _mm512_cvtepu64_ps (__m512i __A) 979 1.1 mrg { 980 1.1 mrg return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 981 1.1 mrg (__v8sf) 982 1.1 mrg _mm256_setzero_ps (), 983 1.1 mrg (__mmask8) -1, 984 1.1 mrg _MM_FROUND_CUR_DIRECTION); 985 1.1 mrg } 986 1.1 mrg 987 1.1 mrg extern __inline __m256 988 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 989 1.1 mrg _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) 990 1.1 mrg { 991 1.1 mrg return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 992 1.1 mrg (__v8sf) __W, 993 1.1 mrg (__mmask8) __U, 994 1.1 mrg _MM_FROUND_CUR_DIRECTION); 995 1.1 mrg } 996 1.1 mrg 997 1.1 mrg extern __inline __m256 998 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 999 1.1 mrg _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) 1000 1.1 mrg { 1001 1.1 mrg return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 1002 1.1 mrg (__v8sf) 1003 1.1 mrg _mm256_setzero_ps (), 1004 1.1 mrg (__mmask8) __U, 1005 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1006 1.1 mrg } 1007 1.1 mrg 1008 1.1 mrg extern __inline __m512d 1009 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1010 1.1 mrg _mm512_cvtepi64_pd (__m512i __A) 1011 1.1 mrg { 1012 1.1 mrg return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, 1013 1.1 mrg (__v8df) 1014 1.1 mrg _mm512_setzero_pd (), 1015 1.1 mrg (__mmask8) -1, 1016 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1017 1.1 mrg } 1018 1.1 mrg 1019 1.1 mrg extern __inline __m512d 1020 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1021 1.1 mrg _mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) 1022 1.1 mrg { 1023 1.1 mrg return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, 1024 1.1 mrg (__v8df) __W, 1025 1.1 mrg (__mmask8) __U, 1026 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1027 1.1 mrg } 1028 1.1 mrg 1029 1.1 mrg extern __inline __m512d 1030 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1031 1.1 mrg _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) 1032 1.1 mrg { 1033 1.1 mrg return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, 1034 1.1 mrg (__v8df) 1035 1.1 mrg _mm512_setzero_pd (), 1036 1.1 mrg (__mmask8) __U, 1037 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1038 1.1 mrg } 1039 1.1 mrg 1040 1.1 mrg extern __inline __m512d 1041 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1042 1.1 mrg _mm512_cvtepu64_pd (__m512i __A) 1043 1.1 mrg { 1044 1.1 mrg return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, 1045 1.1 mrg (__v8df) 1046 1.1 mrg _mm512_setzero_pd (), 1047 1.1 mrg (__mmask8) -1, 1048 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1049 1.1 mrg } 1050 1.1 mrg 1051 1.1 mrg extern __inline __m512d 1052 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1053 1.1 mrg _mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) 1054 1.1 mrg { 1055 1.1 mrg return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, 1056 1.1 mrg (__v8df) __W, 1057 1.1 mrg (__mmask8) __U, 1058 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1059 1.1 mrg } 1060 1.1 mrg 1061 1.1 mrg extern __inline __m512d 1062 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1063 1.1 mrg _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) 1064 1.1 mrg { 1065 1.1 mrg return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, 1066 1.1 mrg (__v8df) 1067 1.1 mrg _mm512_setzero_pd (), 1068 1.1 mrg (__mmask8) __U, 1069 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1070 1.1 mrg } 1071 1.1 mrg 1072 1.1 mrg #ifdef __OPTIMIZE__ 1073 1.3 mrg extern __inline __mmask8 1074 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1075 1.3 mrg _kshiftli_mask8 (__mmask8 __A, unsigned int __B) 1076 1.3 mrg { 1077 1.3 mrg return (__mmask8) __builtin_ia32_kshiftliqi ((__mmask8) __A, (__mmask8) __B); 1078 1.3 mrg } 1079 1.3 mrg 1080 1.3 mrg extern __inline __mmask8 1081 1.3 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1082 1.3 mrg _kshiftri_mask8 (__mmask8 __A, unsigned int __B) 1083 1.3 mrg { 1084 1.3 mrg return (__mmask8) __builtin_ia32_kshiftriqi ((__mmask8) __A, (__mmask8) __B); 1085 1.3 mrg } 1086 1.3 mrg 1087 1.1 mrg extern __inline __m512d 1088 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1089 1.1 mrg _mm512_range_pd (__m512d __A, __m512d __B, int __C) 1090 1.1 mrg { 1091 1.1 mrg return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, 1092 1.1 mrg (__v8df) __B, __C, 1093 1.1 mrg (__v8df) 1094 1.1 mrg _mm512_setzero_pd (), 1095 1.1 mrg (__mmask8) -1, 1096 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1097 1.1 mrg } 1098 1.1 mrg 1099 1.1 mrg extern __inline __m512d 1100 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1101 1.1 mrg _mm512_mask_range_pd (__m512d __W, __mmask8 __U, 1102 1.1 mrg __m512d __A, __m512d __B, int __C) 1103 1.1 mrg { 1104 1.1 mrg return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, 1105 1.1 mrg (__v8df) __B, __C, 1106 1.1 mrg (__v8df) __W, 1107 1.1 mrg (__mmask8) __U, 1108 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1109 1.1 mrg } 1110 1.1 mrg 1111 1.1 mrg extern __inline __m512d 1112 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1113 1.1 mrg _mm512_maskz_range_pd (__mmask8 __U, __m512d __A, __m512d __B, int __C) 1114 1.1 mrg { 1115 1.1 mrg return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, 1116 1.1 mrg (__v8df) __B, __C, 1117 1.1 mrg (__v8df) 1118 1.1 mrg _mm512_setzero_pd (), 1119 1.1 mrg (__mmask8) __U, 1120 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1121 1.1 mrg } 1122 1.1 mrg 1123 1.1 mrg extern __inline __m512 1124 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1125 1.1 mrg _mm512_range_ps (__m512 __A, __m512 __B, int __C) 1126 1.1 mrg { 1127 1.1 mrg return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, 1128 1.1 mrg (__v16sf) __B, __C, 1129 1.1 mrg (__v16sf) 1130 1.1 mrg _mm512_setzero_ps (), 1131 1.1 mrg (__mmask16) -1, 1132 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1133 1.1 mrg } 1134 1.1 mrg 1135 1.1 mrg extern __inline __m512 1136 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1137 1.1 mrg _mm512_mask_range_ps (__m512 __W, __mmask16 __U, 1138 1.1 mrg __m512 __A, __m512 __B, int __C) 1139 1.1 mrg { 1140 1.1 mrg return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, 1141 1.1 mrg (__v16sf) __B, __C, 1142 1.1 mrg (__v16sf) __W, 1143 1.1 mrg (__mmask16) __U, 1144 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1145 1.1 mrg } 1146 1.1 mrg 1147 1.1 mrg extern __inline __m512 1148 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1149 1.1 mrg _mm512_maskz_range_ps (__mmask16 __U, __m512 __A, __m512 __B, int __C) 1150 1.1 mrg { 1151 1.1 mrg return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, 1152 1.1 mrg (__v16sf) __B, __C, 1153 1.1 mrg (__v16sf) 1154 1.1 mrg _mm512_setzero_ps (), 1155 1.1 mrg (__mmask16) __U, 1156 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1157 1.1 mrg } 1158 1.1 mrg 1159 1.1 mrg extern __inline __m128d 1160 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1161 1.1 mrg _mm_reduce_sd (__m128d __A, __m128d __B, int __C) 1162 1.1 mrg { 1163 1.4 mrg return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A, 1164 1.4 mrg (__v2df) __B, __C, 1165 1.4 mrg (__v2df) _mm_setzero_pd (), 1166 1.4 mrg (__mmask8) -1); 1167 1.4 mrg } 1168 1.4 mrg 1169 1.4 mrg extern __inline __m128d 1170 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1171 1.7 mrg _mm_reduce_round_sd (__m128d __A, __m128d __B, int __C, const int __R) 1172 1.7 mrg { 1173 1.7 mrg return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A, 1174 1.7 mrg (__v2df) __B, __C, 1175 1.7 mrg (__v2df) 1176 1.7 mrg _mm_setzero_pd (), 1177 1.7 mrg (__mmask8) -1, __R); 1178 1.7 mrg } 1179 1.7 mrg 1180 1.7 mrg extern __inline __m128d 1181 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1182 1.4 mrg _mm_mask_reduce_sd (__m128d __W, __mmask8 __U, __m128d __A, 1183 1.4 mrg __m128d __B, int __C) 1184 1.4 mrg { 1185 1.4 mrg return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A, 1186 1.4 mrg (__v2df) __B, __C, 1187 1.4 mrg (__v2df) __W, 1188 1.4 mrg (__mmask8) __U); 1189 1.4 mrg } 1190 1.4 mrg 1191 1.4 mrg extern __inline __m128d 1192 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1193 1.7 mrg _mm_mask_reduce_round_sd (__m128d __W, __mmask8 __U, __m128d __A, 1194 1.7 mrg __m128d __B, int __C, const int __R) 1195 1.7 mrg { 1196 1.7 mrg return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A, 1197 1.7 mrg (__v2df) __B, __C, 1198 1.7 mrg (__v2df) __W, 1199 1.7 mrg __U, __R); 1200 1.7 mrg } 1201 1.7 mrg 1202 1.7 mrg extern __inline __m128d 1203 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1204 1.4 mrg _mm_maskz_reduce_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C) 1205 1.4 mrg { 1206 1.4 mrg return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A, 1207 1.4 mrg (__v2df) __B, __C, 1208 1.4 mrg (__v2df) _mm_setzero_pd (), 1209 1.4 mrg (__mmask8) __U); 1210 1.1 mrg } 1211 1.1 mrg 1212 1.7 mrg extern __inline __m128d 1213 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1214 1.7 mrg _mm_maskz_reduce_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 1215 1.7 mrg int __C, const int __R) 1216 1.7 mrg { 1217 1.7 mrg return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A, 1218 1.7 mrg (__v2df) __B, __C, 1219 1.7 mrg (__v2df) 1220 1.7 mrg _mm_setzero_pd (), 1221 1.7 mrg __U, __R); 1222 1.7 mrg } 1223 1.7 mrg 1224 1.1 mrg extern __inline __m128 1225 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1226 1.1 mrg _mm_reduce_ss (__m128 __A, __m128 __B, int __C) 1227 1.1 mrg { 1228 1.4 mrg return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A, 1229 1.4 mrg (__v4sf) __B, __C, 1230 1.4 mrg (__v4sf) _mm_setzero_ps (), 1231 1.4 mrg (__mmask8) -1); 1232 1.4 mrg } 1233 1.4 mrg 1234 1.7 mrg extern __inline __m128 1235 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1236 1.7 mrg _mm_reduce_round_ss (__m128 __A, __m128 __B, int __C, const int __R) 1237 1.7 mrg { 1238 1.7 mrg return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A, 1239 1.7 mrg (__v4sf) __B, __C, 1240 1.7 mrg (__v4sf) 1241 1.7 mrg _mm_setzero_ps (), 1242 1.7 mrg (__mmask8) -1, __R); 1243 1.7 mrg } 1244 1.4 mrg 1245 1.4 mrg extern __inline __m128 1246 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1247 1.4 mrg _mm_mask_reduce_ss (__m128 __W, __mmask8 __U, __m128 __A, 1248 1.4 mrg __m128 __B, int __C) 1249 1.4 mrg { 1250 1.4 mrg return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A, 1251 1.4 mrg (__v4sf) __B, __C, 1252 1.4 mrg (__v4sf) __W, 1253 1.4 mrg (__mmask8) __U); 1254 1.4 mrg } 1255 1.4 mrg 1256 1.4 mrg extern __inline __m128 1257 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1258 1.7 mrg _mm_mask_reduce_round_ss (__m128 __W, __mmask8 __U, __m128 __A, 1259 1.7 mrg __m128 __B, int __C, const int __R) 1260 1.7 mrg { 1261 1.7 mrg return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A, 1262 1.7 mrg (__v4sf) __B, __C, 1263 1.7 mrg (__v4sf) __W, 1264 1.7 mrg __U, __R); 1265 1.7 mrg } 1266 1.7 mrg 1267 1.7 mrg extern __inline __m128 1268 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1269 1.4 mrg _mm_maskz_reduce_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C) 1270 1.4 mrg { 1271 1.4 mrg return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A, 1272 1.4 mrg (__v4sf) __B, __C, 1273 1.4 mrg (__v4sf) _mm_setzero_ps (), 1274 1.4 mrg (__mmask8) __U); 1275 1.1 mrg } 1276 1.1 mrg 1277 1.7 mrg extern __inline __m128 1278 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1279 1.7 mrg _mm_maskz_reduce_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 1280 1.7 mrg int __C, const int __R) 1281 1.7 mrg { 1282 1.7 mrg return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A, 1283 1.7 mrg (__v4sf) __B, __C, 1284 1.7 mrg (__v4sf) 1285 1.7 mrg _mm_setzero_ps (), 1286 1.7 mrg __U, __R); 1287 1.7 mrg } 1288 1.7 mrg 1289 1.1 mrg extern __inline __m128d 1290 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1291 1.1 mrg _mm_range_sd (__m128d __A, __m128d __B, int __C) 1292 1.1 mrg { 1293 1.4 mrg return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, 1294 1.4 mrg (__v2df) __B, __C, 1295 1.4 mrg (__v2df) 1296 1.4 mrg _mm_setzero_pd (), 1297 1.4 mrg (__mmask8) -1, 1298 1.4 mrg _MM_FROUND_CUR_DIRECTION); 1299 1.4 mrg } 1300 1.4 mrg 1301 1.4 mrg extern __inline __m128d 1302 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1303 1.4 mrg _mm_mask_range_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, int __C) 1304 1.4 mrg { 1305 1.4 mrg return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, 1306 1.1 mrg (__v2df) __B, __C, 1307 1.4 mrg (__v2df) __W, 1308 1.4 mrg (__mmask8) __U, 1309 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1310 1.1 mrg } 1311 1.1 mrg 1312 1.4 mrg extern __inline __m128d 1313 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1314 1.4 mrg _mm_maskz_range_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C) 1315 1.4 mrg { 1316 1.4 mrg return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, 1317 1.4 mrg (__v2df) __B, __C, 1318 1.4 mrg (__v2df) 1319 1.4 mrg _mm_setzero_pd (), 1320 1.4 mrg (__mmask8) __U, 1321 1.4 mrg _MM_FROUND_CUR_DIRECTION); 1322 1.4 mrg } 1323 1.1 mrg 1324 1.1 mrg extern __inline __m128 1325 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1326 1.1 mrg _mm_range_ss (__m128 __A, __m128 __B, int __C) 1327 1.1 mrg { 1328 1.4 mrg return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, 1329 1.4 mrg (__v4sf) __B, __C, 1330 1.4 mrg (__v4sf) 1331 1.4 mrg _mm_setzero_ps (), 1332 1.4 mrg (__mmask8) -1, 1333 1.4 mrg _MM_FROUND_CUR_DIRECTION); 1334 1.4 mrg } 1335 1.4 mrg 1336 1.4 mrg extern __inline __m128 1337 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1338 1.4 mrg _mm_mask_range_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, int __C) 1339 1.4 mrg { 1340 1.4 mrg return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, 1341 1.1 mrg (__v4sf) __B, __C, 1342 1.4 mrg (__v4sf) __W, 1343 1.4 mrg (__mmask8) __U, 1344 1.4 mrg _MM_FROUND_CUR_DIRECTION); 1345 1.4 mrg } 1346 1.4 mrg 1347 1.4 mrg 1348 1.4 mrg extern __inline __m128 1349 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1350 1.4 mrg _mm_maskz_range_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C) 1351 1.4 mrg { 1352 1.4 mrg return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, 1353 1.4 mrg (__v4sf) __B, __C, 1354 1.4 mrg (__v4sf) 1355 1.4 mrg _mm_setzero_ps (), 1356 1.4 mrg (__mmask8) __U, 1357 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1358 1.1 mrg } 1359 1.1 mrg 1360 1.1 mrg extern __inline __m128d 1361 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1362 1.1 mrg _mm_range_round_sd (__m128d __A, __m128d __B, int __C, const int __R) 1363 1.1 mrg { 1364 1.4 mrg return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, 1365 1.1 mrg (__v2df) __B, __C, 1366 1.4 mrg (__v2df) 1367 1.4 mrg _mm_setzero_pd (), 1368 1.4 mrg (__mmask8) -1, __R); 1369 1.4 mrg } 1370 1.4 mrg 1371 1.4 mrg extern __inline __m128d 1372 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1373 1.4 mrg _mm_mask_range_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, 1374 1.4 mrg int __C, const int __R) 1375 1.4 mrg { 1376 1.4 mrg return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, 1377 1.4 mrg (__v2df) __B, __C, 1378 1.4 mrg (__v2df) __W, 1379 1.4 mrg (__mmask8) __U, __R); 1380 1.4 mrg } 1381 1.4 mrg 1382 1.4 mrg extern __inline __m128d 1383 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1384 1.4 mrg _mm_maskz_range_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C, 1385 1.4 mrg const int __R) 1386 1.4 mrg { 1387 1.4 mrg return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, 1388 1.4 mrg (__v2df) __B, __C, 1389 1.4 mrg (__v2df) 1390 1.4 mrg _mm_setzero_pd (), 1391 1.4 mrg (__mmask8) __U, __R); 1392 1.1 mrg } 1393 1.1 mrg 1394 1.1 mrg extern __inline __m128 1395 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1396 1.1 mrg _mm_range_round_ss (__m128 __A, __m128 __B, int __C, const int __R) 1397 1.1 mrg { 1398 1.4 mrg return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, 1399 1.4 mrg (__v4sf) __B, __C, 1400 1.4 mrg (__v4sf) 1401 1.4 mrg _mm_setzero_ps (), 1402 1.4 mrg (__mmask8) -1, __R); 1403 1.4 mrg } 1404 1.4 mrg 1405 1.4 mrg extern __inline __m128 1406 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1407 1.4 mrg _mm_mask_range_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, 1408 1.4 mrg int __C, const int __R) 1409 1.4 mrg { 1410 1.4 mrg return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, 1411 1.4 mrg (__v4sf) __B, __C, 1412 1.4 mrg (__v4sf) __W, 1413 1.4 mrg (__mmask8) __U, __R); 1414 1.4 mrg } 1415 1.4 mrg 1416 1.4 mrg extern __inline __m128 1417 1.4 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1418 1.4 mrg _mm_maskz_range_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C, 1419 1.4 mrg const int __R) 1420 1.4 mrg { 1421 1.4 mrg return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, 1422 1.1 mrg (__v4sf) __B, __C, 1423 1.4 mrg (__v4sf) 1424 1.4 mrg _mm_setzero_ps (), 1425 1.4 mrg (__mmask8) __U, __R); 1426 1.1 mrg } 1427 1.1 mrg 1428 1.1 mrg extern __inline __mmask8 1429 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1430 1.1 mrg _mm_fpclass_ss_mask (__m128 __A, const int __imm) 1431 1.1 mrg { 1432 1.6 mrg return (__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) __A, __imm, 1433 1.6 mrg (__mmask8) -1); 1434 1.1 mrg } 1435 1.1 mrg 1436 1.1 mrg extern __inline __mmask8 1437 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1438 1.1 mrg _mm_fpclass_sd_mask (__m128d __A, const int __imm) 1439 1.1 mrg { 1440 1.6 mrg return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm, 1441 1.6 mrg (__mmask8) -1); 1442 1.6 mrg } 1443 1.6 mrg 1444 1.6 mrg extern __inline __mmask8 1445 1.6 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1446 1.6 mrg _mm_mask_fpclass_ss_mask (__mmask8 __U, __m128 __A, const int __imm) 1447 1.6 mrg { 1448 1.6 mrg return (__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) __A, __imm, __U); 1449 1.6 mrg } 1450 1.6 mrg 1451 1.6 mrg extern __inline __mmask8 1452 1.6 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1453 1.6 mrg _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm) 1454 1.6 mrg { 1455 1.6 mrg return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm, __U); 1456 1.1 mrg } 1457 1.1 mrg 1458 1.1 mrg extern __inline __m512i 1459 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1460 1.1 mrg _mm512_cvtt_roundpd_epi64 (__m512d __A, const int __R) 1461 1.1 mrg { 1462 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 1463 1.1 mrg (__v8di) 1464 1.1 mrg _mm512_setzero_si512 (), 1465 1.1 mrg (__mmask8) -1, 1466 1.1 mrg __R); 1467 1.1 mrg } 1468 1.1 mrg 1469 1.1 mrg extern __inline __m512i 1470 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1471 1.1 mrg _mm512_mask_cvtt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A, 1472 1.1 mrg const int __R) 1473 1.1 mrg { 1474 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 1475 1.1 mrg (__v8di) __W, 1476 1.1 mrg (__mmask8) __U, 1477 1.1 mrg __R); 1478 1.1 mrg } 1479 1.1 mrg 1480 1.1 mrg extern __inline __m512i 1481 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1482 1.1 mrg _mm512_maskz_cvtt_roundpd_epi64 (__mmask8 __U, __m512d __A, 1483 1.1 mrg const int __R) 1484 1.1 mrg { 1485 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 1486 1.1 mrg (__v8di) 1487 1.1 mrg _mm512_setzero_si512 (), 1488 1.1 mrg (__mmask8) __U, 1489 1.1 mrg __R); 1490 1.1 mrg } 1491 1.1 mrg 1492 1.1 mrg extern __inline __m512i 1493 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1494 1.1 mrg _mm512_cvtt_roundpd_epu64 (__m512d __A, const int __R) 1495 1.1 mrg { 1496 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 1497 1.1 mrg (__v8di) 1498 1.1 mrg _mm512_setzero_si512 (), 1499 1.1 mrg (__mmask8) -1, 1500 1.1 mrg __R); 1501 1.1 mrg } 1502 1.1 mrg 1503 1.1 mrg extern __inline __m512i 1504 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1505 1.1 mrg _mm512_mask_cvtt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A, 1506 1.1 mrg const int __R) 1507 1.1 mrg { 1508 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 1509 1.1 mrg (__v8di) __W, 1510 1.1 mrg (__mmask8) __U, 1511 1.1 mrg __R); 1512 1.1 mrg } 1513 1.1 mrg 1514 1.1 mrg extern __inline __m512i 1515 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1516 1.1 mrg _mm512_maskz_cvtt_roundpd_epu64 (__mmask8 __U, __m512d __A, 1517 1.1 mrg const int __R) 1518 1.1 mrg { 1519 1.1 mrg return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 1520 1.1 mrg (__v8di) 1521 1.1 mrg _mm512_setzero_si512 (), 1522 1.1 mrg (__mmask8) __U, 1523 1.1 mrg __R); 1524 1.1 mrg } 1525 1.1 mrg 1526 1.1 mrg extern __inline __m512i 1527 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1528 1.1 mrg _mm512_cvtt_roundps_epi64 (__m256 __A, const int __R) 1529 1.1 mrg { 1530 1.1 mrg return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 1531 1.1 mrg (__v8di) 1532 1.1 mrg _mm512_setzero_si512 (), 1533 1.1 mrg (__mmask8) -1, 1534 1.1 mrg __R); 1535 1.1 mrg } 1536 1.1 mrg 1537 1.1 mrg extern __inline __m512i 1538 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1539 1.1 mrg _mm512_mask_cvtt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A, 1540 1.1 mrg const int __R) 1541 1.1 mrg { 1542 1.1 mrg return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 1543 1.1 mrg (__v8di) __W, 1544 1.1 mrg (__mmask8) __U, 1545 1.1 mrg __R); 1546 1.1 mrg } 1547 1.1 mrg 1548 1.1 mrg extern __inline __m512i 1549 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1550 1.1 mrg _mm512_maskz_cvtt_roundps_epi64 (__mmask8 __U, __m256 __A, 1551 1.1 mrg const int __R) 1552 1.1 mrg { 1553 1.1 mrg return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 1554 1.1 mrg (__v8di) 1555 1.1 mrg _mm512_setzero_si512 (), 1556 1.1 mrg (__mmask8) __U, 1557 1.1 mrg __R); 1558 1.1 mrg } 1559 1.1 mrg 1560 1.1 mrg extern __inline __m512i 1561 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1562 1.1 mrg _mm512_cvtt_roundps_epu64 (__m256 __A, const int __R) 1563 1.1 mrg { 1564 1.1 mrg return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 1565 1.1 mrg (__v8di) 1566 1.1 mrg _mm512_setzero_si512 (), 1567 1.1 mrg (__mmask8) -1, 1568 1.1 mrg __R); 1569 1.1 mrg } 1570 1.1 mrg 1571 1.1 mrg extern __inline __m512i 1572 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1573 1.1 mrg _mm512_mask_cvtt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A, 1574 1.1 mrg const int __R) 1575 1.1 mrg { 1576 1.1 mrg return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 1577 1.1 mrg (__v8di) __W, 1578 1.1 mrg (__mmask8) __U, 1579 1.1 mrg __R); 1580 1.1 mrg } 1581 1.1 mrg 1582 1.1 mrg extern __inline __m512i 1583 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1584 1.1 mrg _mm512_maskz_cvtt_roundps_epu64 (__mmask8 __U, __m256 __A, 1585 1.1 mrg const int __R) 1586 1.1 mrg { 1587 1.1 mrg return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 1588 1.1 mrg (__v8di) 1589 1.1 mrg _mm512_setzero_si512 (), 1590 1.1 mrg (__mmask8) __U, 1591 1.1 mrg __R); 1592 1.1 mrg } 1593 1.1 mrg 1594 1.1 mrg extern __inline __m512i 1595 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1596 1.1 mrg _mm512_cvt_roundpd_epi64 (__m512d __A, const int __R) 1597 1.1 mrg { 1598 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 1599 1.1 mrg (__v8di) 1600 1.1 mrg _mm512_setzero_si512 (), 1601 1.1 mrg (__mmask8) -1, 1602 1.1 mrg __R); 1603 1.1 mrg } 1604 1.1 mrg 1605 1.1 mrg extern __inline __m512i 1606 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1607 1.1 mrg _mm512_mask_cvt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A, 1608 1.1 mrg const int __R) 1609 1.1 mrg { 1610 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 1611 1.1 mrg (__v8di) __W, 1612 1.1 mrg (__mmask8) __U, 1613 1.1 mrg __R); 1614 1.1 mrg } 1615 1.1 mrg 1616 1.1 mrg extern __inline __m512i 1617 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1618 1.1 mrg _mm512_maskz_cvt_roundpd_epi64 (__mmask8 __U, __m512d __A, 1619 1.1 mrg const int __R) 1620 1.1 mrg { 1621 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 1622 1.1 mrg (__v8di) 1623 1.1 mrg _mm512_setzero_si512 (), 1624 1.1 mrg (__mmask8) __U, 1625 1.1 mrg __R); 1626 1.1 mrg } 1627 1.1 mrg 1628 1.1 mrg extern __inline __m512i 1629 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1630 1.1 mrg _mm512_cvt_roundpd_epu64 (__m512d __A, const int __R) 1631 1.1 mrg { 1632 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 1633 1.1 mrg (__v8di) 1634 1.1 mrg _mm512_setzero_si512 (), 1635 1.1 mrg (__mmask8) -1, 1636 1.1 mrg __R); 1637 1.1 mrg } 1638 1.1 mrg 1639 1.1 mrg extern __inline __m512i 1640 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1641 1.1 mrg _mm512_mask_cvt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A, 1642 1.1 mrg const int __R) 1643 1.1 mrg { 1644 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 1645 1.1 mrg (__v8di) __W, 1646 1.1 mrg (__mmask8) __U, 1647 1.1 mrg __R); 1648 1.1 mrg } 1649 1.1 mrg 1650 1.1 mrg extern __inline __m512i 1651 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1652 1.1 mrg _mm512_maskz_cvt_roundpd_epu64 (__mmask8 __U, __m512d __A, 1653 1.1 mrg const int __R) 1654 1.1 mrg { 1655 1.1 mrg return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 1656 1.1 mrg (__v8di) 1657 1.1 mrg _mm512_setzero_si512 (), 1658 1.1 mrg (__mmask8) __U, 1659 1.1 mrg __R); 1660 1.1 mrg } 1661 1.1 mrg 1662 1.1 mrg extern __inline __m512i 1663 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1664 1.1 mrg _mm512_cvt_roundps_epi64 (__m256 __A, const int __R) 1665 1.1 mrg { 1666 1.1 mrg return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 1667 1.1 mrg (__v8di) 1668 1.1 mrg _mm512_setzero_si512 (), 1669 1.1 mrg (__mmask8) -1, 1670 1.1 mrg __R); 1671 1.1 mrg } 1672 1.1 mrg 1673 1.1 mrg extern __inline __m512i 1674 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1675 1.1 mrg _mm512_mask_cvt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A, 1676 1.1 mrg const int __R) 1677 1.1 mrg { 1678 1.1 mrg return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 1679 1.1 mrg (__v8di) __W, 1680 1.1 mrg (__mmask8) __U, 1681 1.1 mrg __R); 1682 1.1 mrg } 1683 1.1 mrg 1684 1.1 mrg extern __inline __m512i 1685 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1686 1.1 mrg _mm512_maskz_cvt_roundps_epi64 (__mmask8 __U, __m256 __A, 1687 1.1 mrg const int __R) 1688 1.1 mrg { 1689 1.1 mrg return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 1690 1.1 mrg (__v8di) 1691 1.1 mrg _mm512_setzero_si512 (), 1692 1.1 mrg (__mmask8) __U, 1693 1.1 mrg __R); 1694 1.1 mrg } 1695 1.1 mrg 1696 1.1 mrg extern __inline __m512i 1697 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1698 1.1 mrg _mm512_cvt_roundps_epu64 (__m256 __A, const int __R) 1699 1.1 mrg { 1700 1.1 mrg return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 1701 1.1 mrg (__v8di) 1702 1.1 mrg _mm512_setzero_si512 (), 1703 1.1 mrg (__mmask8) -1, 1704 1.1 mrg __R); 1705 1.1 mrg } 1706 1.1 mrg 1707 1.1 mrg extern __inline __m512i 1708 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1709 1.1 mrg _mm512_mask_cvt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A, 1710 1.1 mrg const int __R) 1711 1.1 mrg { 1712 1.1 mrg return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 1713 1.1 mrg (__v8di) __W, 1714 1.1 mrg (__mmask8) __U, 1715 1.1 mrg __R); 1716 1.1 mrg } 1717 1.1 mrg 1718 1.1 mrg extern __inline __m512i 1719 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1720 1.1 mrg _mm512_maskz_cvt_roundps_epu64 (__mmask8 __U, __m256 __A, 1721 1.1 mrg const int __R) 1722 1.1 mrg { 1723 1.1 mrg return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 1724 1.1 mrg (__v8di) 1725 1.1 mrg _mm512_setzero_si512 (), 1726 1.1 mrg (__mmask8) __U, 1727 1.1 mrg __R); 1728 1.1 mrg } 1729 1.1 mrg 1730 1.1 mrg extern __inline __m256 1731 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1732 1.1 mrg _mm512_cvt_roundepi64_ps (__m512i __A, const int __R) 1733 1.1 mrg { 1734 1.1 mrg return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 1735 1.1 mrg (__v8sf) 1736 1.1 mrg _mm256_setzero_ps (), 1737 1.1 mrg (__mmask8) -1, 1738 1.1 mrg __R); 1739 1.1 mrg } 1740 1.1 mrg 1741 1.1 mrg extern __inline __m256 1742 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1743 1.1 mrg _mm512_mask_cvt_roundepi64_ps (__m256 __W, __mmask8 __U, __m512i __A, 1744 1.1 mrg const int __R) 1745 1.1 mrg { 1746 1.1 mrg return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 1747 1.1 mrg (__v8sf) __W, 1748 1.1 mrg (__mmask8) __U, 1749 1.1 mrg __R); 1750 1.1 mrg } 1751 1.1 mrg 1752 1.1 mrg extern __inline __m256 1753 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1754 1.1 mrg _mm512_maskz_cvt_roundepi64_ps (__mmask8 __U, __m512i __A, 1755 1.1 mrg const int __R) 1756 1.1 mrg { 1757 1.1 mrg return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 1758 1.1 mrg (__v8sf) 1759 1.1 mrg _mm256_setzero_ps (), 1760 1.1 mrg (__mmask8) __U, 1761 1.1 mrg __R); 1762 1.1 mrg } 1763 1.1 mrg 1764 1.1 mrg extern __inline __m256 1765 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1766 1.1 mrg _mm512_cvt_roundepu64_ps (__m512i __A, const int __R) 1767 1.1 mrg { 1768 1.1 mrg return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 1769 1.1 mrg (__v8sf) 1770 1.1 mrg _mm256_setzero_ps (), 1771 1.1 mrg (__mmask8) -1, 1772 1.1 mrg __R); 1773 1.1 mrg } 1774 1.1 mrg 1775 1.1 mrg extern __inline __m256 1776 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1777 1.1 mrg _mm512_mask_cvt_roundepu64_ps (__m256 __W, __mmask8 __U, __m512i __A, 1778 1.1 mrg const int __R) 1779 1.1 mrg { 1780 1.1 mrg return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 1781 1.1 mrg (__v8sf) __W, 1782 1.1 mrg (__mmask8) __U, 1783 1.1 mrg __R); 1784 1.1 mrg } 1785 1.1 mrg 1786 1.1 mrg extern __inline __m256 1787 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1788 1.1 mrg _mm512_maskz_cvt_roundepu64_ps (__mmask8 __U, __m512i __A, 1789 1.1 mrg const int __R) 1790 1.1 mrg { 1791 1.1 mrg return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 1792 1.1 mrg (__v8sf) 1793 1.1 mrg _mm256_setzero_ps (), 1794 1.1 mrg (__mmask8) __U, 1795 1.1 mrg __R); 1796 1.1 mrg } 1797 1.1 mrg 1798 1.1 mrg extern __inline __m512d 1799 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1800 1.1 mrg _mm512_cvt_roundepi64_pd (__m512i __A, const int __R) 1801 1.1 mrg { 1802 1.1 mrg return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, 1803 1.1 mrg (__v8df) 1804 1.1 mrg _mm512_setzero_pd (), 1805 1.1 mrg (__mmask8) -1, 1806 1.1 mrg __R); 1807 1.1 mrg } 1808 1.1 mrg 1809 1.1 mrg extern __inline __m512d 1810 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1811 1.1 mrg _mm512_mask_cvt_roundepi64_pd (__m512d __W, __mmask8 __U, __m512i __A, 1812 1.1 mrg const int __R) 1813 1.1 mrg { 1814 1.1 mrg return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, 1815 1.1 mrg (__v8df) __W, 1816 1.1 mrg (__mmask8) __U, 1817 1.1 mrg __R); 1818 1.1 mrg } 1819 1.1 mrg 1820 1.1 mrg extern __inline __m512d 1821 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1822 1.1 mrg _mm512_maskz_cvt_roundepi64_pd (__mmask8 __U, __m512i __A, 1823 1.1 mrg const int __R) 1824 1.1 mrg { 1825 1.1 mrg return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, 1826 1.1 mrg (__v8df) 1827 1.1 mrg _mm512_setzero_pd (), 1828 1.1 mrg (__mmask8) __U, 1829 1.1 mrg __R); 1830 1.1 mrg } 1831 1.1 mrg 1832 1.1 mrg extern __inline __m512d 1833 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1834 1.1 mrg _mm512_cvt_roundepu64_pd (__m512i __A, const int __R) 1835 1.1 mrg { 1836 1.1 mrg return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, 1837 1.1 mrg (__v8df) 1838 1.1 mrg _mm512_setzero_pd (), 1839 1.1 mrg (__mmask8) -1, 1840 1.1 mrg __R); 1841 1.1 mrg } 1842 1.1 mrg 1843 1.1 mrg extern __inline __m512d 1844 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1845 1.1 mrg _mm512_mask_cvt_roundepu64_pd (__m512d __W, __mmask8 __U, __m512i __A, 1846 1.1 mrg const int __R) 1847 1.1 mrg { 1848 1.1 mrg return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, 1849 1.1 mrg (__v8df) __W, 1850 1.1 mrg (__mmask8) __U, 1851 1.1 mrg __R); 1852 1.1 mrg } 1853 1.1 mrg 1854 1.1 mrg extern __inline __m512d 1855 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1856 1.1 mrg _mm512_maskz_cvt_roundepu64_pd (__mmask8 __U, __m512i __A, 1857 1.1 mrg const int __R) 1858 1.1 mrg { 1859 1.1 mrg return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, 1860 1.1 mrg (__v8df) 1861 1.1 mrg _mm512_setzero_pd (), 1862 1.1 mrg (__mmask8) __U, 1863 1.1 mrg __R); 1864 1.1 mrg } 1865 1.1 mrg 1866 1.1 mrg extern __inline __m512d 1867 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1868 1.1 mrg _mm512_reduce_pd (__m512d __A, int __B) 1869 1.1 mrg { 1870 1.1 mrg return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, 1871 1.1 mrg (__v8df) 1872 1.1 mrg _mm512_setzero_pd (), 1873 1.1 mrg (__mmask8) -1); 1874 1.1 mrg } 1875 1.1 mrg 1876 1.1 mrg extern __inline __m512d 1877 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1878 1.7 mrg _mm512_reduce_round_pd (__m512d __A, int __B, const int __R) 1879 1.7 mrg { 1880 1.7 mrg return (__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df) __A, 1881 1.7 mrg __B, 1882 1.7 mrg (__v8df) 1883 1.7 mrg _mm512_setzero_pd (), 1884 1.7 mrg (__mmask8) -1, __R); 1885 1.7 mrg } 1886 1.7 mrg 1887 1.7 mrg extern __inline __m512d 1888 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1889 1.1 mrg _mm512_mask_reduce_pd (__m512d __W, __mmask8 __U, __m512d __A, int __B) 1890 1.1 mrg { 1891 1.1 mrg return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, 1892 1.1 mrg (__v8df) __W, 1893 1.1 mrg (__mmask8) __U); 1894 1.1 mrg } 1895 1.1 mrg 1896 1.1 mrg extern __inline __m512d 1897 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1898 1.7 mrg _mm512_mask_reduce_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 1899 1.7 mrg int __B, const int __R) 1900 1.7 mrg { 1901 1.7 mrg return (__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df) __A, 1902 1.7 mrg __B, 1903 1.7 mrg (__v8df) __W, 1904 1.7 mrg __U, __R); 1905 1.7 mrg } 1906 1.7 mrg 1907 1.7 mrg extern __inline __m512d 1908 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1909 1.1 mrg _mm512_maskz_reduce_pd (__mmask8 __U, __m512d __A, int __B) 1910 1.1 mrg { 1911 1.1 mrg return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, 1912 1.1 mrg (__v8df) 1913 1.1 mrg _mm512_setzero_pd (), 1914 1.1 mrg (__mmask8) __U); 1915 1.1 mrg } 1916 1.1 mrg 1917 1.7 mrg extern __inline __m512d 1918 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1919 1.7 mrg _mm512_maskz_reduce_round_pd (__mmask8 __U, __m512d __A, int __B, 1920 1.7 mrg const int __R) 1921 1.7 mrg { 1922 1.7 mrg return (__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df) __A, 1923 1.7 mrg __B, 1924 1.7 mrg (__v8df) 1925 1.7 mrg _mm512_setzero_pd (), 1926 1.7 mrg __U, __R); 1927 1.7 mrg } 1928 1.7 mrg 1929 1.1 mrg extern __inline __m512 1930 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1931 1.1 mrg _mm512_reduce_ps (__m512 __A, int __B) 1932 1.1 mrg { 1933 1.1 mrg return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, 1934 1.1 mrg (__v16sf) 1935 1.1 mrg _mm512_setzero_ps (), 1936 1.1 mrg (__mmask16) -1); 1937 1.1 mrg } 1938 1.1 mrg 1939 1.1 mrg extern __inline __m512 1940 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1941 1.7 mrg _mm512_reduce_round_ps (__m512 __A, int __B, const int __R) 1942 1.7 mrg { 1943 1.7 mrg return (__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf) __A, 1944 1.7 mrg __B, 1945 1.7 mrg (__v16sf) 1946 1.7 mrg _mm512_setzero_ps (), 1947 1.7 mrg (__mmask16) -1, __R); 1948 1.7 mrg } 1949 1.7 mrg 1950 1.7 mrg extern __inline __m512 1951 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1952 1.1 mrg _mm512_mask_reduce_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B) 1953 1.1 mrg { 1954 1.1 mrg return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, 1955 1.1 mrg (__v16sf) __W, 1956 1.1 mrg (__mmask16) __U); 1957 1.1 mrg } 1958 1.1 mrg 1959 1.1 mrg extern __inline __m512 1960 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1961 1.7 mrg _mm512_mask_reduce_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B, 1962 1.7 mrg const int __R) 1963 1.7 mrg { 1964 1.7 mrg return (__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf) __A, 1965 1.7 mrg __B, 1966 1.7 mrg (__v16sf) __W, 1967 1.7 mrg __U, __R); 1968 1.7 mrg } 1969 1.7 mrg 1970 1.7 mrg extern __inline __m512 1971 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1972 1.1 mrg _mm512_maskz_reduce_ps (__mmask16 __U, __m512 __A, int __B) 1973 1.1 mrg { 1974 1.1 mrg return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, 1975 1.1 mrg (__v16sf) 1976 1.1 mrg _mm512_setzero_ps (), 1977 1.1 mrg (__mmask16) __U); 1978 1.1 mrg } 1979 1.1 mrg 1980 1.7 mrg extern __inline __m512 1981 1.7 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1982 1.7 mrg _mm512_maskz_reduce_round_ps (__mmask16 __U, __m512 __A, int __B, 1983 1.7 mrg const int __R) 1984 1.7 mrg { 1985 1.7 mrg return (__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf) __A, 1986 1.7 mrg __B, 1987 1.7 mrg (__v16sf) 1988 1.7 mrg _mm512_setzero_ps (), 1989 1.7 mrg __U, __R); 1990 1.7 mrg } 1991 1.7 mrg 1992 1.1 mrg extern __inline __m256 1993 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1994 1.1 mrg _mm512_extractf32x8_ps (__m512 __A, const int __imm) 1995 1.1 mrg { 1996 1.1 mrg return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A, 1997 1.1 mrg __imm, 1998 1.1 mrg (__v8sf) 1999 1.1 mrg _mm256_setzero_ps (), 2000 1.1 mrg (__mmask8) -1); 2001 1.1 mrg } 2002 1.1 mrg 2003 1.1 mrg extern __inline __m256 2004 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2005 1.1 mrg _mm512_mask_extractf32x8_ps (__m256 __W, __mmask8 __U, __m512 __A, 2006 1.1 mrg const int __imm) 2007 1.1 mrg { 2008 1.1 mrg return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A, 2009 1.1 mrg __imm, 2010 1.1 mrg (__v8sf) __W, 2011 1.1 mrg (__mmask8) __U); 2012 1.1 mrg } 2013 1.1 mrg 2014 1.1 mrg extern __inline __m256 2015 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2016 1.1 mrg _mm512_maskz_extractf32x8_ps (__mmask8 __U, __m512 __A, 2017 1.1 mrg const int __imm) 2018 1.1 mrg { 2019 1.1 mrg return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A, 2020 1.1 mrg __imm, 2021 1.1 mrg (__v8sf) 2022 1.1 mrg _mm256_setzero_ps (), 2023 1.1 mrg (__mmask8) __U); 2024 1.1 mrg } 2025 1.1 mrg 2026 1.1 mrg extern __inline __m128d 2027 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2028 1.1 mrg _mm512_extractf64x2_pd (__m512d __A, const int __imm) 2029 1.1 mrg { 2030 1.1 mrg return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A, 2031 1.1 mrg __imm, 2032 1.1 mrg (__v2df) 2033 1.1 mrg _mm_setzero_pd (), 2034 1.3 mrg (__mmask8) -1); 2035 1.1 mrg } 2036 1.1 mrg 2037 1.1 mrg extern __inline __m128d 2038 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2039 1.1 mrg _mm512_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m512d __A, 2040 1.1 mrg const int __imm) 2041 1.1 mrg { 2042 1.1 mrg return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A, 2043 1.1 mrg __imm, 2044 1.1 mrg (__v2df) __W, 2045 1.1 mrg (__mmask8) 2046 1.1 mrg __U); 2047 1.1 mrg } 2048 1.1 mrg 2049 1.1 mrg extern __inline __m128d 2050 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2051 1.1 mrg _mm512_maskz_extractf64x2_pd (__mmask8 __U, __m512d __A, 2052 1.1 mrg const int __imm) 2053 1.1 mrg { 2054 1.1 mrg return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A, 2055 1.1 mrg __imm, 2056 1.1 mrg (__v2df) 2057 1.1 mrg _mm_setzero_pd (), 2058 1.1 mrg (__mmask8) 2059 1.1 mrg __U); 2060 1.1 mrg } 2061 1.1 mrg 2062 1.1 mrg extern __inline __m256i 2063 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2064 1.1 mrg _mm512_extracti32x8_epi32 (__m512i __A, const int __imm) 2065 1.1 mrg { 2066 1.1 mrg return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A, 2067 1.1 mrg __imm, 2068 1.1 mrg (__v8si) 2069 1.1 mrg _mm256_setzero_si256 (), 2070 1.1 mrg (__mmask8) -1); 2071 1.1 mrg } 2072 1.1 mrg 2073 1.1 mrg extern __inline __m256i 2074 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2075 1.1 mrg _mm512_mask_extracti32x8_epi32 (__m256i __W, __mmask8 __U, __m512i __A, 2076 1.1 mrg const int __imm) 2077 1.1 mrg { 2078 1.1 mrg return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A, 2079 1.1 mrg __imm, 2080 1.1 mrg (__v8si) __W, 2081 1.1 mrg (__mmask8) __U); 2082 1.1 mrg } 2083 1.1 mrg 2084 1.1 mrg extern __inline __m256i 2085 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2086 1.1 mrg _mm512_maskz_extracti32x8_epi32 (__mmask8 __U, __m512i __A, 2087 1.1 mrg const int __imm) 2088 1.1 mrg { 2089 1.1 mrg return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A, 2090 1.1 mrg __imm, 2091 1.1 mrg (__v8si) 2092 1.1 mrg _mm256_setzero_si256 (), 2093 1.1 mrg (__mmask8) __U); 2094 1.1 mrg } 2095 1.1 mrg 2096 1.1 mrg extern __inline __m128i 2097 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2098 1.1 mrg _mm512_extracti64x2_epi64 (__m512i __A, const int __imm) 2099 1.1 mrg { 2100 1.1 mrg return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A, 2101 1.1 mrg __imm, 2102 1.1 mrg (__v2di) 2103 1.3 mrg _mm_setzero_si128 (), 2104 1.3 mrg (__mmask8) -1); 2105 1.1 mrg } 2106 1.1 mrg 2107 1.1 mrg extern __inline __m128i 2108 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2109 1.1 mrg _mm512_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m512i __A, 2110 1.1 mrg const int __imm) 2111 1.1 mrg { 2112 1.1 mrg return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A, 2113 1.1 mrg __imm, 2114 1.1 mrg (__v2di) __W, 2115 1.1 mrg (__mmask8) 2116 1.1 mrg __U); 2117 1.1 mrg } 2118 1.1 mrg 2119 1.1 mrg extern __inline __m128i 2120 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2121 1.1 mrg _mm512_maskz_extracti64x2_epi64 (__mmask8 __U, __m512i __A, 2122 1.1 mrg const int __imm) 2123 1.1 mrg { 2124 1.1 mrg return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A, 2125 1.1 mrg __imm, 2126 1.1 mrg (__v2di) 2127 1.3 mrg _mm_setzero_si128 (), 2128 1.1 mrg (__mmask8) 2129 1.1 mrg __U); 2130 1.1 mrg } 2131 1.1 mrg 2132 1.1 mrg extern __inline __m512d 2133 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2134 1.1 mrg _mm512_range_round_pd (__m512d __A, __m512d __B, int __C, 2135 1.1 mrg const int __R) 2136 1.1 mrg { 2137 1.1 mrg return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, 2138 1.1 mrg (__v8df) __B, __C, 2139 1.1 mrg (__v8df) 2140 1.1 mrg _mm512_setzero_pd (), 2141 1.1 mrg (__mmask8) -1, 2142 1.1 mrg __R); 2143 1.1 mrg } 2144 1.1 mrg 2145 1.1 mrg extern __inline __m512d 2146 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2147 1.1 mrg _mm512_mask_range_round_pd (__m512d __W, __mmask8 __U, 2148 1.1 mrg __m512d __A, __m512d __B, int __C, 2149 1.1 mrg const int __R) 2150 1.1 mrg { 2151 1.1 mrg return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, 2152 1.1 mrg (__v8df) __B, __C, 2153 1.1 mrg (__v8df) __W, 2154 1.1 mrg (__mmask8) __U, 2155 1.1 mrg __R); 2156 1.1 mrg } 2157 1.1 mrg 2158 1.1 mrg extern __inline __m512d 2159 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2160 1.1 mrg _mm512_maskz_range_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2161 1.1 mrg int __C, const int __R) 2162 1.1 mrg { 2163 1.1 mrg return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, 2164 1.1 mrg (__v8df) __B, __C, 2165 1.1 mrg (__v8df) 2166 1.1 mrg _mm512_setzero_pd (), 2167 1.1 mrg (__mmask8) __U, 2168 1.1 mrg __R); 2169 1.1 mrg } 2170 1.1 mrg 2171 1.1 mrg extern __inline __m512 2172 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2173 1.1 mrg _mm512_range_round_ps (__m512 __A, __m512 __B, int __C, const int __R) 2174 1.1 mrg { 2175 1.1 mrg return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, 2176 1.1 mrg (__v16sf) __B, __C, 2177 1.1 mrg (__v16sf) 2178 1.1 mrg _mm512_setzero_ps (), 2179 1.1 mrg (__mmask16) -1, 2180 1.1 mrg __R); 2181 1.1 mrg } 2182 1.1 mrg 2183 1.1 mrg extern __inline __m512 2184 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2185 1.1 mrg _mm512_mask_range_round_ps (__m512 __W, __mmask16 __U, 2186 1.1 mrg __m512 __A, __m512 __B, int __C, 2187 1.1 mrg const int __R) 2188 1.1 mrg { 2189 1.1 mrg return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, 2190 1.1 mrg (__v16sf) __B, __C, 2191 1.1 mrg (__v16sf) __W, 2192 1.1 mrg (__mmask16) __U, 2193 1.1 mrg __R); 2194 1.1 mrg } 2195 1.1 mrg 2196 1.1 mrg extern __inline __m512 2197 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2198 1.1 mrg _mm512_maskz_range_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 2199 1.1 mrg int __C, const int __R) 2200 1.1 mrg { 2201 1.1 mrg return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, 2202 1.1 mrg (__v16sf) __B, __C, 2203 1.1 mrg (__v16sf) 2204 1.1 mrg _mm512_setzero_ps (), 2205 1.1 mrg (__mmask16) __U, 2206 1.1 mrg __R); 2207 1.1 mrg } 2208 1.1 mrg 2209 1.1 mrg extern __inline __m512i 2210 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2211 1.1 mrg _mm512_inserti32x8 (__m512i __A, __m256i __B, const int __imm) 2212 1.1 mrg { 2213 1.1 mrg return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A, 2214 1.1 mrg (__v8si) __B, 2215 1.1 mrg __imm, 2216 1.1 mrg (__v16si) 2217 1.1 mrg _mm512_setzero_si512 (), 2218 1.1 mrg (__mmask16) -1); 2219 1.1 mrg } 2220 1.1 mrg 2221 1.1 mrg extern __inline __m512i 2222 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2223 1.1 mrg _mm512_mask_inserti32x8 (__m512i __W, __mmask16 __U, __m512i __A, 2224 1.1 mrg __m256i __B, const int __imm) 2225 1.1 mrg { 2226 1.1 mrg return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A, 2227 1.1 mrg (__v8si) __B, 2228 1.1 mrg __imm, 2229 1.1 mrg (__v16si) __W, 2230 1.1 mrg (__mmask16) __U); 2231 1.1 mrg } 2232 1.1 mrg 2233 1.1 mrg extern __inline __m512i 2234 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2235 1.1 mrg _mm512_maskz_inserti32x8 (__mmask16 __U, __m512i __A, __m256i __B, 2236 1.1 mrg const int __imm) 2237 1.1 mrg { 2238 1.1 mrg return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A, 2239 1.1 mrg (__v8si) __B, 2240 1.1 mrg __imm, 2241 1.1 mrg (__v16si) 2242 1.1 mrg _mm512_setzero_si512 (), 2243 1.1 mrg (__mmask16) __U); 2244 1.1 mrg } 2245 1.1 mrg 2246 1.1 mrg extern __inline __m512 2247 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2248 1.1 mrg _mm512_insertf32x8 (__m512 __A, __m256 __B, const int __imm) 2249 1.1 mrg { 2250 1.1 mrg return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A, 2251 1.1 mrg (__v8sf) __B, 2252 1.1 mrg __imm, 2253 1.1 mrg (__v16sf) 2254 1.1 mrg _mm512_setzero_ps (), 2255 1.1 mrg (__mmask16) -1); 2256 1.1 mrg } 2257 1.1 mrg 2258 1.1 mrg extern __inline __m512 2259 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2260 1.1 mrg _mm512_mask_insertf32x8 (__m512 __W, __mmask16 __U, __m512 __A, 2261 1.1 mrg __m256 __B, const int __imm) 2262 1.1 mrg { 2263 1.1 mrg return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A, 2264 1.1 mrg (__v8sf) __B, 2265 1.1 mrg __imm, 2266 1.1 mrg (__v16sf) __W, 2267 1.1 mrg (__mmask16) __U); 2268 1.1 mrg } 2269 1.1 mrg 2270 1.1 mrg extern __inline __m512 2271 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2272 1.1 mrg _mm512_maskz_insertf32x8 (__mmask16 __U, __m512 __A, __m256 __B, 2273 1.1 mrg const int __imm) 2274 1.1 mrg { 2275 1.1 mrg return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A, 2276 1.1 mrg (__v8sf) __B, 2277 1.1 mrg __imm, 2278 1.1 mrg (__v16sf) 2279 1.1 mrg _mm512_setzero_ps (), 2280 1.1 mrg (__mmask16) __U); 2281 1.1 mrg } 2282 1.1 mrg 2283 1.1 mrg extern __inline __m512i 2284 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2285 1.1 mrg _mm512_inserti64x2 (__m512i __A, __m128i __B, const int __imm) 2286 1.1 mrg { 2287 1.1 mrg return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A, 2288 1.1 mrg (__v2di) __B, 2289 1.1 mrg __imm, 2290 1.1 mrg (__v8di) 2291 1.1 mrg _mm512_setzero_si512 (), 2292 1.3 mrg (__mmask8) -1); 2293 1.1 mrg } 2294 1.1 mrg 2295 1.1 mrg extern __inline __m512i 2296 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2297 1.1 mrg _mm512_mask_inserti64x2 (__m512i __W, __mmask8 __U, __m512i __A, 2298 1.1 mrg __m128i __B, const int __imm) 2299 1.1 mrg { 2300 1.1 mrg return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A, 2301 1.1 mrg (__v2di) __B, 2302 1.1 mrg __imm, 2303 1.1 mrg (__v8di) __W, 2304 1.1 mrg (__mmask8) 2305 1.1 mrg __U); 2306 1.1 mrg } 2307 1.1 mrg 2308 1.1 mrg extern __inline __m512i 2309 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2310 1.1 mrg _mm512_maskz_inserti64x2 (__mmask8 __U, __m512i __A, __m128i __B, 2311 1.1 mrg const int __imm) 2312 1.1 mrg { 2313 1.1 mrg return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A, 2314 1.1 mrg (__v2di) __B, 2315 1.1 mrg __imm, 2316 1.1 mrg (__v8di) 2317 1.1 mrg _mm512_setzero_si512 (), 2318 1.1 mrg (__mmask8) 2319 1.1 mrg __U); 2320 1.1 mrg } 2321 1.1 mrg 2322 1.1 mrg extern __inline __m512d 2323 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2324 1.1 mrg _mm512_insertf64x2 (__m512d __A, __m128d __B, const int __imm) 2325 1.1 mrg { 2326 1.1 mrg return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A, 2327 1.1 mrg (__v2df) __B, 2328 1.1 mrg __imm, 2329 1.1 mrg (__v8df) 2330 1.1 mrg _mm512_setzero_pd (), 2331 1.3 mrg (__mmask8) -1); 2332 1.1 mrg } 2333 1.1 mrg 2334 1.1 mrg extern __inline __m512d 2335 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2336 1.1 mrg _mm512_mask_insertf64x2 (__m512d __W, __mmask8 __U, __m512d __A, 2337 1.1 mrg __m128d __B, const int __imm) 2338 1.1 mrg { 2339 1.1 mrg return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A, 2340 1.1 mrg (__v2df) __B, 2341 1.1 mrg __imm, 2342 1.1 mrg (__v8df) __W, 2343 1.1 mrg (__mmask8) 2344 1.1 mrg __U); 2345 1.1 mrg } 2346 1.1 mrg 2347 1.1 mrg extern __inline __m512d 2348 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2349 1.1 mrg _mm512_maskz_insertf64x2 (__mmask8 __U, __m512d __A, __m128d __B, 2350 1.1 mrg const int __imm) 2351 1.1 mrg { 2352 1.1 mrg return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A, 2353 1.1 mrg (__v2df) __B, 2354 1.1 mrg __imm, 2355 1.1 mrg (__v8df) 2356 1.1 mrg _mm512_setzero_pd (), 2357 1.1 mrg (__mmask8) 2358 1.1 mrg __U); 2359 1.1 mrg } 2360 1.1 mrg 2361 1.1 mrg extern __inline __mmask8 2362 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2363 1.1 mrg _mm512_mask_fpclass_pd_mask (__mmask8 __U, __m512d __A, 2364 1.1 mrg const int __imm) 2365 1.1 mrg { 2366 1.1 mrg return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A, 2367 1.1 mrg __imm, __U); 2368 1.1 mrg } 2369 1.1 mrg 2370 1.1 mrg extern __inline __mmask8 2371 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2372 1.1 mrg _mm512_fpclass_pd_mask (__m512d __A, const int __imm) 2373 1.1 mrg { 2374 1.1 mrg return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A, 2375 1.1 mrg __imm, 2376 1.1 mrg (__mmask8) -1); 2377 1.1 mrg } 2378 1.1 mrg 2379 1.1 mrg extern __inline __mmask16 2380 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2381 1.1 mrg _mm512_mask_fpclass_ps_mask (__mmask16 __U, __m512 __A, 2382 1.1 mrg const int __imm) 2383 1.1 mrg { 2384 1.1 mrg return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A, 2385 1.1 mrg __imm, __U); 2386 1.1 mrg } 2387 1.1 mrg 2388 1.1 mrg extern __inline __mmask16 2389 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2390 1.1 mrg _mm512_fpclass_ps_mask (__m512 __A, const int __imm) 2391 1.1 mrg { 2392 1.1 mrg return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A, 2393 1.1 mrg __imm, 2394 1.3 mrg (__mmask16) -1); 2395 1.1 mrg } 2396 1.1 mrg 2397 1.1 mrg #else 2398 1.3 mrg #define _kshiftli_mask8(X, Y) \ 2399 1.3 mrg ((__mmask8) __builtin_ia32_kshiftliqi ((__mmask8)(X), (__mmask8)(Y))) 2400 1.3 mrg 2401 1.3 mrg #define _kshiftri_mask8(X, Y) \ 2402 1.3 mrg ((__mmask8) __builtin_ia32_kshiftriqi ((__mmask8)(X), (__mmask8)(Y))) 2403 1.3 mrg 2404 1.4 mrg #define _mm_range_sd(A, B, C) \ 2405 1.4 mrg ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ 2406 1.4 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ 2407 1.4 mrg (__mmask8) -1, _MM_FROUND_CUR_DIRECTION)) 2408 1.4 mrg 2409 1.4 mrg #define _mm_mask_range_sd(W, U, A, B, C) \ 2410 1.4 mrg ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ 2411 1.4 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), \ 2412 1.4 mrg (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 2413 1.4 mrg 2414 1.4 mrg #define _mm_maskz_range_sd(U, A, B, C) \ 2415 1.4 mrg ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ 2416 1.4 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ 2417 1.4 mrg (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 2418 1.1 mrg 2419 1.1 mrg #define _mm_range_ss(A, B, C) \ 2420 1.4 mrg ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ 2421 1.4 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ 2422 1.4 mrg (__mmask8) -1, _MM_FROUND_CUR_DIRECTION)) 2423 1.4 mrg 2424 1.4 mrg #define _mm_mask_range_ss(W, U, A, B, C) \ 2425 1.4 mrg ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ 2426 1.4 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \ 2427 1.4 mrg (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 2428 1.4 mrg 2429 1.4 mrg #define _mm_maskz_range_ss(U, A, B, C) \ 2430 1.4 mrg ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ 2431 1.4 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ 2432 1.4 mrg (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 2433 1.4 mrg 2434 1.4 mrg #define _mm_range_round_sd(A, B, C, R) \ 2435 1.4 mrg ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ 2436 1.4 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ 2437 1.4 mrg (__mmask8) -1, (R))) 2438 1.4 mrg 2439 1.4 mrg #define _mm_mask_range_round_sd(W, U, A, B, C, R) \ 2440 1.4 mrg ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ 2441 1.4 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), \ 2442 1.4 mrg (__mmask8)(U), (R))) 2443 1.4 mrg 2444 1.4 mrg #define _mm_maskz_range_round_sd(U, A, B, C, R) \ 2445 1.4 mrg ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ 2446 1.4 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ 2447 1.4 mrg (__mmask8)(U), (R))) 2448 1.1 mrg 2449 1.1 mrg #define _mm_range_round_ss(A, B, C, R) \ 2450 1.4 mrg ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ 2451 1.4 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ 2452 1.4 mrg (__mmask8) -1, (R))) 2453 1.4 mrg 2454 1.4 mrg #define _mm_mask_range_round_ss(W, U, A, B, C, R) \ 2455 1.4 mrg ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ 2456 1.4 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \ 2457 1.4 mrg (__mmask8)(U), (R))) 2458 1.4 mrg 2459 1.4 mrg #define _mm_maskz_range_round_ss(U, A, B, C, R) \ 2460 1.4 mrg ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ 2461 1.4 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ 2462 1.4 mrg (__mmask8)(U), (R))) 2463 1.1 mrg 2464 1.1 mrg #define _mm512_cvtt_roundpd_epi64(A, B) \ 2465 1.3 mrg ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di) \ 2466 1.3 mrg _mm512_setzero_si512 (), \ 2467 1.3 mrg -1, (B))) 2468 1.1 mrg 2469 1.1 mrg #define _mm512_mask_cvtt_roundpd_epi64(W, U, A, B) \ 2470 1.3 mrg ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di)(W), (U), (B))) 2471 1.1 mrg 2472 1.1 mrg #define _mm512_maskz_cvtt_roundpd_epi64(U, A, B) \ 2473 1.3 mrg ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) 2474 1.1 mrg 2475 1.1 mrg #define _mm512_cvtt_roundpd_epu64(A, B) \ 2476 1.3 mrg ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) 2477 1.1 mrg 2478 1.1 mrg #define _mm512_mask_cvtt_roundpd_epu64(W, U, A, B) \ 2479 1.3 mrg ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)(W), (U), (B))) 2480 1.1 mrg 2481 1.1 mrg #define _mm512_maskz_cvtt_roundpd_epu64(U, A, B) \ 2482 1.3 mrg ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) 2483 1.1 mrg 2484 1.1 mrg #define _mm512_cvtt_roundps_epi64(A, B) \ 2485 1.3 mrg ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) 2486 1.1 mrg 2487 1.1 mrg #define _mm512_mask_cvtt_roundps_epi64(W, U, A, B) \ 2488 1.3 mrg ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)(W), (U), (B))) 2489 1.1 mrg 2490 1.1 mrg #define _mm512_maskz_cvtt_roundps_epi64(U, A, B) \ 2491 1.3 mrg ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) 2492 1.1 mrg 2493 1.1 mrg #define _mm512_cvtt_roundps_epu64(A, B) \ 2494 1.3 mrg ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) 2495 1.1 mrg 2496 1.1 mrg #define _mm512_mask_cvtt_roundps_epu64(W, U, A, B) \ 2497 1.3 mrg ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)(W), (U), (B))) 2498 1.1 mrg 2499 1.1 mrg #define _mm512_maskz_cvtt_roundps_epu64(U, A, B) \ 2500 1.3 mrg ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) 2501 1.1 mrg 2502 1.1 mrg #define _mm512_cvt_roundpd_epi64(A, B) \ 2503 1.3 mrg ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) 2504 1.1 mrg 2505 1.1 mrg #define _mm512_mask_cvt_roundpd_epi64(W, U, A, B) \ 2506 1.3 mrg ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)(W), (U), (B))) 2507 1.1 mrg 2508 1.1 mrg #define _mm512_maskz_cvt_roundpd_epi64(U, A, B) \ 2509 1.3 mrg ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) 2510 1.1 mrg 2511 1.1 mrg #define _mm512_cvt_roundpd_epu64(A, B) \ 2512 1.3 mrg ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) 2513 1.1 mrg 2514 1.1 mrg #define _mm512_mask_cvt_roundpd_epu64(W, U, A, B) \ 2515 1.3 mrg ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)(W), (U), (B))) 2516 1.1 mrg 2517 1.1 mrg #define _mm512_maskz_cvt_roundpd_epu64(U, A, B) \ 2518 1.3 mrg ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) 2519 1.1 mrg 2520 1.1 mrg #define _mm512_cvt_roundps_epi64(A, B) \ 2521 1.3 mrg ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) 2522 1.1 mrg 2523 1.1 mrg #define _mm512_mask_cvt_roundps_epi64(W, U, A, B) \ 2524 1.3 mrg ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)(W), (U), (B))) 2525 1.1 mrg 2526 1.1 mrg #define _mm512_maskz_cvt_roundps_epi64(U, A, B) \ 2527 1.3 mrg ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) 2528 1.1 mrg 2529 1.1 mrg #define _mm512_cvt_roundps_epu64(A, B) \ 2530 1.3 mrg ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) 2531 1.1 mrg 2532 1.1 mrg #define _mm512_mask_cvt_roundps_epu64(W, U, A, B) \ 2533 1.3 mrg ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)(W), (U), (B))) 2534 1.1 mrg 2535 1.1 mrg #define _mm512_maskz_cvt_roundps_epu64(U, A, B) \ 2536 1.3 mrg ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) 2537 1.1 mrg 2538 1.1 mrg #define _mm512_cvt_roundepi64_ps(A, B) \ 2539 1.3 mrg ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), -1, (B))) 2540 1.1 mrg 2541 1.1 mrg #define _mm512_mask_cvt_roundepi64_ps(W, U, A, B) \ 2542 1.3 mrg ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (W), (U), (B))) 2543 1.1 mrg 2544 1.1 mrg #define _mm512_maskz_cvt_roundepi64_ps(U, A, B) \ 2545 1.3 mrg ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), (U), (B))) 2546 1.1 mrg 2547 1.1 mrg #define _mm512_cvt_roundepu64_ps(A, B) \ 2548 1.3 mrg ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), -1, (B))) 2549 1.1 mrg 2550 1.1 mrg #define _mm512_mask_cvt_roundepu64_ps(W, U, A, B) \ 2551 1.3 mrg ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (W), (U), (B))) 2552 1.1 mrg 2553 1.1 mrg #define _mm512_maskz_cvt_roundepu64_ps(U, A, B) \ 2554 1.3 mrg ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), (U), (B))) 2555 1.1 mrg 2556 1.1 mrg #define _mm512_cvt_roundepi64_pd(A, B) \ 2557 1.3 mrg ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), -1, (B))) 2558 1.1 mrg 2559 1.1 mrg #define _mm512_mask_cvt_roundepi64_pd(W, U, A, B) \ 2560 1.3 mrg ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (W), (U), (B))) 2561 1.1 mrg 2562 1.1 mrg #define _mm512_maskz_cvt_roundepi64_pd(U, A, B) \ 2563 1.3 mrg ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), (U), (B))) 2564 1.1 mrg 2565 1.1 mrg #define _mm512_cvt_roundepu64_pd(A, B) \ 2566 1.3 mrg ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), -1, (B))) 2567 1.1 mrg 2568 1.1 mrg #define _mm512_mask_cvt_roundepu64_pd(W, U, A, B) \ 2569 1.3 mrg ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (W), (U), (B))) 2570 1.1 mrg 2571 1.1 mrg #define _mm512_maskz_cvt_roundepu64_pd(U, A, B) \ 2572 1.3 mrg ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), (U), (B))) 2573 1.1 mrg 2574 1.1 mrg #define _mm512_reduce_pd(A, B) \ 2575 1.1 mrg ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \ 2576 1.3 mrg (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)-1)) 2577 1.1 mrg 2578 1.7 mrg #define _mm512_reduce_round_pd(A, B, R) \ 2579 1.7 mrg ((__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df)(__m512d)(A),\ 2580 1.7 mrg (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)-1, (R))) 2581 1.7 mrg 2582 1.1 mrg #define _mm512_mask_reduce_pd(W, U, A, B) \ 2583 1.1 mrg ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \ 2584 1.1 mrg (int)(B), (__v8df)(__m512d)(W), (__mmask8)(U))) 2585 1.1 mrg 2586 1.7 mrg #define _mm512_mask_reduce_round_pd(W, U, A, B, R) \ 2587 1.7 mrg ((__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df)(__m512d)(A),\ 2588 1.7 mrg (int)(B), (__v8df)(__m512d)(W), (U), (R))) 2589 1.7 mrg 2590 1.1 mrg #define _mm512_maskz_reduce_pd(U, A, B) \ 2591 1.1 mrg ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \ 2592 1.3 mrg (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)(U))) 2593 1.1 mrg 2594 1.7 mrg #define _mm512_maskz_reduce_round_pd(U, A, B, R) \ 2595 1.7 mrg ((__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df)(__m512d)(A),\ 2596 1.7 mrg (int)(B), (__v8df)_mm512_setzero_pd (), (U), (R))) 2597 1.7 mrg 2598 1.1 mrg #define _mm512_reduce_ps(A, B) \ 2599 1.1 mrg ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \ 2600 1.3 mrg (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)-1)) 2601 1.1 mrg 2602 1.7 mrg #define _mm512_reduce_round_ps(A, B, R) \ 2603 1.7 mrg ((__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf)(__m512)(A),\ 2604 1.7 mrg (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)-1, (R))) 2605 1.7 mrg 2606 1.1 mrg #define _mm512_mask_reduce_ps(W, U, A, B) \ 2607 1.1 mrg ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \ 2608 1.1 mrg (int)(B), (__v16sf)(__m512)(W), (__mmask16)(U))) 2609 1.1 mrg 2610 1.7 mrg #define _mm512_mask_reduce_round_ps(W, U, A, B, R) \ 2611 1.7 mrg ((__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf)(__m512)(A),\ 2612 1.7 mrg (int)(B), (__v16sf)(__m512)(W), (U), (R))) 2613 1.7 mrg 2614 1.1 mrg #define _mm512_maskz_reduce_ps(U, A, B) \ 2615 1.1 mrg ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \ 2616 1.3 mrg (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)(U))) 2617 1.1 mrg 2618 1.7 mrg #define _mm512_maskz_reduce_round_ps(U, A, B, R) \ 2619 1.7 mrg ((__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf)(__m512)(A),\ 2620 1.7 mrg (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), (R))) 2621 1.7 mrg 2622 1.1 mrg #define _mm512_extractf32x8_ps(X, C) \ 2623 1.1 mrg ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \ 2624 1.3 mrg (int) (C), (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8)-1)) 2625 1.1 mrg 2626 1.1 mrg #define _mm512_mask_extractf32x8_ps(W, U, X, C) \ 2627 1.1 mrg ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \ 2628 1.1 mrg (int) (C), (__v8sf)(__m256) (W), (__mmask8) (U))) 2629 1.1 mrg 2630 1.1 mrg #define _mm512_maskz_extractf32x8_ps(U, X, C) \ 2631 1.1 mrg ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \ 2632 1.3 mrg (int) (C), (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8) (U))) 2633 1.1 mrg 2634 1.1 mrg #define _mm512_extractf64x2_pd(X, C) \ 2635 1.1 mrg ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\ 2636 1.3 mrg (int) (C), (__v2df)(__m128d) _mm_setzero_pd (), (__mmask8)-1)) 2637 1.1 mrg 2638 1.1 mrg #define _mm512_mask_extractf64x2_pd(W, U, X, C) \ 2639 1.1 mrg ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\ 2640 1.1 mrg (int) (C), (__v2df)(__m128d) (W), (__mmask8) (U))) 2641 1.1 mrg 2642 1.1 mrg #define _mm512_maskz_extractf64x2_pd(U, X, C) \ 2643 1.1 mrg ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\ 2644 1.3 mrg (int) (C), (__v2df)(__m128d) _mm_setzero_pd (), (__mmask8) (U))) 2645 1.1 mrg 2646 1.1 mrg #define _mm512_extracti32x8_epi32(X, C) \ 2647 1.1 mrg ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \ 2648 1.3 mrg (int) (C), (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8)-1)) 2649 1.1 mrg 2650 1.1 mrg #define _mm512_mask_extracti32x8_epi32(W, U, X, C) \ 2651 1.1 mrg ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \ 2652 1.1 mrg (int) (C), (__v8si)(__m256i) (W), (__mmask8) (U))) 2653 1.1 mrg 2654 1.1 mrg #define _mm512_maskz_extracti32x8_epi32(U, X, C) \ 2655 1.1 mrg ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \ 2656 1.3 mrg (int) (C), (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8) (U))) 2657 1.1 mrg 2658 1.1 mrg #define _mm512_extracti64x2_epi64(X, C) \ 2659 1.1 mrg ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\ 2660 1.3 mrg (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8)-1)) 2661 1.1 mrg 2662 1.1 mrg #define _mm512_mask_extracti64x2_epi64(W, U, X, C) \ 2663 1.1 mrg ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\ 2664 1.1 mrg (int) (C), (__v2di)(__m128i) (W), (__mmask8) (U))) 2665 1.1 mrg 2666 1.1 mrg #define _mm512_maskz_extracti64x2_epi64(U, X, C) \ 2667 1.1 mrg ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\ 2668 1.3 mrg (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8) (U))) 2669 1.1 mrg 2670 1.1 mrg #define _mm512_range_pd(A, B, C) \ 2671 1.1 mrg ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \ 2672 1.1 mrg (__v8df)(__m512d)(B), (int)(C), \ 2673 1.3 mrg (__v8df)_mm512_setzero_pd (), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)) 2674 1.1 mrg 2675 1.1 mrg #define _mm512_mask_range_pd(W, U, A, B, C) \ 2676 1.1 mrg ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \ 2677 1.1 mrg (__v8df)(__m512d)(B), (int)(C), \ 2678 1.1 mrg (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 2679 1.1 mrg 2680 1.1 mrg #define _mm512_maskz_range_pd(U, A, B, C) \ 2681 1.1 mrg ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \ 2682 1.1 mrg (__v8df)(__m512d)(B), (int)(C), \ 2683 1.3 mrg (__v8df)_mm512_setzero_pd (), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 2684 1.1 mrg 2685 1.1 mrg #define _mm512_range_ps(A, B, C) \ 2686 1.1 mrg ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \ 2687 1.1 mrg (__v16sf)(__m512)(B), (int)(C), \ 2688 1.3 mrg (__v16sf)_mm512_setzero_ps (), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION)) 2689 1.1 mrg 2690 1.1 mrg #define _mm512_mask_range_ps(W, U, A, B, C) \ 2691 1.1 mrg ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \ 2692 1.1 mrg (__v16sf)(__m512)(B), (int)(C), \ 2693 1.1 mrg (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) 2694 1.1 mrg 2695 1.1 mrg #define _mm512_maskz_range_ps(U, A, B, C) \ 2696 1.1 mrg ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \ 2697 1.1 mrg (__v16sf)(__m512)(B), (int)(C), \ 2698 1.3 mrg (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) 2699 1.1 mrg 2700 1.1 mrg #define _mm512_range_round_pd(A, B, C, R) \ 2701 1.1 mrg ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \ 2702 1.1 mrg (__v8df)(__m512d)(B), (int)(C), \ 2703 1.3 mrg (__v8df)_mm512_setzero_pd (), (__mmask8)-1, (R))) 2704 1.1 mrg 2705 1.1 mrg #define _mm512_mask_range_round_pd(W, U, A, B, C, R) \ 2706 1.1 mrg ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \ 2707 1.1 mrg (__v8df)(__m512d)(B), (int)(C), \ 2708 1.1 mrg (__v8df)(__m512d)(W), (__mmask8)(U), (R))) 2709 1.1 mrg 2710 1.1 mrg #define _mm512_maskz_range_round_pd(U, A, B, C, R) \ 2711 1.1 mrg ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \ 2712 1.1 mrg (__v8df)(__m512d)(B), (int)(C), \ 2713 1.3 mrg (__v8df)_mm512_setzero_pd (), (__mmask8)(U), (R))) 2714 1.1 mrg 2715 1.1 mrg #define _mm512_range_round_ps(A, B, C, R) \ 2716 1.1 mrg ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \ 2717 1.1 mrg (__v16sf)(__m512)(B), (int)(C), \ 2718 1.3 mrg (__v16sf)_mm512_setzero_ps (), (__mmask16)-1, (R))) 2719 1.1 mrg 2720 1.1 mrg #define _mm512_mask_range_round_ps(W, U, A, B, C, R) \ 2721 1.1 mrg ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \ 2722 1.1 mrg (__v16sf)(__m512)(B), (int)(C), \ 2723 1.1 mrg (__v16sf)(__m512)(W), (__mmask16)(U), (R))) 2724 1.1 mrg 2725 1.1 mrg #define _mm512_maskz_range_round_ps(U, A, B, C, R) \ 2726 1.1 mrg ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \ 2727 1.1 mrg (__v16sf)(__m512)(B), (int)(C), \ 2728 1.3 mrg (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), (R))) 2729 1.1 mrg 2730 1.1 mrg #define _mm512_insertf64x2(X, Y, C) \ 2731 1.1 mrg ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\ 2732 1.1 mrg (__v2df)(__m128d) (Y), (int) (C), (__v8df)(__m512d) (X), \ 2733 1.1 mrg (__mmask8)-1)) 2734 1.1 mrg 2735 1.1 mrg #define _mm512_mask_insertf64x2(W, U, X, Y, C) \ 2736 1.1 mrg ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\ 2737 1.1 mrg (__v2df)(__m128d) (Y), (int) (C), (__v8df)(__m512d) (W), \ 2738 1.1 mrg (__mmask8) (U))) 2739 1.1 mrg 2740 1.1 mrg #define _mm512_maskz_insertf64x2(U, X, Y, C) \ 2741 1.1 mrg ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\ 2742 1.1 mrg (__v2df)(__m128d) (Y), (int) (C), \ 2743 1.3 mrg (__v8df)(__m512d) _mm512_setzero_pd (), (__mmask8) (U))) 2744 1.1 mrg 2745 1.1 mrg #define _mm512_inserti64x2(X, Y, C) \ 2746 1.1 mrg ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\ 2747 1.1 mrg (__v2di)(__m128i) (Y), (int) (C), (__v8di)(__m512i) (X), (__mmask8)-1)) 2748 1.1 mrg 2749 1.1 mrg #define _mm512_mask_inserti64x2(W, U, X, Y, C) \ 2750 1.1 mrg ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\ 2751 1.1 mrg (__v2di)(__m128i) (Y), (int) (C), (__v8di)(__m512i) (W), \ 2752 1.1 mrg (__mmask8) (U))) 2753 1.1 mrg 2754 1.1 mrg #define _mm512_maskz_inserti64x2(U, X, Y, C) \ 2755 1.1 mrg ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\ 2756 1.1 mrg (__v2di)(__m128i) (Y), (int) (C), \ 2757 1.1 mrg (__v8di)(__m512i) _mm512_setzero_si512 (), (__mmask8) (U))) 2758 1.1 mrg 2759 1.1 mrg #define _mm512_insertf32x8(X, Y, C) \ 2760 1.1 mrg ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \ 2761 1.1 mrg (__v8sf)(__m256) (Y), (int) (C),\ 2762 1.3 mrg (__v16sf)(__m512)_mm512_setzero_ps (),\ 2763 1.1 mrg (__mmask16)-1)) 2764 1.1 mrg 2765 1.1 mrg #define _mm512_mask_insertf32x8(W, U, X, Y, C) \ 2766 1.1 mrg ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \ 2767 1.1 mrg (__v8sf)(__m256) (Y), (int) (C),\ 2768 1.1 mrg (__v16sf)(__m512)(W),\ 2769 1.1 mrg (__mmask16)(U))) 2770 1.1 mrg 2771 1.1 mrg #define _mm512_maskz_insertf32x8(U, X, Y, C) \ 2772 1.1 mrg ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \ 2773 1.1 mrg (__v8sf)(__m256) (Y), (int) (C),\ 2774 1.3 mrg (__v16sf)(__m512)_mm512_setzero_ps (),\ 2775 1.1 mrg (__mmask16)(U))) 2776 1.1 mrg 2777 1.1 mrg #define _mm512_inserti32x8(X, Y, C) \ 2778 1.1 mrg ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \ 2779 1.1 mrg (__v8si)(__m256i) (Y), (int) (C),\ 2780 1.1 mrg (__v16si)(__m512i)_mm512_setzero_si512 (),\ 2781 1.1 mrg (__mmask16)-1)) 2782 1.1 mrg 2783 1.1 mrg #define _mm512_mask_inserti32x8(W, U, X, Y, C) \ 2784 1.1 mrg ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \ 2785 1.1 mrg (__v8si)(__m256i) (Y), (int) (C),\ 2786 1.1 mrg (__v16si)(__m512i)(W),\ 2787 1.1 mrg (__mmask16)(U))) 2788 1.1 mrg 2789 1.1 mrg #define _mm512_maskz_inserti32x8(U, X, Y, C) \ 2790 1.1 mrg ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \ 2791 1.1 mrg (__v8si)(__m256i) (Y), (int) (C),\ 2792 1.1 mrg (__v16si)(__m512i)_mm512_setzero_si512 (),\ 2793 1.1 mrg (__mmask16)(U))) 2794 1.1 mrg 2795 1.6 mrg #define _mm_fpclass_ss_mask(X, C) \ 2796 1.6 mrg ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X), \ 2797 1.6 mrg (int) (C), (__mmask8) (-1))) \ 2798 1.6 mrg 2799 1.6 mrg #define _mm_fpclass_sd_mask(X, C) \ 2800 1.6 mrg ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), \ 2801 1.6 mrg (int) (C), (__mmask8) (-1))) \ 2802 1.6 mrg 2803 1.7 mrg #define _mm_mask_fpclass_ss_mask(U, X, C) \ 2804 1.6 mrg ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X), \ 2805 1.6 mrg (int) (C), (__mmask8) (U))) 2806 1.6 mrg 2807 1.7 mrg #define _mm_mask_fpclass_sd_mask(U, X, C) \ 2808 1.6 mrg ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), \ 2809 1.6 mrg (int) (C), (__mmask8) (U))) 2810 1.1 mrg 2811 1.1 mrg #define _mm512_mask_fpclass_pd_mask(u, X, C) \ 2812 1.1 mrg ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \ 2813 1.1 mrg (int) (C), (__mmask8)(u))) 2814 1.1 mrg 2815 1.1 mrg #define _mm512_mask_fpclass_ps_mask(u, x, c) \ 2816 1.1 mrg ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x),\ 2817 1.6 mrg (int) (c),(__mmask16)(u))) 2818 1.1 mrg 2819 1.1 mrg #define _mm512_fpclass_pd_mask(X, C) \ 2820 1.1 mrg ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \ 2821 1.1 mrg (int) (C), (__mmask8)-1)) 2822 1.1 mrg 2823 1.1 mrg #define _mm512_fpclass_ps_mask(x, c) \ 2824 1.1 mrg ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x),\ 2825 1.6 mrg (int) (c),(__mmask16)-1)) 2826 1.1 mrg 2827 1.1 mrg #define _mm_reduce_sd(A, B, C) \ 2828 1.4 mrg ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \ 2829 1.4 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ 2830 1.4 mrg (__mmask8)-1)) 2831 1.4 mrg 2832 1.4 mrg #define _mm_mask_reduce_sd(W, U, A, B, C) \ 2833 1.4 mrg ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \ 2834 1.4 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), (__mmask8)(U))) 2835 1.4 mrg 2836 1.4 mrg #define _mm_maskz_reduce_sd(U, A, B, C) \ 2837 1.4 mrg ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \ 2838 1.4 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ 2839 1.4 mrg (__mmask8)(U))) 2840 1.1 mrg 2841 1.7 mrg #define _mm_reduce_round_sd(A, B, C, R) \ 2842 1.7 mrg ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \ 2843 1.7 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ 2844 1.7 mrg (__mmask8)(-1), (int)(R))) 2845 1.7 mrg 2846 1.7 mrg #define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \ 2847 1.7 mrg ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \ 2848 1.7 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), \ 2849 1.7 mrg (__mmask8)(U), (int)(R))) 2850 1.7 mrg 2851 1.7 mrg #define _mm_maskz_reduce_round_sd(U, A, B, C, R) \ 2852 1.7 mrg ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \ 2853 1.7 mrg (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ 2854 1.7 mrg (__mmask8)(U), (int)(R))) 2855 1.7 mrg 2856 1.1 mrg #define _mm_reduce_ss(A, B, C) \ 2857 1.4 mrg ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \ 2858 1.4 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ 2859 1.4 mrg (__mmask8)-1)) 2860 1.4 mrg 2861 1.4 mrg #define _mm_mask_reduce_ss(W, U, A, B, C) \ 2862 1.4 mrg ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \ 2863 1.4 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), (__mmask8)(U))) 2864 1.4 mrg 2865 1.4 mrg #define _mm_maskz_reduce_ss(U, A, B, C) \ 2866 1.4 mrg ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \ 2867 1.4 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ 2868 1.4 mrg (__mmask8)(U))) 2869 1.4 mrg 2870 1.7 mrg #define _mm_reduce_round_ss(A, B, C, R) \ 2871 1.7 mrg ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \ 2872 1.7 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ 2873 1.7 mrg (__mmask8)(-1), (int)(R))) 2874 1.7 mrg 2875 1.7 mrg #define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \ 2876 1.7 mrg ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \ 2877 1.7 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \ 2878 1.7 mrg (__mmask8)(U), (int)(R))) 2879 1.7 mrg 2880 1.7 mrg #define _mm_maskz_reduce_round_ss(U, A, B, C, R) \ 2881 1.7 mrg ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \ 2882 1.7 mrg (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ 2883 1.7 mrg (__mmask8)(U), (int)(R))) 2884 1.4 mrg 2885 1.1 mrg 2886 1.1 mrg #endif 2887 1.1 mrg 2888 1.1 mrg #ifdef __DISABLE_AVX512DQ__ 2889 1.1 mrg #undef __DISABLE_AVX512DQ__ 2890 1.1 mrg #pragma GCC pop_options 2891 1.1 mrg #endif /* __DISABLE_AVX512DQ__ */ 2892 1.1 mrg 2893 1.1 mrg #endif /* _AVX512DQINTRIN_H_INCLUDED */ 2894