/* Copyright (C) 2014-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
26 1.1 mrg #endif 27 1.1 mrg 28 1.1 mrg #ifndef _AVX512VLDQINTRIN_H_INCLUDED 29 1.1 mrg #define _AVX512VLDQINTRIN_H_INCLUDED 30 1.1 mrg 31 1.1 mrg #if !defined(__AVX512VL__) || !defined(__AVX512DQ__) 32 1.1 mrg #pragma GCC push_options 33 1.1 mrg #pragma GCC target("avx512vl,avx512dq") 34 1.1 mrg #define __DISABLE_AVX512VLDQ__ 35 1.1 mrg #endif /* __AVX512VLDQ__ */ 36 1.1 mrg 37 1.1 mrg extern __inline __m256i 38 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 39 1.1 mrg _mm256_cvttpd_epi64 (__m256d __A) 40 1.1 mrg { 41 1.1 mrg return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 42 1.1 mrg (__v4di) 43 1.1 mrg _mm256_setzero_si256 (), 44 1.1 mrg (__mmask8) -1); 45 1.1 mrg } 46 1.1 mrg 47 1.1 mrg extern __inline __m256i 48 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 49 1.1 mrg _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) 50 1.1 mrg { 51 1.1 mrg return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 52 1.1 mrg (__v4di) __W, 53 1.1 mrg (__mmask8) __U); 54 1.1 mrg } 55 1.1 mrg 56 1.1 mrg extern __inline __m256i 57 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 58 1.1 mrg _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) 59 1.1 mrg { 60 1.1 mrg return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 61 1.1 mrg (__v4di) 62 1.1 mrg _mm256_setzero_si256 (), 63 1.1 mrg (__mmask8) __U); 64 1.1 mrg } 65 1.1 mrg 66 1.1 mrg extern __inline __m128i 67 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 68 1.1 mrg _mm_cvttpd_epi64 (__m128d __A) 69 1.1 mrg { 70 1.1 mrg return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 71 1.1 mrg (__v2di) 72 1.3 mrg _mm_setzero_si128 (), 73 1.1 mrg (__mmask8) -1); 74 1.1 mrg } 75 1.1 mrg 76 1.1 mrg extern __inline __m128i 77 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 78 1.1 mrg _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d 
__A) 79 1.1 mrg { 80 1.1 mrg return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 81 1.1 mrg (__v2di) __W, 82 1.1 mrg (__mmask8) __U); 83 1.1 mrg } 84 1.1 mrg 85 1.1 mrg extern __inline __m128i 86 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 87 1.1 mrg _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) 88 1.1 mrg { 89 1.1 mrg return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 90 1.1 mrg (__v2di) 91 1.1 mrg _mm_setzero_si128 (), 92 1.1 mrg (__mmask8) __U); 93 1.1 mrg } 94 1.1 mrg 95 1.1 mrg extern __inline __m256i 96 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 97 1.1 mrg _mm256_cvttpd_epu64 (__m256d __A) 98 1.1 mrg { 99 1.1 mrg return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 100 1.1 mrg (__v4di) 101 1.1 mrg _mm256_setzero_si256 (), 102 1.1 mrg (__mmask8) -1); 103 1.1 mrg } 104 1.1 mrg 105 1.1 mrg extern __inline __m256i 106 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 107 1.1 mrg _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) 108 1.1 mrg { 109 1.1 mrg return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 110 1.1 mrg (__v4di) __W, 111 1.1 mrg (__mmask8) __U); 112 1.1 mrg } 113 1.1 mrg 114 1.1 mrg extern __inline __m256i 115 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 116 1.1 mrg _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) 117 1.1 mrg { 118 1.1 mrg return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 119 1.1 mrg (__v4di) 120 1.1 mrg _mm256_setzero_si256 (), 121 1.1 mrg (__mmask8) __U); 122 1.1 mrg } 123 1.1 mrg 124 1.1 mrg extern __inline __m128i 125 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 126 1.1 mrg _mm_cvttpd_epu64 (__m128d __A) 127 1.1 mrg { 128 1.1 mrg return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 129 1.1 mrg (__v2di) 130 1.3 mrg _mm_setzero_si128 (), 131 1.1 mrg (__mmask8) -1); 132 1.1 
mrg } 133 1.1 mrg 134 1.1 mrg extern __inline __m128i 135 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 136 1.1 mrg _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) 137 1.1 mrg { 138 1.1 mrg return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 139 1.1 mrg (__v2di) __W, 140 1.1 mrg (__mmask8) __U); 141 1.1 mrg } 142 1.1 mrg 143 1.1 mrg extern __inline __m128i 144 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 145 1.1 mrg _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) 146 1.1 mrg { 147 1.1 mrg return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 148 1.1 mrg (__v2di) 149 1.1 mrg _mm_setzero_si128 (), 150 1.1 mrg (__mmask8) __U); 151 1.1 mrg } 152 1.1 mrg 153 1.1 mrg extern __inline __m256i 154 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 155 1.1 mrg _mm256_cvtpd_epi64 (__m256d __A) 156 1.1 mrg { 157 1.1 mrg return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 158 1.1 mrg (__v4di) 159 1.1 mrg _mm256_setzero_si256 (), 160 1.1 mrg (__mmask8) -1); 161 1.1 mrg } 162 1.1 mrg 163 1.1 mrg extern __inline __m256i 164 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 165 1.1 mrg _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) 166 1.1 mrg { 167 1.1 mrg return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 168 1.1 mrg (__v4di) __W, 169 1.1 mrg (__mmask8) __U); 170 1.1 mrg } 171 1.1 mrg 172 1.1 mrg extern __inline __m256i 173 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 174 1.1 mrg _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) 175 1.1 mrg { 176 1.1 mrg return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 177 1.1 mrg (__v4di) 178 1.1 mrg _mm256_setzero_si256 (), 179 1.1 mrg (__mmask8) __U); 180 1.1 mrg } 181 1.1 mrg 182 1.1 mrg extern __inline __m128i 183 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 184 1.1 
mrg _mm_cvtpd_epi64 (__m128d __A) 185 1.1 mrg { 186 1.1 mrg return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 187 1.1 mrg (__v2di) 188 1.3 mrg _mm_setzero_si128 (), 189 1.1 mrg (__mmask8) -1); 190 1.1 mrg } 191 1.1 mrg 192 1.1 mrg extern __inline __m128i 193 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 194 1.1 mrg _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) 195 1.1 mrg { 196 1.1 mrg return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 197 1.1 mrg (__v2di) __W, 198 1.1 mrg (__mmask8) __U); 199 1.1 mrg } 200 1.1 mrg 201 1.1 mrg extern __inline __m128i 202 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 203 1.1 mrg _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) 204 1.1 mrg { 205 1.1 mrg return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 206 1.1 mrg (__v2di) 207 1.1 mrg _mm_setzero_si128 (), 208 1.1 mrg (__mmask8) __U); 209 1.1 mrg } 210 1.1 mrg 211 1.1 mrg extern __inline __m256i 212 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 213 1.1 mrg _mm256_cvtpd_epu64 (__m256d __A) 214 1.1 mrg { 215 1.1 mrg return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 216 1.1 mrg (__v4di) 217 1.1 mrg _mm256_setzero_si256 (), 218 1.1 mrg (__mmask8) -1); 219 1.1 mrg } 220 1.1 mrg 221 1.1 mrg extern __inline __m256i 222 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 223 1.1 mrg _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) 224 1.1 mrg { 225 1.1 mrg return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 226 1.1 mrg (__v4di) __W, 227 1.1 mrg (__mmask8) __U); 228 1.1 mrg } 229 1.1 mrg 230 1.1 mrg extern __inline __m256i 231 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 232 1.1 mrg _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) 233 1.1 mrg { 234 1.1 mrg return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 235 1.1 mrg (__v4di) 236 
1.1 mrg _mm256_setzero_si256 (), 237 1.1 mrg (__mmask8) __U); 238 1.1 mrg } 239 1.1 mrg 240 1.1 mrg extern __inline __m128i 241 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 242 1.1 mrg _mm_cvtpd_epu64 (__m128d __A) 243 1.1 mrg { 244 1.1 mrg return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 245 1.1 mrg (__v2di) 246 1.3 mrg _mm_setzero_si128 (), 247 1.1 mrg (__mmask8) -1); 248 1.1 mrg } 249 1.1 mrg 250 1.1 mrg extern __inline __m128i 251 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 252 1.1 mrg _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) 253 1.1 mrg { 254 1.1 mrg return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 255 1.1 mrg (__v2di) __W, 256 1.1 mrg (__mmask8) __U); 257 1.1 mrg } 258 1.1 mrg 259 1.1 mrg extern __inline __m128i 260 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 261 1.1 mrg _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) 262 1.1 mrg { 263 1.1 mrg return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 264 1.1 mrg (__v2di) 265 1.1 mrg _mm_setzero_si128 (), 266 1.1 mrg (__mmask8) __U); 267 1.1 mrg } 268 1.1 mrg 269 1.1 mrg extern __inline __m256i 270 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 271 1.1 mrg _mm256_cvttps_epi64 (__m128 __A) 272 1.1 mrg { 273 1.1 mrg return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 274 1.1 mrg (__v4di) 275 1.1 mrg _mm256_setzero_si256 (), 276 1.1 mrg (__mmask8) -1); 277 1.1 mrg } 278 1.1 mrg 279 1.1 mrg extern __inline __m256i 280 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 281 1.1 mrg _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) 282 1.1 mrg { 283 1.1 mrg return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 284 1.1 mrg (__v4di) __W, 285 1.1 mrg (__mmask8) __U); 286 1.1 mrg } 287 1.1 mrg 288 1.1 mrg extern __inline __m256i 289 1.1 mrg __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 290 1.1 mrg _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) 291 1.1 mrg { 292 1.1 mrg return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 293 1.1 mrg (__v4di) 294 1.1 mrg _mm256_setzero_si256 (), 295 1.1 mrg (__mmask8) __U); 296 1.1 mrg } 297 1.1 mrg 298 1.1 mrg extern __inline __m128i 299 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 300 1.1 mrg _mm_cvttps_epi64 (__m128 __A) 301 1.1 mrg { 302 1.1 mrg return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 303 1.1 mrg (__v2di) 304 1.3 mrg _mm_setzero_si128 (), 305 1.1 mrg (__mmask8) -1); 306 1.1 mrg } 307 1.1 mrg 308 1.1 mrg extern __inline __m128i 309 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 310 1.1 mrg _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) 311 1.1 mrg { 312 1.1 mrg return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 313 1.1 mrg (__v2di) __W, 314 1.1 mrg (__mmask8) __U); 315 1.1 mrg } 316 1.1 mrg 317 1.1 mrg extern __inline __m128i 318 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 319 1.1 mrg _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) 320 1.1 mrg { 321 1.1 mrg return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 322 1.1 mrg (__v2di) 323 1.3 mrg _mm_setzero_si128 (), 324 1.1 mrg (__mmask8) __U); 325 1.1 mrg } 326 1.1 mrg 327 1.1 mrg extern __inline __m256i 328 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 329 1.1 mrg _mm256_cvttps_epu64 (__m128 __A) 330 1.1 mrg { 331 1.1 mrg return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 332 1.1 mrg (__v4di) 333 1.1 mrg _mm256_setzero_si256 (), 334 1.1 mrg (__mmask8) -1); 335 1.1 mrg } 336 1.1 mrg 337 1.1 mrg extern __inline __m256i 338 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 339 1.1 mrg _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) 340 1.1 mrg { 341 1.1 mrg return 
(__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 342 1.1 mrg (__v4di) __W, 343 1.1 mrg (__mmask8) __U); 344 1.1 mrg } 345 1.1 mrg 346 1.1 mrg extern __inline __m256i 347 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 348 1.1 mrg _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) 349 1.1 mrg { 350 1.1 mrg return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 351 1.1 mrg (__v4di) 352 1.1 mrg _mm256_setzero_si256 (), 353 1.1 mrg (__mmask8) __U); 354 1.1 mrg } 355 1.1 mrg 356 1.1 mrg extern __inline __m128i 357 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 358 1.1 mrg _mm_cvttps_epu64 (__m128 __A) 359 1.1 mrg { 360 1.1 mrg return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 361 1.1 mrg (__v2di) 362 1.3 mrg _mm_setzero_si128 (), 363 1.1 mrg (__mmask8) -1); 364 1.1 mrg } 365 1.1 mrg 366 1.1 mrg extern __inline __m128i 367 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 368 1.1 mrg _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) 369 1.1 mrg { 370 1.1 mrg return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 371 1.1 mrg (__v2di) __W, 372 1.1 mrg (__mmask8) __U); 373 1.1 mrg } 374 1.1 mrg 375 1.1 mrg extern __inline __m128i 376 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 377 1.1 mrg _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) 378 1.1 mrg { 379 1.1 mrg return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 380 1.1 mrg (__v2di) 381 1.3 mrg _mm_setzero_si128 (), 382 1.1 mrg (__mmask8) __U); 383 1.1 mrg } 384 1.1 mrg 385 1.1 mrg extern __inline __m256d 386 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 387 1.1 mrg _mm256_broadcast_f64x2 (__m128d __A) 388 1.1 mrg { 389 1.1 mrg return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) 390 1.1 mrg __A, 391 1.1 mrg (__v4df)_mm256_undefined_pd(), 392 1.3 mrg (__mmask8) -1); 393 1.1 mrg } 394 1.1 mrg 
395 1.1 mrg extern __inline __m256d 396 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 397 1.1 mrg _mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A) 398 1.1 mrg { 399 1.1 mrg return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) 400 1.1 mrg __A, 401 1.1 mrg (__v4df) 402 1.1 mrg __O, __M); 403 1.1 mrg } 404 1.1 mrg 405 1.1 mrg extern __inline __m256d 406 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 407 1.1 mrg _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) 408 1.1 mrg { 409 1.1 mrg return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) 410 1.1 mrg __A, 411 1.1 mrg (__v4df) 412 1.1 mrg _mm256_setzero_ps (), 413 1.1 mrg __M); 414 1.1 mrg } 415 1.1 mrg 416 1.1 mrg extern __inline __m256i 417 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 418 1.1 mrg _mm256_broadcast_i64x2 (__m128i __A) 419 1.1 mrg { 420 1.1 mrg return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) 421 1.1 mrg __A, 422 1.1 mrg (__v4di)_mm256_undefined_si256(), 423 1.3 mrg (__mmask8) -1); 424 1.1 mrg } 425 1.1 mrg 426 1.1 mrg extern __inline __m256i 427 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 428 1.1 mrg _mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A) 429 1.1 mrg { 430 1.1 mrg return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) 431 1.1 mrg __A, 432 1.1 mrg (__v4di) 433 1.1 mrg __O, __M); 434 1.1 mrg } 435 1.1 mrg 436 1.1 mrg extern __inline __m256i 437 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 438 1.1 mrg _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) 439 1.1 mrg { 440 1.1 mrg return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) 441 1.1 mrg __A, 442 1.1 mrg (__v4di) 443 1.1 mrg _mm256_setzero_si256 (), 444 1.1 mrg __M); 445 1.1 mrg } 446 1.1 mrg 447 1.1 mrg extern __inline __m256 448 1.1 mrg __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 449 1.1 mrg _mm256_broadcast_f32x2 (__m128 __A) 450 1.1 mrg { 451 1.1 mrg return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, 452 1.1 mrg (__v8sf)_mm256_undefined_ps(), 453 1.3 mrg (__mmask8) -1); 454 1.1 mrg } 455 1.1 mrg 456 1.1 mrg extern __inline __m256 457 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 458 1.1 mrg _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) 459 1.1 mrg { 460 1.1 mrg return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, 461 1.1 mrg (__v8sf) __O, 462 1.1 mrg __M); 463 1.1 mrg } 464 1.1 mrg 465 1.1 mrg extern __inline __m256 466 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 467 1.1 mrg _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) 468 1.1 mrg { 469 1.1 mrg return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, 470 1.1 mrg (__v8sf) 471 1.1 mrg _mm256_setzero_ps (), 472 1.1 mrg __M); 473 1.1 mrg } 474 1.1 mrg 475 1.1 mrg extern __inline __m256i 476 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 477 1.1 mrg _mm256_broadcast_i32x2 (__m128i __A) 478 1.1 mrg { 479 1.1 mrg return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) 480 1.1 mrg __A, 481 1.1 mrg (__v8si)_mm256_undefined_si256(), 482 1.3 mrg (__mmask8) -1); 483 1.1 mrg } 484 1.1 mrg 485 1.1 mrg extern __inline __m256i 486 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 487 1.1 mrg _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) 488 1.1 mrg { 489 1.1 mrg return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) 490 1.1 mrg __A, 491 1.1 mrg (__v8si) 492 1.1 mrg __O, __M); 493 1.1 mrg } 494 1.1 mrg 495 1.1 mrg extern __inline __m256i 496 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 497 1.1 mrg _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) 498 1.1 mrg { 499 1.1 mrg return (__m256i) 
__builtin_ia32_broadcasti32x2_256_mask ((__v4si) 500 1.1 mrg __A, 501 1.1 mrg (__v8si) 502 1.1 mrg _mm256_setzero_si256 (), 503 1.1 mrg __M); 504 1.1 mrg } 505 1.1 mrg 506 1.1 mrg extern __inline __m128i 507 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 508 1.1 mrg _mm_broadcast_i32x2 (__m128i __A) 509 1.1 mrg { 510 1.1 mrg return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) 511 1.1 mrg __A, 512 1.1 mrg (__v4si)_mm_undefined_si128(), 513 1.3 mrg (__mmask8) -1); 514 1.1 mrg } 515 1.1 mrg 516 1.1 mrg extern __inline __m128i 517 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 518 1.1 mrg _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) 519 1.1 mrg { 520 1.1 mrg return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) 521 1.1 mrg __A, 522 1.1 mrg (__v4si) 523 1.1 mrg __O, __M); 524 1.1 mrg } 525 1.1 mrg 526 1.1 mrg extern __inline __m128i 527 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 528 1.1 mrg _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) 529 1.1 mrg { 530 1.1 mrg return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) 531 1.1 mrg __A, 532 1.1 mrg (__v4si) 533 1.1 mrg _mm_setzero_si128 (), 534 1.1 mrg __M); 535 1.1 mrg } 536 1.1 mrg 537 1.1 mrg extern __inline __m256i 538 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 539 1.1 mrg _mm256_mullo_epi64 (__m256i __A, __m256i __B) 540 1.1 mrg { 541 1.1 mrg return (__m256i) ((__v4du) __A * (__v4du) __B); 542 1.1 mrg } 543 1.1 mrg 544 1.1 mrg extern __inline __m256i 545 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 546 1.1 mrg _mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 547 1.1 mrg __m256i __B) 548 1.1 mrg { 549 1.1 mrg return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, 550 1.1 mrg (__v4di) __B, 551 1.1 mrg (__v4di) __W, 552 1.1 mrg (__mmask8) __U); 553 1.1 mrg } 554 1.1 mrg 555 1.1 mrg 
extern __inline __m256i 556 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 557 1.1 mrg _mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 558 1.1 mrg { 559 1.1 mrg return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, 560 1.1 mrg (__v4di) __B, 561 1.1 mrg (__v4di) 562 1.1 mrg _mm256_setzero_si256 (), 563 1.1 mrg (__mmask8) __U); 564 1.1 mrg } 565 1.1 mrg 566 1.1 mrg extern __inline __m128i 567 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 568 1.1 mrg _mm_mullo_epi64 (__m128i __A, __m128i __B) 569 1.1 mrg { 570 1.1 mrg return (__m128i) ((__v2du) __A * (__v2du) __B); 571 1.1 mrg } 572 1.1 mrg 573 1.1 mrg extern __inline __m128i 574 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 575 1.1 mrg _mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 576 1.1 mrg __m128i __B) 577 1.1 mrg { 578 1.1 mrg return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 579 1.1 mrg (__v2di) __B, 580 1.1 mrg (__v2di) __W, 581 1.1 mrg (__mmask8) __U); 582 1.1 mrg } 583 1.1 mrg 584 1.1 mrg extern __inline __m128i 585 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 586 1.1 mrg _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 587 1.1 mrg { 588 1.1 mrg return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 589 1.1 mrg (__v2di) __B, 590 1.1 mrg (__v2di) 591 1.3 mrg _mm_setzero_si128 (), 592 1.1 mrg (__mmask8) __U); 593 1.1 mrg } 594 1.1 mrg 595 1.1 mrg extern __inline __m256d 596 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 597 1.1 mrg _mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, 598 1.1 mrg __m256d __B) 599 1.1 mrg { 600 1.1 mrg return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 601 1.1 mrg (__v4df) __B, 602 1.1 mrg (__v4df) __W, 603 1.1 mrg (__mmask8) __U); 604 1.1 mrg } 605 1.1 mrg 606 1.1 mrg extern __inline __m256d 607 1.1 mrg __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 608 1.1 mrg _mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) 609 1.1 mrg { 610 1.1 mrg return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 611 1.1 mrg (__v4df) __B, 612 1.1 mrg (__v4df) 613 1.1 mrg _mm256_setzero_pd (), 614 1.1 mrg (__mmask8) __U); 615 1.1 mrg } 616 1.1 mrg 617 1.1 mrg extern __inline __m128d 618 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 619 1.1 mrg _mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, 620 1.1 mrg __m128d __B) 621 1.1 mrg { 622 1.1 mrg return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 623 1.1 mrg (__v2df) __B, 624 1.1 mrg (__v2df) __W, 625 1.1 mrg (__mmask8) __U); 626 1.1 mrg } 627 1.1 mrg 628 1.1 mrg extern __inline __m128d 629 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 630 1.1 mrg _mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) 631 1.1 mrg { 632 1.1 mrg return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 633 1.1 mrg (__v2df) __B, 634 1.1 mrg (__v2df) 635 1.1 mrg _mm_setzero_pd (), 636 1.1 mrg (__mmask8) __U); 637 1.1 mrg } 638 1.1 mrg 639 1.1 mrg extern __inline __m256 640 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 641 1.1 mrg _mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, 642 1.1 mrg __m256 __B) 643 1.1 mrg { 644 1.1 mrg return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 645 1.1 mrg (__v8sf) __B, 646 1.1 mrg (__v8sf) __W, 647 1.1 mrg (__mmask8) __U); 648 1.1 mrg } 649 1.1 mrg 650 1.1 mrg extern __inline __m256 651 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 652 1.1 mrg _mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) 653 1.1 mrg { 654 1.1 mrg return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 655 1.1 mrg (__v8sf) __B, 656 1.1 mrg (__v8sf) 657 1.1 mrg _mm256_setzero_ps (), 658 1.1 mrg (__mmask8) __U); 659 1.1 mrg } 660 1.1 mrg 661 1.1 mrg extern __inline 
__m128 662 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 663 1.1 mrg _mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 664 1.1 mrg { 665 1.1 mrg return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 666 1.1 mrg (__v4sf) __B, 667 1.1 mrg (__v4sf) __W, 668 1.1 mrg (__mmask8) __U); 669 1.1 mrg } 670 1.1 mrg 671 1.1 mrg extern __inline __m128 672 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 673 1.1 mrg _mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) 674 1.1 mrg { 675 1.1 mrg return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 676 1.1 mrg (__v4sf) __B, 677 1.1 mrg (__v4sf) 678 1.1 mrg _mm_setzero_ps (), 679 1.1 mrg (__mmask8) __U); 680 1.1 mrg } 681 1.1 mrg 682 1.1 mrg extern __inline __m256i 683 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 684 1.1 mrg _mm256_cvtps_epi64 (__m128 __A) 685 1.1 mrg { 686 1.1 mrg return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 687 1.1 mrg (__v4di) 688 1.1 mrg _mm256_setzero_si256 (), 689 1.1 mrg (__mmask8) -1); 690 1.1 mrg } 691 1.1 mrg 692 1.1 mrg extern __inline __m256i 693 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 694 1.1 mrg _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) 695 1.1 mrg { 696 1.1 mrg return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 697 1.1 mrg (__v4di) __W, 698 1.1 mrg (__mmask8) __U); 699 1.1 mrg } 700 1.1 mrg 701 1.1 mrg extern __inline __m256i 702 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 703 1.1 mrg _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) 704 1.1 mrg { 705 1.1 mrg return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 706 1.1 mrg (__v4di) 707 1.1 mrg _mm256_setzero_si256 (), 708 1.1 mrg (__mmask8) __U); 709 1.1 mrg } 710 1.1 mrg 711 1.1 mrg extern __inline __m128i 712 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 713 
1.1 mrg _mm_cvtps_epi64 (__m128 __A) 714 1.1 mrg { 715 1.1 mrg return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 716 1.1 mrg (__v2di) 717 1.3 mrg _mm_setzero_si128 (), 718 1.1 mrg (__mmask8) -1); 719 1.1 mrg } 720 1.1 mrg 721 1.1 mrg extern __inline __m128i 722 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 723 1.1 mrg _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) 724 1.1 mrg { 725 1.1 mrg return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 726 1.1 mrg (__v2di) __W, 727 1.1 mrg (__mmask8) __U); 728 1.1 mrg } 729 1.1 mrg 730 1.1 mrg extern __inline __m128i 731 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 732 1.1 mrg _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) 733 1.1 mrg { 734 1.1 mrg return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 735 1.1 mrg (__v2di) 736 1.3 mrg _mm_setzero_si128 (), 737 1.1 mrg (__mmask8) __U); 738 1.1 mrg } 739 1.1 mrg 740 1.1 mrg extern __inline __m256i 741 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 742 1.1 mrg _mm256_cvtps_epu64 (__m128 __A) 743 1.1 mrg { 744 1.1 mrg return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 745 1.1 mrg (__v4di) 746 1.1 mrg _mm256_setzero_si256 (), 747 1.1 mrg (__mmask8) -1); 748 1.1 mrg } 749 1.1 mrg 750 1.1 mrg extern __inline __m256i 751 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 752 1.1 mrg _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) 753 1.1 mrg { 754 1.1 mrg return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 755 1.1 mrg (__v4di) __W, 756 1.1 mrg (__mmask8) __U); 757 1.1 mrg } 758 1.1 mrg 759 1.1 mrg extern __inline __m256i 760 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 761 1.1 mrg _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) 762 1.1 mrg { 763 1.1 mrg return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 764 1.1 mrg (__v4di) 765 1.1 
/* Tail of a zero-masking conversion wrapper whose opening lines fall
   before this chunk of the file.  */
						      _mm256_setzero_si256 (),
						      (__mmask8) __U);
}

/* Convert packed single-precision floats to packed unsigned 64-bit
   integers, 128-bit: plain, merge-masked (elements of __W kept where the
   corresponding __U bit is 0) and zero-masked variants.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_epu64 (__m128 __A)
{
  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
						     (__v2di)
						     _mm_setzero_si128 (),
						     (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
{
  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
						     (__v2di) __W,
						     (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
{
  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
						     (__v2di)
						     _mm_setzero_si128 (),
						     (__mmask8) __U);
}

/* Convert packed signed 64-bit integers to packed single-precision
   floats.  The 256-bit integer source narrows to a 128-bit float
   result.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi64_ps (__m256i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) -1);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
						   (__v4sf) __W,
						   (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi64_ps (__m128i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) -1);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
						   (__v4sf) __W,
						   (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U);
}

/* Convert packed unsigned 64-bit integers to packed single-precision
   floats.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu64_ps (__m256i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
						    (__v4sf)
						    _mm_setzero_ps (),
						    (__mmask8) -1);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
						    (__v4sf) __W,
						    (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
						    (__v4sf)
						    _mm_setzero_ps (),
						    (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu64_ps (__m128i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
						    (__v4sf)
						    _mm_setzero_ps (),
						    (__mmask8) -1);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
						    (__v4sf) __W,
						    (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
						    (__v4sf)
						    _mm_setzero_ps (),
						    (__mmask8) __U);
}

/* Convert packed signed 64-bit integers to packed double-precision
   floats.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi64_pd (__m256i __A)
{
  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
						    (__v4df)
						    _mm256_setzero_pd (),
						    (__mmask8) -1);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A)
{
  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
						    (__v4df) __W,
						    (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A)
{
  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
						    (__v4df)
						    _mm256_setzero_pd (),
						    (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi64_pd (__m128i __A)
{
  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) -1);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A)
{
  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
						    (__v2df) __W,
						    (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A)
{
  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U);
}

/* Convert packed unsigned 64-bit integers to packed double-precision
   floats, 256-bit.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu64_pd (__m256i __A)
{
  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
						     (__v4df)
						     _mm256_setzero_pd (),
						     (__mmask8) -1);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A)
{
  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
						     (__v4df) __W,
						     (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A)
{
  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
						     (__v4df)
						     _mm256_setzero_pd (),
						     (__mmask8) __U);
}

/* Masked bitwise AND of packed doubles/floats (operates on the raw bit
   patterns).  Only mask/maskz forms live here; the unmasked forms are
   plain AVX.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A,
		    __m256d __B)
{
  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
						 (__v4df) __B,
						 (__v4df) __W,
						 (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
						 (__v4df) __B,
						 (__v4df)
						 _mm256_setzero_pd (),
						 (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
						 (__v2df) __B,
						 (__v2df) __W,
						 (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
						 (__v2df) __B,
						 (__v2df)
						 _mm_setzero_pd (),
						 (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
						(__v8sf) __B,
						(__v8sf) __W,
						(__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
						(__v8sf) __B,
						(__v8sf)
						_mm256_setzero_ps (),
						(__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
						(__v4sf) __B,
						(__v4sf) __W,
						(__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
						(__v4sf) __B,
						(__v4sf)
						_mm_setzero_ps (),
						(__mmask8) __U);
}

/* 128-bit unsigned 64-bit integer to double conversion (kept in its
   original position between the AND and XOR groups).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu64_pd (__m128i __A)
{
  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
						     (__v2df)
						     _mm_setzero_pd (),
						     (__mmask8) -1);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A)
{
  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
						     (__v2df) __W,
						     (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A)
{
  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
						     (__v2df)
						     _mm_setzero_pd (),
						     (__mmask8) __U);
}

/* Masked bitwise XOR of packed doubles/floats.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
		    __m256d __B)
{
  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
						 (__v4df) __B,
						 (__v4df) __W,
						 (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
						 (__v4df) __B,
						 (__v4df)
						 _mm256_setzero_pd (),
						 (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
						 (__v2df) __B,
						 (__v2df) __W,
						 (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
						 (__v2df) __B,
						 (__v2df)
						 _mm_setzero_pd (),
						 (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
						(__v8sf) __B,
						(__v8sf) __W,
						(__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
						(__v8sf) __B,
						(__v8sf)
						_mm256_setzero_ps (),
						(__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
						(__v4sf) __B,
						(__v4sf) __W,
						(__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
						(__v4sf) __B,
						(__v4sf)
						_mm_setzero_ps (),
						(__mmask8) __U);
}

/* Masked bitwise OR of packed doubles/floats.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
						(__v4df) __B,
						(__v4df) __W,
						(__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
						(__v4df) __B,
						(__v4df)
						_mm256_setzero_pd (),
						(__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
						(__v2df) __B,
						(__v2df) __W,
						(__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
						(__v2df) __B,
						(__v2df)
						_mm_setzero_pd (),
						(__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
					       (__v8sf) __B,
					       (__v8sf) __W,
					       (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
					       (__v8sf) __B,
					       (__v8sf)
					       _mm256_setzero_ps (),
					       (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
					       (__v4sf) __B,
					       (__v4sf) __W,
					       (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
					       (__v4sf) __B,
					       (__v4sf)
					       _mm_setzero_ps (),
					       (__mmask8) __U);
}

/* Broadcast each mask bit into all bits of the corresponding 32/64-bit
   vector element.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movm_epi32 (__mmask8 __A)
{
  return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movm_epi32 (__mmask8 __A)
{
  return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movm_epi64 (__mmask8 __A)
{
  return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movm_epi64 (__mmask8 __A)
{
  return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
}

/* Extract a mask bit from the sign bit of each 32/64-bit element.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movepi32_mask (__m128i __A)
{
  return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movepi32_mask (__m256i __A)
{
  return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movepi64_mask (__m128i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movepi64_mask (__m256i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
}

/* The intrinsics below take an immediate operand; the inline forms are
   only usable when optimizing (so the argument folds to a constant).
   Macro fallbacks for the non-optimizing case follow later in the
   file.  */
#ifdef __OPTIMIZE__
/* Extract a 128-bit lane of doubles, selected by __imm.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extractf64x2_pd (__m256d __A, const int __imm)
{
  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
							 __imm,
							 (__v2df)
							 _mm_setzero_pd (),
							 (__mmask8) -1);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m256d __A,
			     const int __imm)
{
  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
							 __imm,
							 (__v2df) __W,
							 (__mmask8)
							 __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_extractf64x2_pd (__mmask8 __U, __m256d __A,
			      const int __imm)
{
  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
							 __imm,
							 (__v2df)
							 _mm_setzero_pd (),
							 (__mmask8)
							 __U);
}

/* Extract a 128-bit lane of 64-bit integers, selected by __imm.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extracti64x2_epi64 (__m256i __A, const int __imm)
{
  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
							 __imm,
							 (__v2di)
							 _mm_setzero_si128 (),
							 (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m256i __A,
				const int __imm)
{
  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
							 __imm,
							 (__v2di) __W,
							 (__mmask8)
							 __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_extracti64x2_epi64 (__mmask8 __U, __m256i __A,
				 const int __imm)
{
  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
							 __imm,
							 (__v2di)
							 _mm_setzero_si128 (),
							 (__mmask8)
							 __U);
}

/* VREDUCEPD/PS: per-element reduction transform controlled by the
   immediate __B.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_pd (__m256d __A, int __B)
{
  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
						    (__v4df)
						    _mm256_setzero_pd (),
						    (__mmask8) -1);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_reduce_pd (__m256d __W, __mmask8 __U, __m256d __A, int __B)
{
  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
						    (__v4df) __W,
						    (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_reduce_pd (__mmask8 __U, __m256d __A, int __B)
{
  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
						    (__v4df)
						    _mm256_setzero_pd (),
						    (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_pd (__m128d __A, int __B)
{
  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) -1);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_pd (__m128d __W, __mmask8 __U, __m128d __A, int __B)
{
  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
						    (__v2df) __W,
						    (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_pd (__mmask8 __U, __m128d __A, int __B)
{
  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_ps (__m256 __A, int __B)
{
  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
						   (__v8sf)
						   _mm256_setzero_ps (),
						   (__mmask8) -1);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_reduce_ps (__m256 __W, __mmask8 __U, __m256 __A, int __B)
{
  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
						   (__v8sf) __W,
						   (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_reduce_ps (__mmask8 __U, __m256 __A, int __B)
{
  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
						   (__v8sf)
						   _mm256_setzero_ps (),
						   (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_ps (__m128 __A, int __B)
{
  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) -1);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_ps (__m128 __W, __mmask8 __U, __m128 __A, int __B)
{
  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
						   (__v4sf) __W,
						   (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_ps (__mmask8 __U, __m128 __A, int __B)
{
  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U);
}

/* VRANGEPD: per-element min/max-style range operation selected by the
   immediate __C.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_range_pd (__m256d __A, __m256d __B, int __C)
{
  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
						   (__v4df) __B, __C,
						   (__v4df)
						   _mm256_setzero_pd (),
						   (__mmask8) -1);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_range_pd (__m256d __W, __mmask8 __U,
		      __m256d __A, __m256d __B, int __C)
{
  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
						   (__v4df) __B, __C,
						   (__v4df) __W,
						   (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_range_pd (__mmask8 __U, __m256d __A, __m256d __B, int __C)
{
  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
						   (__v4df) __B, __C,
						   (__v4df)
						   _mm256_setzero_pd (),
						   (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_range_pd (__m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
						   (__v2df) __B, __C,
						   (__v2df)
						   _mm_setzero_pd (),
						   (__mmask8) -1);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_range_pd (__m128d __W, __mmask8 __U,
		   __m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
						   (__v2df) __B, __C,
						   (__v2df) __W,
						   (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_range_pd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
						   (__v2df) __B, __C,
						   (__v2df)
						   _mm_setzero_pd (),
						   (__mmask8) __U);
}
/* VRANGEPS: per-element range operation on packed floats, selected by
   the immediate __C.  Still inside the #ifdef __OPTIMIZE__ section.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_range_ps (__m256 __A, __m256 __B, int __C)
{
  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
						  (__v8sf) __B, __C,
						  (__v8sf)
						  _mm256_setzero_ps (),
						  (__mmask8) -1);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_range_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
		      int __C)
{
  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
						  (__v8sf) __B, __C,
						  (__v8sf) __W,
						  (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_range_ps (__mmask8 __U, __m256 __A, __m256 __B, int __C)
{
  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
						  (__v8sf) __B, __C,
						  (__v8sf)
						  _mm256_setzero_ps (),
						  (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_range_ps (__m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
						  (__v4sf) __B, __C,
						  (__v4sf)
						  _mm_setzero_ps (),
						  (__mmask8) -1);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_range_ps (__m128 __W, __mmask8 __U,
		   __m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
						  (__v4sf) __B, __C,
						  (__v4sf) __W,
						  (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_range_ps (__mmask8 __U, __m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
						  (__v4sf) __B, __C,
						  (__v4sf)
						  _mm_setzero_ps (),
						  (__mmask8) __U);
}

/* VFPCLASSPD/PS: test each element for the FP categories selected by
   the immediate __imm; result is a bitmask.  The mask form ANDs the
   result with __U.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fpclass_pd_mask (__mmask8 __U, __m256d __A,
			     const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
						      __imm, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fpclass_pd_mask (__m256d __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
						      __imm,
						      (__mmask8) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fpclass_ps_mask (__mmask8 __U, __m256 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
						      __imm, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fpclass_ps_mask (__m256 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
						      __imm,
						      (__mmask8) -1);
}
/* 128-bit VFPCLASS variants (see the 256-bit forms above for
   semantics).  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fpclass_pd_mask (__mmask8 __U, __m128d __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
						      __imm, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fpclass_pd_mask (__m128d __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
						      __imm,
						      (__mmask8) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fpclass_ps_mask (__mmask8 __U, __m128 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
						      __imm, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fpclass_ps_mask (__m128 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
						      __imm,
						      (__mmask8) -1);
}

/* Insert the 128-bit value __B into the lane of __A selected by
   __imm.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_inserti64x2 (__m256i __A, __m128i __B, const int __imm)
{
  return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
							(__v2di) __B,
							__imm,
							(__v4di)
							_mm256_setzero_si256 (),
							(__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_inserti64x2 (__m256i __W, __mmask8 __U, __m256i __A,
			 __m128i __B, const int __imm)
{
  return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
							(__v2di) __B,
							__imm,
							(__v4di) __W,
							(__mmask8)
							__U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_inserti64x2 (__mmask8 __U, __m256i __A, __m128i __B,
			  const int __imm)
{
  return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
							(__v2di) __B,
							__imm,
							(__v4di)
							_mm256_setzero_si256 (),
							(__mmask8)
							__U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insertf64x2 (__m256d __A, __m128d __B, const int __imm)
{
  return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
							(__v2df) __B,
							__imm,
							(__v4df)
							_mm256_setzero_pd (),
							(__mmask8) -1);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_insertf64x2 (__m256d __W, __mmask8 __U, __m256d __A,
			 __m128d __B, const int __imm)
{
  return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
							(__v2df) __B,
							__imm,
							(__v4df) __W,
							(__mmask8)
							__U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
			  const int __imm)
{
  return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
							(__v2df) __B,
							__imm,
							(__v4df)
							_mm256_setzero_pd (),
							(__mmask8)
							__U);
}

#else
/* Macro fallbacks for non-optimizing builds, where the inline forms
   cannot guarantee a constant immediate operand.  */
#define _mm256_insertf64x2(X, Y, C)                                     \
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
    (__v2df)(__m128d) (Y), (int) (C),					\
    (__v4df)(__m256d)_mm256_setzero_pd(),				\
    (__mmask8)-1))

#define _mm256_mask_insertf64x2(W, U, X, Y, C)                          \
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
    (__v2df)(__m128d) (Y), (int) (C),					\
    (__v4df)(__m256d)(W),						\
    (__mmask8)(U)))

#define _mm256_maskz_insertf64x2(U, X, Y, C)                            \
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
    (__v2df)(__m128d) (Y), (int) (C),					\
    (__v4df)(__m256d)_mm256_setzero_pd(),				\
    (__mmask8)(U)))

#define _mm256_inserti64x2(X, Y, C)                                     \
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
    (__v2di)(__m128i) (Y), (int) (C),					\
    (__v4di)(__m256i)_mm256_setzero_si256 (),				\
    (__mmask8)-1))

#define _mm256_mask_inserti64x2(W, U, X, Y, C)                          \
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
    (__v2di)(__m128i) (Y), (int) (C),					\
    (__v4di)(__m256i)(W),						\
    (__mmask8)(U)))

#define _mm256_maskz_inserti64x2(U, X, Y, C)                            \
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
    (__v2di)(__m128i) (Y), (int) (C),					\
    (__v4di)(__m256i)_mm256_setzero_si256 (),				\
    (__mmask8)(U)))
/* VEXTRACT{F,I}64X2: extract the 128-bit lane selected by the
   immediate C from a 256-bit vector.  Unmasked forms pass an all-ones
   write-mask; _mask forms merge into W under U; _maskz forms pass a
   zero vector.  */
#define _mm256_extractf64x2_pd(X, C)                                    \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8)-1))

#define _mm256_mask_extractf64x2_pd(W, U, X, C)                         \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
    (int) (C), (__v2df)(__m128d) (W), (__mmask8) (U)))

#define _mm256_maskz_extractf64x2_pd(U, X, C)                           \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8) (U)))

#define _mm256_extracti64x2_epi64(X, C)                                 \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
    (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8)-1))

#define _mm256_mask_extracti64x2_epi64(W, U, X, C)                      \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
    (int) (C), (__v2di)(__m128i) (W), (__mmask8) (U)))

#define _mm256_maskz_extracti64x2_epi64(U, X, C)                        \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
    (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))

/* VREDUCE{PD,PS}: per-element reduce, with the operation selected by
   the immediate B.  */
#define _mm256_reduce_pd(A, B)                                          \
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A),	\
    (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)-1))

#define _mm256_mask_reduce_pd(W, U, A, B)                               \
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A),	\
    (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm256_maskz_reduce_pd(U, A, B)                                 \
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A),	\
    (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))

#define _mm_reduce_pd(A, B)                                             \
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A),	\
    (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)-1))

#define _mm_mask_reduce_pd(W, U, A, B)                                  \
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A),	\
    (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_reduce_pd(U, A, B)                                    \
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A),	\
    (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)(U)))

#define _mm256_reduce_ps(A, B)                                          \
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A),	\
    (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))

#define _mm256_mask_reduce_ps(W, U, A, B)                               \
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A),	\
    (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_reduce_ps(U, A, B)                                 \
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A),	\
    (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))

#define _mm_reduce_ps(A, B)                                             \
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A),	\
    (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)-1))

#define _mm_mask_reduce_ps(W, U, A, B)                                  \
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A),	\
    (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_reduce_ps(U, A, B)                                    \
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A),	\
    (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))

/* VRANGE{PD,PS}: per-element range operation on A and B, selected by
   the immediate C.  */
#define _mm256_range_pd(A, B, C)                                        \
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A),	\
    (__v4df)(__m256d)(B), (int)(C),					\
    (__v4df)_mm256_setzero_pd(), (__mmask8)-1))

#define _mm256_maskz_range_pd(U, A, B, C)                               \
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A),	\
    (__v4df)(__m256d)(B), (int)(C),					\
    (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))

#define _mm_range_pd(A, B, C)                                           \
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A),	\
    (__v2df)(__m128d)(B), (int)(C),					\
    (__v2df)_mm_setzero_pd(), (__mmask8)-1))

#define _mm256_range_ps(A, B, C)                                        \
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A),	\
    (__v8sf)(__m256)(B), (int)(C),					\
    (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))

#define _mm256_mask_range_ps(W, U, A, B, C)                             \
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A),	\
    (__v8sf)(__m256)(B), (int)(C),					\
    (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_range_ps(U, A, B, C)                               \
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A),	\
    (__v8sf)(__m256)(B), (int)(C),					\
    (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))

#define _mm_range_ps(A, B, C)                                           \
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A),	\
    (__v4sf)(__m128)(B), (int)(C),					\
    (__v4sf)_mm_setzero_ps(), (__mmask8)-1))

#define _mm_mask_range_ps(W, U, A, B, C)                                \
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A),	\
    (__v4sf)(__m128)(B), (int)(C),					\
    (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_range_ps(U, A, B, C)                                  \
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A),	\
    (__v4sf)(__m128)(B), (int)(C),					\
    (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))

#define _mm256_mask_range_pd(W, U, A, B, C)                             \
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A),	\
    (__v4df)(__m256d)(B), (int)(C),					\
    (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm_mask_range_pd(W, U, A, B, C)                                \
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A),	\
    (__v2df)(__m128d)(B), (int)(C),					\
    (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_range_pd(U, A, B, C)                                  \
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A),	\
    (__v2df)(__m128d)(B), (int)(C),					\
    (__v2df)_mm_setzero_pd(), (__mmask8)(U)))

/* VFPCLASS{PD,PS}: classify elements per the immediate C; masked
   forms AND the result with the supplied mask u.  */
#define _mm256_mask_fpclass_pd_mask(u, X, C)                            \
  ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \
    (int) (C),(__mmask8)(u)))

#define _mm256_mask_fpclass_ps_mask(u, X, C)                            \
  ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X),  \
    (int) (C),(__mmask8)(u)))

#define _mm_mask_fpclass_pd_mask(u, X, C)                               \
  ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \
    (int) (C),(__mmask8)(u)))

#define _mm_mask_fpclass_ps_mask(u, X, C)                               \
  ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X),  \
    (int) (C),(__mmask8)(u)))

#define _mm256_fpclass_pd_mask(X, C)                                    \
  ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \
    (int) (C),(__mmask8)-1))

#define _mm256_fpclass_ps_mask(X, C)                                    \
  ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X),  \
    (int) (C),(__mmask8)-1))
/* Unmasked 128-bit VFPCLASS macro forms (all-ones write-mask).  */
#define _mm_fpclass_pd_mask(X, C)                                       \
  ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \
    (int) (C),(__mmask8)-1))

#define _mm_fpclass_ps_mask(X, C)                                       \
  ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X),  \
    (int) (C),(__mmask8)-1))

#endif

/* Restore the caller's target options if this header pushed
   avx512vl/avx512dq itself (see the matching push at the top of the
   file).  */
#ifdef __DISABLE_AVX512VLDQ__
#undef __DISABLE_AVX512VLDQ__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VLDQ__ */

#endif /* _AVX512VLDQINTRIN_H_INCLUDED */