/* Copyright (C) 2007-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* XOP intrinsic wrappers.  This header is only meant to be reached
   through <x86intrin.h>; the check below rejects direct inclusion.  */
#ifndef _X86INTRIN_H_INCLUDED
# error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
#endif

#ifndef _XOPMMINTRIN_H_INCLUDED
#define _XOPMMINTRIN_H_INCLUDED

#include <fma4intrin.h>

/* When the translation unit is not already compiled for XOP, switch the
   target on for the duration of this header and remember to restore the
   previous options at the end.  */
#ifndef __XOP__
#pragma GCC push_options
#pragma GCC target("xop")
#define __DISABLE_XOP__
#endif /* __XOP__ */

/* Attribute set shared by every inline wrapper in this header.  It is
   private to the header and undefined again before the end.  */
#define __XOP_FN_ATTRS \
  __attribute__((__gnu_inline__, __always_inline__, __artificial__))

/* Integer multiply/add instructions.
   Each wrapper casts its __m128i operands to the vector types the
   builtin is called with and casts the result back to __m128i.  */

extern __inline __m128i __XOP_FN_ATTRS
_mm_maccs_epi16 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpmacssww ((__v8hi) __A, (__v8hi) __B,
					      (__v8hi) __C);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_macc_epi16 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpmacsww ((__v8hi) __A, (__v8hi) __B,
					     (__v8hi) __C);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_maccsd_epi16 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpmacsswd ((__v8hi) __A, (__v8hi) __B,
					      (__v4si) __C);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_maccd_epi16 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpmacswd ((__v8hi) __A, (__v8hi) __B,
					     (__v4si) __C);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_maccs_epi32 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpmacssdd ((__v4si) __A, (__v4si) __B,
					      (__v4si) __C);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_macc_epi32 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpmacsdd ((__v4si) __A, (__v4si) __B,
					     (__v4si) __C);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_maccslo_epi32 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpmacssdql ((__v4si) __A, (__v4si) __B,
					       (__v2di) __C);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_macclo_epi32 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpmacsdql ((__v4si) __A, (__v4si) __B,
					      (__v2di) __C);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_maccshi_epi32 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpmacssdqh ((__v4si) __A, (__v4si) __B,
					       (__v2di) __C);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_macchi_epi32 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpmacsdqh ((__v4si) __A, (__v4si) __B,
					      (__v2di) __C);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_maddsd_epi16 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpmadcsswd ((__v8hi) __A, (__v8hi) __B,
					       (__v4si) __C);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_maddd_epi16 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpmadcswd ((__v8hi) __A, (__v8hi) __B,
					      (__v4si) __C);
}

/* Packed Integer Horizontal Add and Subtract.
   Single-operand wrappers; the _epi forms call the vphadd/vphsub
   builtins and the _epu forms call the vphaddu builtins.  */

extern __inline __m128i __XOP_FN_ATTRS
_mm_haddw_epi8 (__m128i __A)
{
  return (__m128i) __builtin_ia32_vphaddbw ((__v16qi) __A);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_haddd_epi8 (__m128i __A)
{
  return (__m128i) __builtin_ia32_vphaddbd ((__v16qi) __A);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_haddq_epi8 (__m128i __A)
{
  return (__m128i) __builtin_ia32_vphaddbq ((__v16qi) __A);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_haddd_epi16 (__m128i __A)
{
  return (__m128i) __builtin_ia32_vphaddwd ((__v8hi) __A);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_haddq_epi16 (__m128i __A)
{
  return (__m128i) __builtin_ia32_vphaddwq ((__v8hi) __A);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_haddq_epi32 (__m128i __A)
{
  return (__m128i) __builtin_ia32_vphadddq ((__v4si) __A);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_haddw_epu8 (__m128i __A)
{
  return (__m128i) __builtin_ia32_vphaddubw ((__v16qi) __A);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_haddd_epu8 (__m128i __A)
{
  return (__m128i) __builtin_ia32_vphaddubd ((__v16qi) __A);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_haddq_epu8 (__m128i __A)
{
  return (__m128i) __builtin_ia32_vphaddubq ((__v16qi) __A);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_haddd_epu16 (__m128i __A)
{
  return (__m128i) __builtin_ia32_vphadduwd ((__v8hi) __A);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_haddq_epu16 (__m128i __A)
{
  return (__m128i) __builtin_ia32_vphadduwq ((__v8hi) __A);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_haddq_epu32 (__m128i __A)
{
  return (__m128i) __builtin_ia32_vphaddudq ((__v4si) __A);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_hsubw_epi8 (__m128i __A)
{
  return (__m128i) __builtin_ia32_vphsubbw ((__v16qi) __A);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_hsubd_epi16 (__m128i __A)
{
  return (__m128i) __builtin_ia32_vphsubwd ((__v8hi) __A);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_hsubq_epi32 (__m128i __A)
{
  return (__m128i) __builtin_ia32_vphsubdq ((__v4si) __A);
}

/* Vector conditional move and permute.
   The two cmov wrappers hand their operands to the builtin without any
   operand cast; only the result is cast.  */

extern __inline __m128i __XOP_FN_ATTRS
_mm_cmov_si128 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpcmov (__A, __B, __C);
}

extern __inline __m256i __XOP_FN_ATTRS
_mm256_cmov_si256 (__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpcmov256 (__A, __B, __C);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_perm_epi8 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpperm ((__v16qi) __A, (__v16qi) __B,
					   (__v16qi) __C);
}

/* Packed Integer Rotates and Shifts.
   Rotates - Non-Immediate form: the second operand is itself a vector.  */

extern __inline __m128i __XOP_FN_ATTRS
_mm_rot_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vprotb ((__v16qi) __A, (__v16qi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_rot_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vprotw ((__v8hi) __A, (__v8hi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_rot_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vprotd ((__v4si) __A, (__v4si) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_rot_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vprotq ((__v2di) __A, (__v2di) __B);
}

/* Rotates - Immediate form: the second operand is an int.
   Inline functions are used when __OPTIMIZE__ is defined and macros
   otherwise.  NOTE(review): presumably the builtins need the count to
   be a compile-time constant, which only survives through a function
   parameter once the wrapper is inlined with optimization on -- confirm
   against the GCC builtin documentation.  */

#ifdef __OPTIMIZE__
extern __inline __m128i __XOP_FN_ATTRS
_mm_roti_epi8 (__m128i __A, const int __B)
{
  return (__m128i) __builtin_ia32_vprotbi ((__v16qi) __A, __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_roti_epi16 (__m128i __A, const int __B)
{
  return (__m128i) __builtin_ia32_vprotwi ((__v8hi) __A, __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_roti_epi32 (__m128i __A, const int __B)
{
  return (__m128i) __builtin_ia32_vprotdi ((__v4si) __A, __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_roti_epi64 (__m128i __A, const int __B)
{
  return (__m128i) __builtin_ia32_vprotqi ((__v2di) __A, __B);
}
#else
#define _mm_roti_epi8(A, N) \
  ((__m128i) __builtin_ia32_vprotbi ((__v16qi)(__m128i)(A), (int)(N)))
#define _mm_roti_epi16(A, N) \
  ((__m128i) __builtin_ia32_vprotwi ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_roti_epi32(A, N) \
  ((__m128i) __builtin_ia32_vprotdi ((__v4si)(__m128i)(A), (int)(N)))
#define _mm_roti_epi64(A, N) \
  ((__m128i) __builtin_ia32_vprotqi ((__v2di)(__m128i)(A), (int)(N)))
#endif

/* Shifts.  _mm_shl_* call the vpshl builtins, _mm_sha_* call the vpsha
   builtins; both take the count as a vector operand.  */

extern __inline __m128i __XOP_FN_ATTRS
_mm_shl_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshlb ((__v16qi) __A, (__v16qi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_shl_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshlw ((__v8hi) __A, (__v8hi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_shl_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshld ((__v4si) __A, (__v4si) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_shl_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshlq ((__v2di) __A, (__v2di) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_sha_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshab ((__v16qi) __A, (__v16qi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_sha_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshaw ((__v8hi) __A, (__v8hi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_sha_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshad ((__v4si) __A, (__v4si) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_sha_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshaq ((__v2di) __A, (__v2di) __B);
}

/* Compare and Predicate Generation.
   For every element kind there are eight wrappers (lt, le, gt, ge, eq,
   neq, false, true), each forwarding to the vpcom builtin of the same
   name.

   pcom (integer, unsigned bytes) */

extern __inline __m128i __XOP_FN_ATTRS
_mm_comlt_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomltub ((__v16qi) __A, (__v16qi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comle_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomleub ((__v16qi) __A, (__v16qi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comgt_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomgtub ((__v16qi) __A, (__v16qi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comge_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomgeub ((__v16qi) __A, (__v16qi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comeq_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomequb ((__v16qi) __A, (__v16qi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comneq_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomnequb ((__v16qi) __A, (__v16qi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comfalse_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomfalseub ((__v16qi) __A, (__v16qi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comtrue_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomtrueub ((__v16qi) __A, (__v16qi) __B);
}

/* pcom (integer, unsigned words) */

extern __inline __m128i __XOP_FN_ATTRS
_mm_comlt_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomltuw ((__v8hi) __A, (__v8hi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comle_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomleuw ((__v8hi) __A, (__v8hi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comgt_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomgtuw ((__v8hi) __A, (__v8hi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comge_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomgeuw ((__v8hi) __A, (__v8hi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comeq_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomequw ((__v8hi) __A, (__v8hi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comneq_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomnequw ((__v8hi) __A, (__v8hi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comfalse_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomfalseuw ((__v8hi) __A, (__v8hi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comtrue_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomtrueuw ((__v8hi) __A, (__v8hi) __B);
}

/* pcom (integer, unsigned double words) */

extern __inline __m128i __XOP_FN_ATTRS
_mm_comlt_epu32 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomltud ((__v4si) __A, (__v4si) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comle_epu32 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomleud ((__v4si) __A, (__v4si) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comgt_epu32 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomgtud ((__v4si) __A, (__v4si) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comge_epu32 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomgeud ((__v4si) __A, (__v4si) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comeq_epu32 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomequd ((__v4si) __A, (__v4si) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comneq_epu32 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomnequd ((__v4si) __A, (__v4si) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comfalse_epu32 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomfalseud ((__v4si) __A, (__v4si) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comtrue_epu32 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomtrueud ((__v4si) __A, (__v4si) __B);
}

/* pcom (integer, unsigned quad words) */

extern __inline __m128i __XOP_FN_ATTRS
_mm_comlt_epu64 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomltuq ((__v2di) __A, (__v2di) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comle_epu64 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomleuq ((__v2di) __A, (__v2di) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comgt_epu64 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomgtuq ((__v2di) __A, (__v2di) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comge_epu64 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomgeuq ((__v2di) __A, (__v2di) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comeq_epu64 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomequq ((__v2di) __A, (__v2di) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comneq_epu64 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomnequq ((__v2di) __A, (__v2di) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comfalse_epu64 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomfalseuq ((__v2di) __A, (__v2di) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comtrue_epu64 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomtrueuq ((__v2di) __A, (__v2di) __B);
}

/* pcom (integer, signed bytes) */

extern __inline __m128i __XOP_FN_ATTRS
_mm_comlt_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomltb ((__v16qi) __A, (__v16qi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comle_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomleb ((__v16qi) __A, (__v16qi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comgt_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomgtb ((__v16qi) __A, (__v16qi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comge_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomgeb ((__v16qi) __A, (__v16qi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comeq_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomeqb ((__v16qi) __A, (__v16qi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comneq_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomneqb ((__v16qi) __A, (__v16qi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comfalse_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomfalseb ((__v16qi) __A, (__v16qi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comtrue_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomtrueb ((__v16qi) __A, (__v16qi) __B);
}

/* pcom (integer, signed words) */

extern __inline __m128i __XOP_FN_ATTRS
_mm_comlt_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomltw ((__v8hi) __A, (__v8hi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comle_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomlew ((__v8hi) __A, (__v8hi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comgt_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomgtw ((__v8hi) __A, (__v8hi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comge_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomgew ((__v8hi) __A, (__v8hi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comeq_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomeqw ((__v8hi) __A, (__v8hi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comneq_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomneqw ((__v8hi) __A, (__v8hi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comfalse_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomfalsew ((__v8hi) __A, (__v8hi) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comtrue_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomtruew ((__v8hi) __A, (__v8hi) __B);
}

/* pcom (integer, signed double words) */

extern __inline __m128i __XOP_FN_ATTRS
_mm_comlt_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomltd ((__v4si) __A, (__v4si) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comle_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomled ((__v4si) __A, (__v4si) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comgt_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomgtd ((__v4si) __A, (__v4si) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comge_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomged ((__v4si) __A, (__v4si) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comeq_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomeqd ((__v4si) __A, (__v4si) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comneq_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomneqd ((__v4si) __A, (__v4si) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comfalse_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomfalsed ((__v4si) __A, (__v4si) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comtrue_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomtrued ((__v4si) __A, (__v4si) __B);
}

/* pcom (integer, signed quad words) */

extern __inline __m128i __XOP_FN_ATTRS
_mm_comlt_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomltq ((__v2di) __A, (__v2di) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comle_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomleq ((__v2di) __A, (__v2di) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comgt_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomgtq ((__v2di) __A, (__v2di) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comge_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomgeq ((__v2di) __A, (__v2di) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comeq_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomeqq ((__v2di) __A, (__v2di) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comneq_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomneqq ((__v2di) __A, (__v2di) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comfalse_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomfalseq ((__v2di) __A, (__v2di) __B);
}

extern __inline __m128i __XOP_FN_ATTRS
_mm_comtrue_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpcomtrueq ((__v2di) __A, (__v2di) __B);
}

/* FRCZ */

extern __inline __m128 __XOP_FN_ATTRS
_mm_frcz_ps (__m128 __A)
{
  return (__m128) __builtin_ia32_vfrczps ((__v4sf) __A);
}

extern __inline __m128d __XOP_FN_ATTRS
_mm_frcz_pd (__m128d __A)
{
  return (__m128d) __builtin_ia32_vfrczpd ((__v2df) __A);
}

/* Scalar forms: the vfrcz builtin is applied to __B only, and its result
   is then combined with __A through the movss/movsd builtin.  */

extern __inline __m128 __XOP_FN_ATTRS
_mm_frcz_ss (__m128 __A, __m128 __B)
{
  __v4sf __frcz = (__v4sf) __builtin_ia32_vfrczss ((__v4sf) __B);

  return (__m128) __builtin_ia32_movss ((__v4sf) __A, __frcz);
}

extern __inline __m128d __XOP_FN_ATTRS
_mm_frcz_sd (__m128d __A, __m128d __B)
{
  __v2df __frcz = (__v2df) __builtin_ia32_vfrczsd ((__v2df) __B);

  return (__m128d) __builtin_ia32_movsd ((__v2df) __A, __frcz);
}

extern __inline __m256 __XOP_FN_ATTRS
_mm256_frcz_ps (__m256 __A)
{
  return (__m256) __builtin_ia32_vfrczps256 ((__v8sf) __A);
}

extern __inline __m256d __XOP_FN_ATTRS
_mm256_frcz_pd (__m256d __A)
{
  return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df) __A);
}

/* PERMIL2.
   Two data operands, an integer-vector selector __C and an int __I.
   As with the immediate rotates, inline functions are provided when
   __OPTIMIZE__ is defined and equivalent macros otherwise.  */

#ifdef __OPTIMIZE__
extern __inline __m128d __XOP_FN_ATTRS
_mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int __I)
{
  return (__m128d) __builtin_ia32_vpermil2pd ((__v2df) __X, (__v2df) __Y,
					       (__v2di) __C, __I);
}

extern __inline __m256d __XOP_FN_ATTRS
_mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I)
{
  return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df) __X, (__v4df) __Y,
						  (__v4di) __C, __I);
}

extern __inline __m128 __XOP_FN_ATTRS
_mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I)
{
  return (__m128) __builtin_ia32_vpermil2ps ((__v4sf) __X, (__v4sf) __Y,
					      (__v4si) __C, __I);
}

extern __inline __m256 __XOP_FN_ATTRS
_mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
{
  return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf) __X, (__v8sf) __Y,
						 (__v8si) __C, __I);
}
#else
#define _mm_permute2_pd(X, Y, C, I) \
  ((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X), \
					(__v2df)(__m128d)(Y), \
					(__v2di)(__m128i)(C), \
					(int)(I)))

#define _mm256_permute2_pd(X, Y, C, I) \
  ((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X), \
					   (__v4df)(__m256d)(Y), \
					   (__v4di)(__m256i)(C), \
					   (int)(I)))

#define _mm_permute2_ps(X, Y, C, I) \
  ((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X), \
				       (__v4sf)(__m128)(Y), \
				       (__v4si)(__m128i)(C), \
				       (int)(I)))

#define _mm256_permute2_ps(X, Y, C, I) \
  ((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X), \
					  (__v8sf)(__m256)(Y), \
					  (__v8si)(__m256i)(C), \
					  (int)(I)))
#endif /* __OPTIMIZE__ */

/* The attribute shorthand is private to this header.  */
#undef __XOP_FN_ATTRS

/* Restore the caller's target options if XOP was enabled above.  */
#ifdef __DISABLE_XOP__
#undef __DISABLE_XOP__
#pragma GCC pop_options
#endif /* __DISABLE_XOP__ */

#endif /* _XOPMMINTRIN_H_INCLUDED */