/* Copyright (C) 2007-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

     24 #ifndef _X86INTRIN_H_INCLUDED
     25 # error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
     26 #endif
     27 
     28 #ifndef _XOPMMINTRIN_H_INCLUDED
     29 #define _XOPMMINTRIN_H_INCLUDED
     30 
     31 #include <fma4intrin.h>
     32 
     33 #ifndef __XOP__
     34 #pragma GCC push_options
     35 #pragma GCC target("xop")
     36 #define __DISABLE_XOP__
     37 #endif /* __XOP__ */
     38 
     39 /* Integer multiply/add instructions. */
     40 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     41 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
     42 {
     43   return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C);
     44 }
     45 
     46 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     47 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
     48 {
     49   return (__m128i) __builtin_ia32_vpmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
     50 }
     51 
     52 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     53 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
     54 {
     55   return  (__m128i) __builtin_ia32_vpmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
     56 }
     57 
     58 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     59 _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
     60 {
     61   return  (__m128i) __builtin_ia32_vpmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
     62 }
     63 
     64 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     65 _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
     66 {
     67   return  (__m128i) __builtin_ia32_vpmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
     68 }
     69 
     70 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     71 _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
     72 {
     73   return  (__m128i) __builtin_ia32_vpmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
     74 }
     75 
     76 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     77 _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
     78 {
     79   return  (__m128i) __builtin_ia32_vpmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
     80 }
     81 
     82 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     83 _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
     84 {
     85   return  (__m128i) __builtin_ia32_vpmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
     86 }
     87 
     88 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     89 _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
     90 {
     91   return  (__m128i) __builtin_ia32_vpmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
     92 }
     93 
     94 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     95 _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
     96 {
     97   return  (__m128i) __builtin_ia32_vpmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
     98 }
     99 
    100 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    101 _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
    102 {
    103   return  (__m128i) __builtin_ia32_vpmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
    104 }
    105 
    106 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    107 _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
    108 {
    109   return  (__m128i) __builtin_ia32_vpmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
    110 }
    111 
    112 /* Packed Integer Horizontal Add and Subtract */
    113 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    114 _mm_haddw_epi8(__m128i __A)
    115 {
    116   return  (__m128i) __builtin_ia32_vphaddbw ((__v16qi)__A);
    117 }
    118 
    119 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    120 _mm_haddd_epi8(__m128i __A)
    121 {
    122   return  (__m128i) __builtin_ia32_vphaddbd ((__v16qi)__A);
    123 }
    124 
    125 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    126 _mm_haddq_epi8(__m128i __A)
    127 {
    128   return  (__m128i) __builtin_ia32_vphaddbq ((__v16qi)__A);
    129 }
    130 
    131 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    132 _mm_haddd_epi16(__m128i __A)
    133 {
    134   return  (__m128i) __builtin_ia32_vphaddwd ((__v8hi)__A);
    135 }
    136 
    137 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    138 _mm_haddq_epi16(__m128i __A)
    139 {
    140   return  (__m128i) __builtin_ia32_vphaddwq ((__v8hi)__A);
    141 }
    142 
    143 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    144 _mm_haddq_epi32(__m128i __A)
    145 {
    146   return  (__m128i) __builtin_ia32_vphadddq ((__v4si)__A);
    147 }
    148 
    149 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    150 _mm_haddw_epu8(__m128i __A)
    151 {
    152   return  (__m128i) __builtin_ia32_vphaddubw ((__v16qi)__A);
    153 }
    154 
    155 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    156 _mm_haddd_epu8(__m128i __A)
    157 {
    158   return  (__m128i) __builtin_ia32_vphaddubd ((__v16qi)__A);
    159 }
    160 
    161 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    162 _mm_haddq_epu8(__m128i __A)
    163 {
    164   return  (__m128i) __builtin_ia32_vphaddubq ((__v16qi)__A);
    165 }
    166 
    167 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    168 _mm_haddd_epu16(__m128i __A)
    169 {
    170   return  (__m128i) __builtin_ia32_vphadduwd ((__v8hi)__A);
    171 }
    172 
    173 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    174 _mm_haddq_epu16(__m128i __A)
    175 {
    176   return  (__m128i) __builtin_ia32_vphadduwq ((__v8hi)__A);
    177 }
    178 
    179 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    180 _mm_haddq_epu32(__m128i __A)
    181 {
    182   return  (__m128i) __builtin_ia32_vphaddudq ((__v4si)__A);
    183 }
    184 
    185 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    186 _mm_hsubw_epi8(__m128i __A)
    187 {
    188   return  (__m128i) __builtin_ia32_vphsubbw ((__v16qi)__A);
    189 }
    190 
    191 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    192 _mm_hsubd_epi16(__m128i __A)
    193 {
    194   return  (__m128i) __builtin_ia32_vphsubwd ((__v8hi)__A);
    195 }
    196 
    197 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    198 _mm_hsubq_epi32(__m128i __A)
    199 {
    200   return  (__m128i) __builtin_ia32_vphsubdq ((__v4si)__A);
    201 }
    202 
    203 /* Vector conditional move and permute */
    204 
    205 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    206 _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
    207 {
    208   return  (__m128i) __builtin_ia32_vpcmov (__A, __B, __C);
    209 }
    210 
    211 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    212 _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
    213 {
    214   return  (__m256i) __builtin_ia32_vpcmov256 (__A, __B, __C);
    215 }
    216 
    217 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    218 _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
    219 {
    220   return  (__m128i) __builtin_ia32_vpperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
    221 }
    222 
    223 /* Packed Integer Rotates and Shifts
    224    Rotates - Non-Immediate form */
    225 
    226 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    227 _mm_rot_epi8(__m128i __A,  __m128i __B)
    228 {
    229   return  (__m128i) __builtin_ia32_vprotb ((__v16qi)__A, (__v16qi)__B);
    230 }
    231 
    232 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    233 _mm_rot_epi16(__m128i __A,  __m128i __B)
    234 {
    235   return  (__m128i) __builtin_ia32_vprotw ((__v8hi)__A, (__v8hi)__B);
    236 }
    237 
    238 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    239 _mm_rot_epi32(__m128i __A,  __m128i __B)
    240 {
    241   return  (__m128i) __builtin_ia32_vprotd ((__v4si)__A, (__v4si)__B);
    242 }
    243 
    244 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    245 _mm_rot_epi64(__m128i __A,  __m128i __B)
    246 {
    247   return (__m128i)  __builtin_ia32_vprotq ((__v2di)__A, (__v2di)__B);
    248 }
    249 
/* Rotates - Immediate form.

   Each entry point forwards the vector operand (cast to the element type
   of its suffix) and the integer count to the matching vprot?i builtin.
   Inline functions are provided only when __OPTIMIZE__ is defined;
   otherwise equivalent macros hand the count expression to the builtin
   directly.
   NOTE(review): presumably the split exists because the builtins need a
   compile-time constant count, which a non-inlined wrapper could not
   supply -- confirm against the GCC x86 built-in documentation.  */

#ifdef __OPTIMIZE__
/* Forwards __A as __v16qi and the count __B to the vprotbi builtin.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_roti_epi8 (__m128i __A, const int __B)
{
  return (__m128i) __builtin_ia32_vprotbi ((__v16qi)__A, __B);
}

/* Forwards __A as __v8hi and the count __B to the vprotwi builtin.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_roti_epi16 (__m128i __A, const int __B)
{
  return (__m128i) __builtin_ia32_vprotwi ((__v8hi)__A, __B);
}

/* Forwards __A as __v4si and the count __B to the vprotdi builtin.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_roti_epi32 (__m128i __A, const int __B)
{
  return (__m128i) __builtin_ia32_vprotdi ((__v4si)__A, __B);
}

/* Forwards __A as __v2di and the count __B to the vprotqi builtin.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_roti_epi64 (__m128i __A, const int __B)
{
  return (__m128i) __builtin_ia32_vprotqi ((__v2di)__A, __B);
}
#else
/* Macro forms: A is converted to __m128i and then to the element vector
   type, N is converted to int.  */
#define _mm_roti_epi8(A, N) \
  ((__m128i) __builtin_ia32_vprotbi ((__v16qi)(__m128i)(A), (int)(N)))
#define _mm_roti_epi16(A, N) \
  ((__m128i) __builtin_ia32_vprotwi ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_roti_epi32(A, N) \
  ((__m128i) __builtin_ia32_vprotdi ((__v4si)(__m128i)(A), (int)(N)))
#define _mm_roti_epi64(A, N) \
  ((__m128i) __builtin_ia32_vprotqi ((__v2di)(__m128i)(A), (int)(N)))
#endif

    287 /* Shifts */
    288 
    289 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    290 _mm_shl_epi8(__m128i __A,  __m128i __B)
    291 {
    292   return  (__m128i) __builtin_ia32_vpshlb ((__v16qi)__A, (__v16qi)__B);
    293 }
    294 
    295 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    296 _mm_shl_epi16(__m128i __A,  __m128i __B)
    297 {
    298   return  (__m128i) __builtin_ia32_vpshlw ((__v8hi)__A, (__v8hi)__B);
    299 }
    300 
    301 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    302 _mm_shl_epi32(__m128i __A,  __m128i __B)
    303 {
    304   return  (__m128i) __builtin_ia32_vpshld ((__v4si)__A, (__v4si)__B);
    305 }
    306 
    307 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    308 _mm_shl_epi64(__m128i __A,  __m128i __B)
    309 {
    310   return  (__m128i) __builtin_ia32_vpshlq ((__v2di)__A, (__v2di)__B);
    311 }
    312 
    313 
    314 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    315 _mm_sha_epi8(__m128i __A,  __m128i __B)
    316 {
    317   return  (__m128i) __builtin_ia32_vpshab ((__v16qi)__A, (__v16qi)__B);
    318 }
    319 
    320 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    321 _mm_sha_epi16(__m128i __A,  __m128i __B)
    322 {
    323   return  (__m128i) __builtin_ia32_vpshaw ((__v8hi)__A, (__v8hi)__B);
    324 }
    325 
    326 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    327 _mm_sha_epi32(__m128i __A,  __m128i __B)
    328 {
    329   return  (__m128i) __builtin_ia32_vpshad ((__v4si)__A, (__v4si)__B);
    330 }
    331 
    332 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    333 _mm_sha_epi64(__m128i __A,  __m128i __B)
    334 {
    335   return  (__m128i) __builtin_ia32_vpshaq ((__v2di)__A, (__v2di)__B);
    336 }
    337 
    338 /* Compare and Predicate Generation
    339    pcom (integer, unsigned bytes) */
    340 
    341 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    342 _mm_comlt_epu8(__m128i __A, __m128i __B)
    343 {
    344   return (__m128i) __builtin_ia32_vpcomltub ((__v16qi)__A, (__v16qi)__B);
    345 }
    346 
    347 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    348 _mm_comle_epu8(__m128i __A, __m128i __B)
    349 {
    350   return (__m128i) __builtin_ia32_vpcomleub ((__v16qi)__A, (__v16qi)__B);
    351 }
    352 
    353 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    354 _mm_comgt_epu8(__m128i __A, __m128i __B)
    355 {
    356   return (__m128i) __builtin_ia32_vpcomgtub ((__v16qi)__A, (__v16qi)__B);
    357 }
    358 
    359 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    360 _mm_comge_epu8(__m128i __A, __m128i __B)
    361 {
    362   return (__m128i) __builtin_ia32_vpcomgeub ((__v16qi)__A, (__v16qi)__B);
    363 }
    364 
    365 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    366 _mm_comeq_epu8(__m128i __A, __m128i __B)
    367 {
    368   return (__m128i) __builtin_ia32_vpcomequb ((__v16qi)__A, (__v16qi)__B);
    369 }
    370 
    371 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    372 _mm_comneq_epu8(__m128i __A, __m128i __B)
    373 {
    374   return (__m128i) __builtin_ia32_vpcomnequb ((__v16qi)__A, (__v16qi)__B);
    375 }
    376 
    377 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    378 _mm_comfalse_epu8(__m128i __A, __m128i __B)
    379 {
    380   return (__m128i) __builtin_ia32_vpcomfalseub ((__v16qi)__A, (__v16qi)__B);
    381 }
    382 
    383 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    384 _mm_comtrue_epu8(__m128i __A, __m128i __B)
    385 {
    386   return (__m128i) __builtin_ia32_vpcomtrueub ((__v16qi)__A, (__v16qi)__B);
    387 }
    388 
    389 /*pcom (integer, unsigned words) */
    390 
    391 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    392 _mm_comlt_epu16(__m128i __A, __m128i __B)
    393 {
    394   return (__m128i) __builtin_ia32_vpcomltuw ((__v8hi)__A, (__v8hi)__B);
    395 }
    396 
    397 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    398 _mm_comle_epu16(__m128i __A, __m128i __B)
    399 {
    400   return (__m128i) __builtin_ia32_vpcomleuw ((__v8hi)__A, (__v8hi)__B);
    401 }
    402 
    403 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    404 _mm_comgt_epu16(__m128i __A, __m128i __B)
    405 {
    406   return (__m128i) __builtin_ia32_vpcomgtuw ((__v8hi)__A, (__v8hi)__B);
    407 }
    408 
    409 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    410 _mm_comge_epu16(__m128i __A, __m128i __B)
    411 {
    412   return (__m128i) __builtin_ia32_vpcomgeuw ((__v8hi)__A, (__v8hi)__B);
    413 }
    414 
    415 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    416 _mm_comeq_epu16(__m128i __A, __m128i __B)
    417 {
    418   return (__m128i) __builtin_ia32_vpcomequw ((__v8hi)__A, (__v8hi)__B);
    419 }
    420 
    421 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    422 _mm_comneq_epu16(__m128i __A, __m128i __B)
    423 {
    424   return (__m128i) __builtin_ia32_vpcomnequw ((__v8hi)__A, (__v8hi)__B);
    425 }
    426 
    427 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    428 _mm_comfalse_epu16(__m128i __A, __m128i __B)
    429 {
    430   return (__m128i) __builtin_ia32_vpcomfalseuw ((__v8hi)__A, (__v8hi)__B);
    431 }
    432 
    433 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    434 _mm_comtrue_epu16(__m128i __A, __m128i __B)
    435 {
    436   return (__m128i) __builtin_ia32_vpcomtrueuw ((__v8hi)__A, (__v8hi)__B);
    437 }
    438 
    439 /*pcom (integer, unsigned double words) */
    440 
    441 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    442 _mm_comlt_epu32(__m128i __A, __m128i __B)
    443 {
    444   return (__m128i) __builtin_ia32_vpcomltud ((__v4si)__A, (__v4si)__B);
    445 }
    446 
    447 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    448 _mm_comle_epu32(__m128i __A, __m128i __B)
    449 {
    450   return (__m128i) __builtin_ia32_vpcomleud ((__v4si)__A, (__v4si)__B);
    451 }
    452 
    453 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    454 _mm_comgt_epu32(__m128i __A, __m128i __B)
    455 {
    456   return (__m128i) __builtin_ia32_vpcomgtud ((__v4si)__A, (__v4si)__B);
    457 }
    458 
    459 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    460 _mm_comge_epu32(__m128i __A, __m128i __B)
    461 {
    462   return (__m128i) __builtin_ia32_vpcomgeud ((__v4si)__A, (__v4si)__B);
    463 }
    464 
    465 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    466 _mm_comeq_epu32(__m128i __A, __m128i __B)
    467 {
    468   return (__m128i) __builtin_ia32_vpcomequd ((__v4si)__A, (__v4si)__B);
    469 }
    470 
    471 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    472 _mm_comneq_epu32(__m128i __A, __m128i __B)
    473 {
    474   return (__m128i) __builtin_ia32_vpcomnequd ((__v4si)__A, (__v4si)__B);
    475 }
    476 
    477 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    478 _mm_comfalse_epu32(__m128i __A, __m128i __B)
    479 {
    480   return (__m128i) __builtin_ia32_vpcomfalseud ((__v4si)__A, (__v4si)__B);
    481 }
    482 
    483 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    484 _mm_comtrue_epu32(__m128i __A, __m128i __B)
    485 {
    486   return (__m128i) __builtin_ia32_vpcomtrueud ((__v4si)__A, (__v4si)__B);
    487 }
    488 
    489 /*pcom (integer, unsigned quad words) */
    490 
    491 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    492 _mm_comlt_epu64(__m128i __A, __m128i __B)
    493 {
    494   return (__m128i) __builtin_ia32_vpcomltuq ((__v2di)__A, (__v2di)__B);
    495 }
    496 
    497 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    498 _mm_comle_epu64(__m128i __A, __m128i __B)
    499 {
    500   return (__m128i) __builtin_ia32_vpcomleuq ((__v2di)__A, (__v2di)__B);
    501 }
    502 
    503 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    504 _mm_comgt_epu64(__m128i __A, __m128i __B)
    505 {
    506   return (__m128i) __builtin_ia32_vpcomgtuq ((__v2di)__A, (__v2di)__B);
    507 }
    508 
    509 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    510 _mm_comge_epu64(__m128i __A, __m128i __B)
    511 {
    512   return (__m128i) __builtin_ia32_vpcomgeuq ((__v2di)__A, (__v2di)__B);
    513 }
    514 
    515 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    516 _mm_comeq_epu64(__m128i __A, __m128i __B)
    517 {
    518   return (__m128i) __builtin_ia32_vpcomequq ((__v2di)__A, (__v2di)__B);
    519 }
    520 
    521 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    522 _mm_comneq_epu64(__m128i __A, __m128i __B)
    523 {
    524   return (__m128i) __builtin_ia32_vpcomnequq ((__v2di)__A, (__v2di)__B);
    525 }
    526 
    527 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    528 _mm_comfalse_epu64(__m128i __A, __m128i __B)
    529 {
    530   return (__m128i) __builtin_ia32_vpcomfalseuq ((__v2di)__A, (__v2di)__B);
    531 }
    532 
    533 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    534 _mm_comtrue_epu64(__m128i __A, __m128i __B)
    535 {
    536   return (__m128i) __builtin_ia32_vpcomtrueuq ((__v2di)__A, (__v2di)__B);
    537 }
    538 
    539 /*pcom (integer, signed bytes) */
    540 
    541 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    542 _mm_comlt_epi8(__m128i __A, __m128i __B)
    543 {
    544   return (__m128i) __builtin_ia32_vpcomltb ((__v16qi)__A, (__v16qi)__B);
    545 }
    546 
    547 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    548 _mm_comle_epi8(__m128i __A, __m128i __B)
    549 {
    550   return (__m128i) __builtin_ia32_vpcomleb ((__v16qi)__A, (__v16qi)__B);
    551 }
    552 
    553 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    554 _mm_comgt_epi8(__m128i __A, __m128i __B)
    555 {
    556   return (__m128i) __builtin_ia32_vpcomgtb ((__v16qi)__A, (__v16qi)__B);
    557 }
    558 
    559 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    560 _mm_comge_epi8(__m128i __A, __m128i __B)
    561 {
    562   return (__m128i) __builtin_ia32_vpcomgeb ((__v16qi)__A, (__v16qi)__B);
    563 }
    564 
    565 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    566 _mm_comeq_epi8(__m128i __A, __m128i __B)
    567 {
    568   return (__m128i) __builtin_ia32_vpcomeqb ((__v16qi)__A, (__v16qi)__B);
    569 }
    570 
    571 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    572 _mm_comneq_epi8(__m128i __A, __m128i __B)
    573 {
    574   return (__m128i) __builtin_ia32_vpcomneqb ((__v16qi)__A, (__v16qi)__B);
    575 }
    576 
    577 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    578 _mm_comfalse_epi8(__m128i __A, __m128i __B)
    579 {
    580   return (__m128i) __builtin_ia32_vpcomfalseb ((__v16qi)__A, (__v16qi)__B);
    581 }
    582 
    583 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    584 _mm_comtrue_epi8(__m128i __A, __m128i __B)
    585 {
    586   return (__m128i) __builtin_ia32_vpcomtrueb ((__v16qi)__A, (__v16qi)__B);
    587 }
    588 
    589 /*pcom (integer, signed words) */
    590 
    591 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    592 _mm_comlt_epi16(__m128i __A, __m128i __B)
    593 {
    594   return (__m128i) __builtin_ia32_vpcomltw ((__v8hi)__A, (__v8hi)__B);
    595 }
    596 
    597 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    598 _mm_comle_epi16(__m128i __A, __m128i __B)
    599 {
    600   return (__m128i) __builtin_ia32_vpcomlew ((__v8hi)__A, (__v8hi)__B);
    601 }
    602 
    603 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    604 _mm_comgt_epi16(__m128i __A, __m128i __B)
    605 {
    606   return (__m128i) __builtin_ia32_vpcomgtw ((__v8hi)__A, (__v8hi)__B);
    607 }
    608 
    609 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    610 _mm_comge_epi16(__m128i __A, __m128i __B)
    611 {
    612   return (__m128i) __builtin_ia32_vpcomgew ((__v8hi)__A, (__v8hi)__B);
    613 }
    614 
    615 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    616 _mm_comeq_epi16(__m128i __A, __m128i __B)
    617 {
    618   return (__m128i) __builtin_ia32_vpcomeqw ((__v8hi)__A, (__v8hi)__B);
    619 }
    620 
    621 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    622 _mm_comneq_epi16(__m128i __A, __m128i __B)
    623 {
    624   return (__m128i) __builtin_ia32_vpcomneqw ((__v8hi)__A, (__v8hi)__B);
    625 }
    626 
    627 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    628 _mm_comfalse_epi16(__m128i __A, __m128i __B)
    629 {
    630   return (__m128i) __builtin_ia32_vpcomfalsew ((__v8hi)__A, (__v8hi)__B);
    631 }
    632 
    633 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    634 _mm_comtrue_epi16(__m128i __A, __m128i __B)
    635 {
    636   return (__m128i) __builtin_ia32_vpcomtruew ((__v8hi)__A, (__v8hi)__B);
    637 }
    638 
    639 /*pcom (integer, signed double words) */
    640 
    641 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    642 _mm_comlt_epi32(__m128i __A, __m128i __B)
    643 {
    644   return (__m128i) __builtin_ia32_vpcomltd ((__v4si)__A, (__v4si)__B);
    645 }
    646 
    647 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    648 _mm_comle_epi32(__m128i __A, __m128i __B)
    649 {
    650   return (__m128i) __builtin_ia32_vpcomled ((__v4si)__A, (__v4si)__B);
    651 }
    652 
    653 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    654 _mm_comgt_epi32(__m128i __A, __m128i __B)
    655 {
    656   return (__m128i) __builtin_ia32_vpcomgtd ((__v4si)__A, (__v4si)__B);
    657 }
    658 
    659 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    660 _mm_comge_epi32(__m128i __A, __m128i __B)
    661 {
    662   return (__m128i) __builtin_ia32_vpcomged ((__v4si)__A, (__v4si)__B);
    663 }
    664 
    665 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    666 _mm_comeq_epi32(__m128i __A, __m128i __B)
    667 {
    668   return (__m128i) __builtin_ia32_vpcomeqd ((__v4si)__A, (__v4si)__B);
    669 }
    670 
    671 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    672 _mm_comneq_epi32(__m128i __A, __m128i __B)
    673 {
    674   return (__m128i) __builtin_ia32_vpcomneqd ((__v4si)__A, (__v4si)__B);
    675 }
    676 
    677 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    678 _mm_comfalse_epi32(__m128i __A, __m128i __B)
    679 {
    680   return (__m128i) __builtin_ia32_vpcomfalsed ((__v4si)__A, (__v4si)__B);
    681 }
    682 
    683 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    684 _mm_comtrue_epi32(__m128i __A, __m128i __B)
    685 {
    686   return (__m128i) __builtin_ia32_vpcomtrued ((__v4si)__A, (__v4si)__B);
    687 }
    688 
    689 /*pcom (integer, signed quad words) */
    690 
    691 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    692 _mm_comlt_epi64(__m128i __A, __m128i __B)
    693 {
    694   return (__m128i) __builtin_ia32_vpcomltq ((__v2di)__A, (__v2di)__B);
    695 }
    696 
    697 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    698 _mm_comle_epi64(__m128i __A, __m128i __B)
    699 {
    700   return (__m128i) __builtin_ia32_vpcomleq ((__v2di)__A, (__v2di)__B);
    701 }
    702 
    703 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    704 _mm_comgt_epi64(__m128i __A, __m128i __B)
    705 {
    706   return (__m128i) __builtin_ia32_vpcomgtq ((__v2di)__A, (__v2di)__B);
    707 }
    708 
    709 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    710 _mm_comge_epi64(__m128i __A, __m128i __B)
    711 {
    712   return (__m128i) __builtin_ia32_vpcomgeq ((__v2di)__A, (__v2di)__B);
    713 }
    714 
    715 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    716 _mm_comeq_epi64(__m128i __A, __m128i __B)
    717 {
    718   return (__m128i) __builtin_ia32_vpcomeqq ((__v2di)__A, (__v2di)__B);
    719 }
    720 
    721 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    722 _mm_comneq_epi64(__m128i __A, __m128i __B)
    723 {
    724   return (__m128i) __builtin_ia32_vpcomneqq ((__v2di)__A, (__v2di)__B);
    725 }
    726 
    727 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    728 _mm_comfalse_epi64(__m128i __A, __m128i __B)
    729 {
    730   return (__m128i) __builtin_ia32_vpcomfalseq ((__v2di)__A, (__v2di)__B);
    731 }
    732 
    733 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    734 _mm_comtrue_epi64(__m128i __A, __m128i __B)
    735 {
    736   return (__m128i) __builtin_ia32_vpcomtrueq ((__v2di)__A, (__v2di)__B);
    737 }
    738 
    739 /* FRCZ */
    740 
    741 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    742 _mm_frcz_ps (__m128 __A)
    743 {
    744   return (__m128) __builtin_ia32_vfrczps ((__v4sf)__A);
    745 }
    746 
    747 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    748 _mm_frcz_pd (__m128d __A)
    749 {
    750   return (__m128d) __builtin_ia32_vfrczpd ((__v2df)__A);
    751 }
    752 
    753 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    754 _mm_frcz_ss (__m128 __A, __m128 __B)
    755 {
    756   return (__m128) __builtin_ia32_movss ((__v4sf)__A,
    757 					(__v4sf)
    758 					__builtin_ia32_vfrczss ((__v4sf)__B));
    759 }
    760 
    761 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    762 _mm_frcz_sd (__m128d __A, __m128d __B)
    763 {
    764   return (__m128d) __builtin_ia32_movsd ((__v2df)__A,
    765 					 (__v2df)
    766 					 __builtin_ia32_vfrczsd ((__v2df)__B));
    767 }
    768 
    769 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    770 _mm256_frcz_ps (__m256 __A)
    771 {
    772   return (__m256) __builtin_ia32_vfrczps256 ((__v8sf)__A);
    773 }
    774 
    775 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    776 _mm256_frcz_pd (__m256d __A)
    777 {
    778   return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df)__A);
    779 }
    780 
/* PERMIL2.

   Each entry point forwards two floating-point vectors (__X, __Y), an
   integer selector vector (__C) and an integer (__I) to the matching
   vpermil2 builtin.  Inline functions are provided only when
   __OPTIMIZE__ is defined; otherwise equivalent macros hand the __I
   expression to the builtin directly.
   NOTE(review): presumably the split exists because the builtins need
   __I to be a compile-time constant -- confirm against the GCC x86
   built-in documentation.  */

#ifdef __OPTIMIZE__
/* Forwards __X and __Y as __v2df, __C as __v2di and __I to the
   vpermil2pd builtin; the result is returned as __m128d.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int __I)
{
  return (__m128d) __builtin_ia32_vpermil2pd ((__v2df)__X,
                                              (__v2df)__Y,
                                              (__v2di)__C,
                                              __I);
}

/* Forwards __X and __Y as __v4df, __C as __v4di and __I to the
   vpermil2pd256 builtin; the result is returned as __m256d.  */
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I)
{
  return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)__X,
                                                 (__v4df)__Y,
                                                 (__v4di)__C,
                                                 __I);
}

/* Forwards __X and __Y as __v4sf, __C as __v4si and __I to the
   vpermil2ps builtin; the result is returned as __m128.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I)
{
  return (__m128) __builtin_ia32_vpermil2ps ((__v4sf)__X,
                                             (__v4sf)__Y,
                                             (__v4si)__C,
                                             __I);
}

/* Forwards __X and __Y as __v8sf, __C as __v8si and __I to the
   vpermil2ps256 builtin; the result is returned as __m256.  */
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
{
  return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)__X,
                                                (__v8sf)__Y,
                                                (__v8si)__C,
                                                __I);
}
#else
/* Macro forms: each vector argument is first converted to its public
   vector type and then to the element vector type; I is converted to
   int.  */
#define _mm_permute2_pd(X, Y, C, I)                                     \
  ((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X),           \
                                        (__v2df)(__m128d)(Y),           \
                                        (__v2di)(__m128i)(C),           \
                                        (int)(I)))

#define _mm256_permute2_pd(X, Y, C, I)                                  \
  ((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X),        \
                                           (__v4df)(__m256d)(Y),        \
                                           (__v4di)(__m256i)(C),        \
                                           (int)(I)))

#define _mm_permute2_ps(X, Y, C, I)                                     \
  ((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X),             \
                                       (__v4sf)(__m128)(Y),             \
                                       (__v4si)(__m128i)(C),            \
                                       (int)(I)))

#define _mm256_permute2_ps(X, Y, C, I)                                  \
  ((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X),          \
                                          (__v8sf)(__m256)(Y),          \
                                          (__v8si)(__m256i)(C),         \
                                          (int)(I)))
#endif /* __OPTIMIZE__ */

    845 #ifdef __DISABLE_XOP__
    846 #undef __DISABLE_XOP__
    847 #pragma GCC pop_options
    848 #endif /* __DISABLE_XOP__ */
    849 
    850 #endif /* _XOPMMINTRIN_H_INCLUDED */
    851