1 1.12 mrg /* Copyright (C) 2007-2022 Free Software Foundation, Inc. 2 1.1 mrg 3 1.1 mrg This file is part of GCC. 4 1.1 mrg 5 1.1 mrg GCC is free software; you can redistribute it and/or modify 6 1.1 mrg it under the terms of the GNU General Public License as published by 7 1.1 mrg the Free Software Foundation; either version 3, or (at your option) 8 1.1 mrg any later version. 9 1.1 mrg 10 1.1 mrg GCC is distributed in the hope that it will be useful, 11 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of 12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 1.1 mrg GNU General Public License for more details. 14 1.1 mrg 15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional 16 1.1 mrg permissions described in the GCC Runtime Library Exception, version 17 1.1 mrg 3.1, as published by the Free Software Foundation. 18 1.1 mrg 19 1.1 mrg You should have received a copy of the GNU General Public License and 20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program; 21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 1.1 mrg <http://www.gnu.org/licenses/>. */ 23 1.1 mrg 24 1.1 mrg #ifndef _X86INTRIN_H_INCLUDED 25 1.1 mrg # error "Never use <xopintrin.h> directly; include <x86intrin.h> instead." 26 1.1 mrg #endif 27 1.1 mrg 28 1.1 mrg #ifndef _XOPMMINTRIN_H_INCLUDED 29 1.1 mrg #define _XOPMMINTRIN_H_INCLUDED 30 1.1 mrg 31 1.5 mrg #include <fma4intrin.h> 32 1.5 mrg 33 1.1 mrg #ifndef __XOP__ 34 1.5 mrg #pragma GCC push_options 35 1.5 mrg #pragma GCC target("xop") 36 1.5 mrg #define __DISABLE_XOP__ 37 1.5 mrg #endif /* __XOP__ */ 38 1.1 mrg 39 1.8 mrg /* Integer multiply/add instructions. 
*/ 40 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 41 1.1 mrg _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) 42 1.1 mrg { 43 1.1 mrg return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C); 44 1.1 mrg } 45 1.1 mrg 46 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 47 1.1 mrg _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) 48 1.1 mrg { 49 1.1 mrg return (__m128i) __builtin_ia32_vpmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); 50 1.1 mrg } 51 1.1 mrg 52 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 53 1.1 mrg _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) 54 1.1 mrg { 55 1.1 mrg return (__m128i) __builtin_ia32_vpmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 56 1.1 mrg } 57 1.1 mrg 58 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 59 1.1 mrg _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) 60 1.1 mrg { 61 1.1 mrg return (__m128i) __builtin_ia32_vpmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 62 1.1 mrg } 63 1.1 mrg 64 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 65 1.1 mrg _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) 66 1.1 mrg { 67 1.1 mrg return (__m128i) __builtin_ia32_vpmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C); 68 1.1 mrg } 69 1.1 mrg 70 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 71 1.1 mrg _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) 72 1.1 mrg { 73 1.1 mrg return (__m128i) __builtin_ia32_vpmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C); 74 1.1 mrg } 75 1.1 mrg 76 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 77 1.1 mrg _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) 78 1.1 mrg { 79 1.1 mrg return 
(__m128i) __builtin_ia32_vpmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C); 80 1.1 mrg } 81 1.1 mrg 82 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 83 1.1 mrg _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) 84 1.1 mrg { 85 1.1 mrg return (__m128i) __builtin_ia32_vpmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C); 86 1.1 mrg } 87 1.1 mrg 88 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 89 1.1 mrg _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) 90 1.1 mrg { 91 1.1 mrg return (__m128i) __builtin_ia32_vpmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C); 92 1.1 mrg } 93 1.1 mrg 94 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 95 1.1 mrg _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) 96 1.1 mrg { 97 1.1 mrg return (__m128i) __builtin_ia32_vpmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C); 98 1.1 mrg } 99 1.1 mrg 100 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 101 1.1 mrg _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) 102 1.1 mrg { 103 1.1 mrg return (__m128i) __builtin_ia32_vpmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C); 104 1.1 mrg } 105 1.1 mrg 106 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 107 1.1 mrg _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) 108 1.1 mrg { 109 1.1 mrg return (__m128i) __builtin_ia32_vpmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C); 110 1.1 mrg } 111 1.1 mrg 112 1.1 mrg /* Packed Integer Horizontal Add and Subtract */ 113 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 114 1.1 mrg _mm_haddw_epi8(__m128i __A) 115 1.1 mrg { 116 1.1 mrg return (__m128i) __builtin_ia32_vphaddbw ((__v16qi)__A); 117 1.1 mrg } 118 1.1 mrg 119 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) 120 1.1 mrg _mm_haddd_epi8(__m128i __A) 121 1.1 mrg { 122 1.1 mrg return (__m128i) __builtin_ia32_vphaddbd ((__v16qi)__A); 123 1.1 mrg } 124 1.1 mrg 125 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 126 1.1 mrg _mm_haddq_epi8(__m128i __A) 127 1.1 mrg { 128 1.1 mrg return (__m128i) __builtin_ia32_vphaddbq ((__v16qi)__A); 129 1.1 mrg } 130 1.1 mrg 131 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 132 1.1 mrg _mm_haddd_epi16(__m128i __A) 133 1.1 mrg { 134 1.1 mrg return (__m128i) __builtin_ia32_vphaddwd ((__v8hi)__A); 135 1.1 mrg } 136 1.1 mrg 137 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 138 1.1 mrg _mm_haddq_epi16(__m128i __A) 139 1.1 mrg { 140 1.1 mrg return (__m128i) __builtin_ia32_vphaddwq ((__v8hi)__A); 141 1.1 mrg } 142 1.1 mrg 143 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 144 1.1 mrg _mm_haddq_epi32(__m128i __A) 145 1.1 mrg { 146 1.1 mrg return (__m128i) __builtin_ia32_vphadddq ((__v4si)__A); 147 1.1 mrg } 148 1.1 mrg 149 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 150 1.1 mrg _mm_haddw_epu8(__m128i __A) 151 1.1 mrg { 152 1.1 mrg return (__m128i) __builtin_ia32_vphaddubw ((__v16qi)__A); 153 1.1 mrg } 154 1.1 mrg 155 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 156 1.1 mrg _mm_haddd_epu8(__m128i __A) 157 1.1 mrg { 158 1.1 mrg return (__m128i) __builtin_ia32_vphaddubd ((__v16qi)__A); 159 1.1 mrg } 160 1.1 mrg 161 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 162 1.1 mrg _mm_haddq_epu8(__m128i __A) 163 1.1 mrg { 164 1.1 mrg return (__m128i) __builtin_ia32_vphaddubq ((__v16qi)__A); 165 1.1 mrg } 166 1.1 mrg 167 1.1 mrg extern __inline __m128i 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) 168 1.1 mrg _mm_haddd_epu16(__m128i __A) 169 1.1 mrg { 170 1.1 mrg return (__m128i) __builtin_ia32_vphadduwd ((__v8hi)__A); 171 1.1 mrg } 172 1.1 mrg 173 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 174 1.1 mrg _mm_haddq_epu16(__m128i __A) 175 1.1 mrg { 176 1.1 mrg return (__m128i) __builtin_ia32_vphadduwq ((__v8hi)__A); 177 1.1 mrg } 178 1.1 mrg 179 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 180 1.1 mrg _mm_haddq_epu32(__m128i __A) 181 1.1 mrg { 182 1.1 mrg return (__m128i) __builtin_ia32_vphaddudq ((__v4si)__A); 183 1.1 mrg } 184 1.1 mrg 185 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 186 1.1 mrg _mm_hsubw_epi8(__m128i __A) 187 1.1 mrg { 188 1.1 mrg return (__m128i) __builtin_ia32_vphsubbw ((__v16qi)__A); 189 1.1 mrg } 190 1.1 mrg 191 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 192 1.1 mrg _mm_hsubd_epi16(__m128i __A) 193 1.1 mrg { 194 1.1 mrg return (__m128i) __builtin_ia32_vphsubwd ((__v8hi)__A); 195 1.1 mrg } 196 1.1 mrg 197 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 198 1.1 mrg _mm_hsubq_epi32(__m128i __A) 199 1.1 mrg { 200 1.1 mrg return (__m128i) __builtin_ia32_vphsubdq ((__v4si)__A); 201 1.1 mrg } 202 1.1 mrg 203 1.1 mrg /* Vector conditional move and permute */ 204 1.1 mrg 205 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 206 1.1 mrg _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) 207 1.1 mrg { 208 1.1 mrg return (__m128i) __builtin_ia32_vpcmov (__A, __B, __C); 209 1.1 mrg } 210 1.1 mrg 211 1.11 mrg extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 212 1.11 mrg _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) 213 
1.11 mrg { 214 1.11 mrg return (__m256i) __builtin_ia32_vpcmov256 (__A, __B, __C); 215 1.11 mrg } 216 1.11 mrg 217 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 218 1.1 mrg _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) 219 1.1 mrg { 220 1.1 mrg return (__m128i) __builtin_ia32_vpperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); 221 1.1 mrg } 222 1.1 mrg 223 1.1 mrg /* Packed Integer Rotates and Shifts 224 1.1 mrg Rotates - Non-Immediate form */ 225 1.1 mrg 226 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 227 1.1 mrg _mm_rot_epi8(__m128i __A, __m128i __B) 228 1.1 mrg { 229 1.1 mrg return (__m128i) __builtin_ia32_vprotb ((__v16qi)__A, (__v16qi)__B); 230 1.1 mrg } 231 1.1 mrg 232 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 233 1.1 mrg _mm_rot_epi16(__m128i __A, __m128i __B) 234 1.1 mrg { 235 1.1 mrg return (__m128i) __builtin_ia32_vprotw ((__v8hi)__A, (__v8hi)__B); 236 1.1 mrg } 237 1.1 mrg 238 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 239 1.1 mrg _mm_rot_epi32(__m128i __A, __m128i __B) 240 1.1 mrg { 241 1.1 mrg return (__m128i) __builtin_ia32_vprotd ((__v4si)__A, (__v4si)__B); 242 1.1 mrg } 243 1.1 mrg 244 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 245 1.1 mrg _mm_rot_epi64(__m128i __A, __m128i __B) 246 1.1 mrg { 247 1.1 mrg return (__m128i) __builtin_ia32_vprotq ((__v2di)__A, (__v2di)__B); 248 1.1 mrg } 249 1.1 mrg 250 1.1 mrg /* Rotates - Immediate form */ 251 1.1 mrg 252 1.1 mrg #ifdef __OPTIMIZE__ 253 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 254 1.1 mrg _mm_roti_epi8(__m128i __A, const int __B) 255 1.1 mrg { 256 1.1 mrg return (__m128i) __builtin_ia32_vprotbi ((__v16qi)__A, __B); 257 1.1 mrg } 258 1.1 mrg 259 1.1 mrg extern 
__inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 260 1.1 mrg _mm_roti_epi16(__m128i __A, const int __B) 261 1.1 mrg { 262 1.1 mrg return (__m128i) __builtin_ia32_vprotwi ((__v8hi)__A, __B); 263 1.1 mrg } 264 1.1 mrg 265 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 266 1.1 mrg _mm_roti_epi32(__m128i __A, const int __B) 267 1.1 mrg { 268 1.1 mrg return (__m128i) __builtin_ia32_vprotdi ((__v4si)__A, __B); 269 1.1 mrg } 270 1.1 mrg 271 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 272 1.1 mrg _mm_roti_epi64(__m128i __A, const int __B) 273 1.1 mrg { 274 1.1 mrg return (__m128i) __builtin_ia32_vprotqi ((__v2di)__A, __B); 275 1.1 mrg } 276 1.1 mrg #else 277 1.1 mrg #define _mm_roti_epi8(A, N) \ 278 1.1 mrg ((__m128i) __builtin_ia32_vprotbi ((__v16qi)(__m128i)(A), (int)(N))) 279 1.1 mrg #define _mm_roti_epi16(A, N) \ 280 1.1 mrg ((__m128i) __builtin_ia32_vprotwi ((__v8hi)(__m128i)(A), (int)(N))) 281 1.1 mrg #define _mm_roti_epi32(A, N) \ 282 1.1 mrg ((__m128i) __builtin_ia32_vprotdi ((__v4si)(__m128i)(A), (int)(N))) 283 1.1 mrg #define _mm_roti_epi64(A, N) \ 284 1.1 mrg ((__m128i) __builtin_ia32_vprotqi ((__v2di)(__m128i)(A), (int)(N))) 285 1.1 mrg #endif 286 1.1 mrg 287 1.1 mrg /* Shifts */ 288 1.1 mrg 289 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 290 1.1 mrg _mm_shl_epi8(__m128i __A, __m128i __B) 291 1.1 mrg { 292 1.1 mrg return (__m128i) __builtin_ia32_vpshlb ((__v16qi)__A, (__v16qi)__B); 293 1.1 mrg } 294 1.1 mrg 295 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 296 1.1 mrg _mm_shl_epi16(__m128i __A, __m128i __B) 297 1.1 mrg { 298 1.1 mrg return (__m128i) __builtin_ia32_vpshlw ((__v8hi)__A, (__v8hi)__B); 299 1.1 mrg } 300 1.1 mrg 301 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) 302 1.1 mrg _mm_shl_epi32(__m128i __A, __m128i __B) 303 1.1 mrg { 304 1.1 mrg return (__m128i) __builtin_ia32_vpshld ((__v4si)__A, (__v4si)__B); 305 1.1 mrg } 306 1.1 mrg 307 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 308 1.1 mrg _mm_shl_epi64(__m128i __A, __m128i __B) 309 1.1 mrg { 310 1.1 mrg return (__m128i) __builtin_ia32_vpshlq ((__v2di)__A, (__v2di)__B); 311 1.1 mrg } 312 1.1 mrg 313 1.1 mrg 314 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 315 1.1 mrg _mm_sha_epi8(__m128i __A, __m128i __B) 316 1.1 mrg { 317 1.1 mrg return (__m128i) __builtin_ia32_vpshab ((__v16qi)__A, (__v16qi)__B); 318 1.1 mrg } 319 1.1 mrg 320 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 321 1.1 mrg _mm_sha_epi16(__m128i __A, __m128i __B) 322 1.1 mrg { 323 1.1 mrg return (__m128i) __builtin_ia32_vpshaw ((__v8hi)__A, (__v8hi)__B); 324 1.1 mrg } 325 1.1 mrg 326 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 327 1.1 mrg _mm_sha_epi32(__m128i __A, __m128i __B) 328 1.1 mrg { 329 1.1 mrg return (__m128i) __builtin_ia32_vpshad ((__v4si)__A, (__v4si)__B); 330 1.1 mrg } 331 1.1 mrg 332 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 333 1.1 mrg _mm_sha_epi64(__m128i __A, __m128i __B) 334 1.1 mrg { 335 1.1 mrg return (__m128i) __builtin_ia32_vpshaq ((__v2di)__A, (__v2di)__B); 336 1.1 mrg } 337 1.1 mrg 338 1.1 mrg /* Compare and Predicate Generation 339 1.8 mrg pcom (integer, unsigned bytes) */ 340 1.1 mrg 341 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 342 1.1 mrg _mm_comlt_epu8(__m128i __A, __m128i __B) 343 1.1 mrg { 344 1.1 mrg return (__m128i) __builtin_ia32_vpcomltub ((__v16qi)__A, (__v16qi)__B); 345 1.1 mrg } 346 1.1 mrg 347 1.1 mrg extern __inline __m128i 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) 348 1.1 mrg _mm_comle_epu8(__m128i __A, __m128i __B) 349 1.1 mrg { 350 1.1 mrg return (__m128i) __builtin_ia32_vpcomleub ((__v16qi)__A, (__v16qi)__B); 351 1.1 mrg } 352 1.1 mrg 353 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 354 1.1 mrg _mm_comgt_epu8(__m128i __A, __m128i __B) 355 1.1 mrg { 356 1.1 mrg return (__m128i) __builtin_ia32_vpcomgtub ((__v16qi)__A, (__v16qi)__B); 357 1.1 mrg } 358 1.1 mrg 359 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 360 1.1 mrg _mm_comge_epu8(__m128i __A, __m128i __B) 361 1.1 mrg { 362 1.1 mrg return (__m128i) __builtin_ia32_vpcomgeub ((__v16qi)__A, (__v16qi)__B); 363 1.1 mrg } 364 1.1 mrg 365 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 366 1.1 mrg _mm_comeq_epu8(__m128i __A, __m128i __B) 367 1.1 mrg { 368 1.1 mrg return (__m128i) __builtin_ia32_vpcomequb ((__v16qi)__A, (__v16qi)__B); 369 1.1 mrg } 370 1.1 mrg 371 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 372 1.1 mrg _mm_comneq_epu8(__m128i __A, __m128i __B) 373 1.1 mrg { 374 1.1 mrg return (__m128i) __builtin_ia32_vpcomnequb ((__v16qi)__A, (__v16qi)__B); 375 1.1 mrg } 376 1.1 mrg 377 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 378 1.1 mrg _mm_comfalse_epu8(__m128i __A, __m128i __B) 379 1.1 mrg { 380 1.1 mrg return (__m128i) __builtin_ia32_vpcomfalseub ((__v16qi)__A, (__v16qi)__B); 381 1.1 mrg } 382 1.1 mrg 383 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 384 1.1 mrg _mm_comtrue_epu8(__m128i __A, __m128i __B) 385 1.1 mrg { 386 1.1 mrg return (__m128i) __builtin_ia32_vpcomtrueub ((__v16qi)__A, (__v16qi)__B); 387 1.1 mrg } 388 1.1 mrg 389 1.8 mrg /*pcom (integer, unsigned words) */ 390 1.1 mrg 391 
1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 392 1.1 mrg _mm_comlt_epu16(__m128i __A, __m128i __B) 393 1.1 mrg { 394 1.1 mrg return (__m128i) __builtin_ia32_vpcomltuw ((__v8hi)__A, (__v8hi)__B); 395 1.1 mrg } 396 1.1 mrg 397 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 398 1.1 mrg _mm_comle_epu16(__m128i __A, __m128i __B) 399 1.1 mrg { 400 1.1 mrg return (__m128i) __builtin_ia32_vpcomleuw ((__v8hi)__A, (__v8hi)__B); 401 1.1 mrg } 402 1.1 mrg 403 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 404 1.1 mrg _mm_comgt_epu16(__m128i __A, __m128i __B) 405 1.1 mrg { 406 1.1 mrg return (__m128i) __builtin_ia32_vpcomgtuw ((__v8hi)__A, (__v8hi)__B); 407 1.1 mrg } 408 1.1 mrg 409 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 410 1.1 mrg _mm_comge_epu16(__m128i __A, __m128i __B) 411 1.1 mrg { 412 1.1 mrg return (__m128i) __builtin_ia32_vpcomgeuw ((__v8hi)__A, (__v8hi)__B); 413 1.1 mrg } 414 1.1 mrg 415 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 416 1.1 mrg _mm_comeq_epu16(__m128i __A, __m128i __B) 417 1.1 mrg { 418 1.1 mrg return (__m128i) __builtin_ia32_vpcomequw ((__v8hi)__A, (__v8hi)__B); 419 1.1 mrg } 420 1.1 mrg 421 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 422 1.1 mrg _mm_comneq_epu16(__m128i __A, __m128i __B) 423 1.1 mrg { 424 1.1 mrg return (__m128i) __builtin_ia32_vpcomnequw ((__v8hi)__A, (__v8hi)__B); 425 1.1 mrg } 426 1.1 mrg 427 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 428 1.1 mrg _mm_comfalse_epu16(__m128i __A, __m128i __B) 429 1.1 mrg { 430 1.1 mrg return (__m128i) __builtin_ia32_vpcomfalseuw ((__v8hi)__A, (__v8hi)__B); 431 1.1 mrg } 432 1.1 mrg 433 1.1 mrg extern __inline __m128i 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) 434 1.1 mrg _mm_comtrue_epu16(__m128i __A, __m128i __B) 435 1.1 mrg { 436 1.1 mrg return (__m128i) __builtin_ia32_vpcomtrueuw ((__v8hi)__A, (__v8hi)__B); 437 1.1 mrg } 438 1.1 mrg 439 1.8 mrg /*pcom (integer, unsigned double words) */ 440 1.1 mrg 441 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 442 1.1 mrg _mm_comlt_epu32(__m128i __A, __m128i __B) 443 1.1 mrg { 444 1.1 mrg return (__m128i) __builtin_ia32_vpcomltud ((__v4si)__A, (__v4si)__B); 445 1.1 mrg } 446 1.1 mrg 447 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 448 1.1 mrg _mm_comle_epu32(__m128i __A, __m128i __B) 449 1.1 mrg { 450 1.1 mrg return (__m128i) __builtin_ia32_vpcomleud ((__v4si)__A, (__v4si)__B); 451 1.1 mrg } 452 1.1 mrg 453 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 454 1.1 mrg _mm_comgt_epu32(__m128i __A, __m128i __B) 455 1.1 mrg { 456 1.1 mrg return (__m128i) __builtin_ia32_vpcomgtud ((__v4si)__A, (__v4si)__B); 457 1.1 mrg } 458 1.1 mrg 459 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 460 1.1 mrg _mm_comge_epu32(__m128i __A, __m128i __B) 461 1.1 mrg { 462 1.1 mrg return (__m128i) __builtin_ia32_vpcomgeud ((__v4si)__A, (__v4si)__B); 463 1.1 mrg } 464 1.1 mrg 465 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 466 1.1 mrg _mm_comeq_epu32(__m128i __A, __m128i __B) 467 1.1 mrg { 468 1.1 mrg return (__m128i) __builtin_ia32_vpcomequd ((__v4si)__A, (__v4si)__B); 469 1.1 mrg } 470 1.1 mrg 471 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 472 1.1 mrg _mm_comneq_epu32(__m128i __A, __m128i __B) 473 1.1 mrg { 474 1.1 mrg return (__m128i) __builtin_ia32_vpcomnequd ((__v4si)__A, (__v4si)__B); 475 1.1 mrg } 476 1.1 mrg 477 1.1 
mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 478 1.1 mrg _mm_comfalse_epu32(__m128i __A, __m128i __B) 479 1.1 mrg { 480 1.1 mrg return (__m128i) __builtin_ia32_vpcomfalseud ((__v4si)__A, (__v4si)__B); 481 1.1 mrg } 482 1.1 mrg 483 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 484 1.1 mrg _mm_comtrue_epu32(__m128i __A, __m128i __B) 485 1.1 mrg { 486 1.1 mrg return (__m128i) __builtin_ia32_vpcomtrueud ((__v4si)__A, (__v4si)__B); 487 1.1 mrg } 488 1.1 mrg 489 1.8 mrg /*pcom (integer, unsigned quad words) */ 490 1.1 mrg 491 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 492 1.1 mrg _mm_comlt_epu64(__m128i __A, __m128i __B) 493 1.1 mrg { 494 1.1 mrg return (__m128i) __builtin_ia32_vpcomltuq ((__v2di)__A, (__v2di)__B); 495 1.1 mrg } 496 1.1 mrg 497 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 498 1.1 mrg _mm_comle_epu64(__m128i __A, __m128i __B) 499 1.1 mrg { 500 1.1 mrg return (__m128i) __builtin_ia32_vpcomleuq ((__v2di)__A, (__v2di)__B); 501 1.1 mrg } 502 1.1 mrg 503 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 504 1.1 mrg _mm_comgt_epu64(__m128i __A, __m128i __B) 505 1.1 mrg { 506 1.1 mrg return (__m128i) __builtin_ia32_vpcomgtuq ((__v2di)__A, (__v2di)__B); 507 1.1 mrg } 508 1.1 mrg 509 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 510 1.1 mrg _mm_comge_epu64(__m128i __A, __m128i __B) 511 1.1 mrg { 512 1.1 mrg return (__m128i) __builtin_ia32_vpcomgeuq ((__v2di)__A, (__v2di)__B); 513 1.1 mrg } 514 1.1 mrg 515 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 516 1.1 mrg _mm_comeq_epu64(__m128i __A, __m128i __B) 517 1.1 mrg { 518 1.1 mrg return (__m128i) __builtin_ia32_vpcomequq ((__v2di)__A, (__v2di)__B); 519 
1.1 mrg } 520 1.1 mrg 521 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 522 1.1 mrg _mm_comneq_epu64(__m128i __A, __m128i __B) 523 1.1 mrg { 524 1.1 mrg return (__m128i) __builtin_ia32_vpcomnequq ((__v2di)__A, (__v2di)__B); 525 1.1 mrg } 526 1.1 mrg 527 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 528 1.1 mrg _mm_comfalse_epu64(__m128i __A, __m128i __B) 529 1.1 mrg { 530 1.1 mrg return (__m128i) __builtin_ia32_vpcomfalseuq ((__v2di)__A, (__v2di)__B); 531 1.1 mrg } 532 1.1 mrg 533 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 534 1.1 mrg _mm_comtrue_epu64(__m128i __A, __m128i __B) 535 1.1 mrg { 536 1.1 mrg return (__m128i) __builtin_ia32_vpcomtrueuq ((__v2di)__A, (__v2di)__B); 537 1.1 mrg } 538 1.1 mrg 539 1.1 mrg /*pcom (integer, signed bytes) */ 540 1.1 mrg 541 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 542 1.1 mrg _mm_comlt_epi8(__m128i __A, __m128i __B) 543 1.1 mrg { 544 1.1 mrg return (__m128i) __builtin_ia32_vpcomltb ((__v16qi)__A, (__v16qi)__B); 545 1.1 mrg } 546 1.1 mrg 547 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 548 1.1 mrg _mm_comle_epi8(__m128i __A, __m128i __B) 549 1.1 mrg { 550 1.1 mrg return (__m128i) __builtin_ia32_vpcomleb ((__v16qi)__A, (__v16qi)__B); 551 1.1 mrg } 552 1.1 mrg 553 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 554 1.1 mrg _mm_comgt_epi8(__m128i __A, __m128i __B) 555 1.1 mrg { 556 1.1 mrg return (__m128i) __builtin_ia32_vpcomgtb ((__v16qi)__A, (__v16qi)__B); 557 1.1 mrg } 558 1.1 mrg 559 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 560 1.1 mrg _mm_comge_epi8(__m128i __A, __m128i __B) 561 1.1 mrg { 562 1.1 mrg return (__m128i) __builtin_ia32_vpcomgeb 
((__v16qi)__A, (__v16qi)__B); 563 1.1 mrg } 564 1.1 mrg 565 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 566 1.1 mrg _mm_comeq_epi8(__m128i __A, __m128i __B) 567 1.1 mrg { 568 1.1 mrg return (__m128i) __builtin_ia32_vpcomeqb ((__v16qi)__A, (__v16qi)__B); 569 1.1 mrg } 570 1.1 mrg 571 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 572 1.1 mrg _mm_comneq_epi8(__m128i __A, __m128i __B) 573 1.1 mrg { 574 1.1 mrg return (__m128i) __builtin_ia32_vpcomneqb ((__v16qi)__A, (__v16qi)__B); 575 1.1 mrg } 576 1.1 mrg 577 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 578 1.1 mrg _mm_comfalse_epi8(__m128i __A, __m128i __B) 579 1.1 mrg { 580 1.1 mrg return (__m128i) __builtin_ia32_vpcomfalseb ((__v16qi)__A, (__v16qi)__B); 581 1.1 mrg } 582 1.1 mrg 583 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 584 1.1 mrg _mm_comtrue_epi8(__m128i __A, __m128i __B) 585 1.1 mrg { 586 1.1 mrg return (__m128i) __builtin_ia32_vpcomtrueb ((__v16qi)__A, (__v16qi)__B); 587 1.1 mrg } 588 1.1 mrg 589 1.1 mrg /*pcom (integer, signed words) */ 590 1.1 mrg 591 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 592 1.1 mrg _mm_comlt_epi16(__m128i __A, __m128i __B) 593 1.1 mrg { 594 1.1 mrg return (__m128i) __builtin_ia32_vpcomltw ((__v8hi)__A, (__v8hi)__B); 595 1.1 mrg } 596 1.1 mrg 597 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 598 1.1 mrg _mm_comle_epi16(__m128i __A, __m128i __B) 599 1.1 mrg { 600 1.1 mrg return (__m128i) __builtin_ia32_vpcomlew ((__v8hi)__A, (__v8hi)__B); 601 1.1 mrg } 602 1.1 mrg 603 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 604 1.1 mrg _mm_comgt_epi16(__m128i __A, __m128i __B) 605 1.1 mrg { 606 1.1 mrg return (__m128i) 
__builtin_ia32_vpcomgtw ((__v8hi)__A, (__v8hi)__B); 607 1.1 mrg } 608 1.1 mrg 609 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 610 1.1 mrg _mm_comge_epi16(__m128i __A, __m128i __B) 611 1.1 mrg { 612 1.1 mrg return (__m128i) __builtin_ia32_vpcomgew ((__v8hi)__A, (__v8hi)__B); 613 1.1 mrg } 614 1.1 mrg 615 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 616 1.1 mrg _mm_comeq_epi16(__m128i __A, __m128i __B) 617 1.1 mrg { 618 1.1 mrg return (__m128i) __builtin_ia32_vpcomeqw ((__v8hi)__A, (__v8hi)__B); 619 1.1 mrg } 620 1.1 mrg 621 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 622 1.1 mrg _mm_comneq_epi16(__m128i __A, __m128i __B) 623 1.1 mrg { 624 1.1 mrg return (__m128i) __builtin_ia32_vpcomneqw ((__v8hi)__A, (__v8hi)__B); 625 1.1 mrg } 626 1.1 mrg 627 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 628 1.1 mrg _mm_comfalse_epi16(__m128i __A, __m128i __B) 629 1.1 mrg { 630 1.1 mrg return (__m128i) __builtin_ia32_vpcomfalsew ((__v8hi)__A, (__v8hi)__B); 631 1.1 mrg } 632 1.1 mrg 633 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 634 1.1 mrg _mm_comtrue_epi16(__m128i __A, __m128i __B) 635 1.1 mrg { 636 1.1 mrg return (__m128i) __builtin_ia32_vpcomtruew ((__v8hi)__A, (__v8hi)__B); 637 1.1 mrg } 638 1.1 mrg 639 1.1 mrg /*pcom (integer, signed double words) */ 640 1.1 mrg 641 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 642 1.1 mrg _mm_comlt_epi32(__m128i __A, __m128i __B) 643 1.1 mrg { 644 1.1 mrg return (__m128i) __builtin_ia32_vpcomltd ((__v4si)__A, (__v4si)__B); 645 1.1 mrg } 646 1.1 mrg 647 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 648 1.1 mrg _mm_comle_epi32(__m128i __A, __m128i __B) 649 1.1 mrg { 650 
1.1 mrg return (__m128i) __builtin_ia32_vpcomled ((__v4si)__A, (__v4si)__B); 651 1.1 mrg } 652 1.1 mrg /* The _mm_com*_epi32 intrinsics below cast both operands to __v4si and forward them to the __builtin_ia32_vpcom*d builtin named after the same predicate. */ 653 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 654 1.1 mrg _mm_comgt_epi32(__m128i __A, __m128i __B) 655 1.1 mrg { 656 1.1 mrg return (__m128i) __builtin_ia32_vpcomgtd ((__v4si)__A, (__v4si)__B); 657 1.1 mrg } 658 1.1 mrg 659 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 660 1.1 mrg _mm_comge_epi32(__m128i __A, __m128i __B) 661 1.1 mrg { 662 1.1 mrg return (__m128i) __builtin_ia32_vpcomged ((__v4si)__A, (__v4si)__B); 663 1.1 mrg } 664 1.1 mrg 665 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 666 1.1 mrg _mm_comeq_epi32(__m128i __A, __m128i __B) 667 1.1 mrg { 668 1.1 mrg return (__m128i) __builtin_ia32_vpcomeqd ((__v4si)__A, (__v4si)__B); 669 1.1 mrg } 670 1.1 mrg 671 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 672 1.1 mrg _mm_comneq_epi32(__m128i __A, __m128i __B) 673 1.1 mrg { 674 1.1 mrg return (__m128i) __builtin_ia32_vpcomneqd ((__v4si)__A, (__v4si)__B); 675 1.1 mrg } 676 1.1 mrg 677 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 678 1.1 mrg _mm_comfalse_epi32(__m128i __A, __m128i __B) 679 1.1 mrg { 680 1.1 mrg return (__m128i) __builtin_ia32_vpcomfalsed ((__v4si)__A, (__v4si)__B); 681 1.1 mrg } 682 1.1 mrg 683 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 684 1.1 mrg _mm_comtrue_epi32(__m128i __A, __m128i __B) 685 1.1 mrg { 686 1.1 mrg return (__m128i) __builtin_ia32_vpcomtrued ((__v4si)__A, (__v4si)__B); 687 1.1 mrg } 688 1.1 mrg 689 1.1 mrg /* pcom (integer, signed quad words): each intrinsic below casts both operands to __v2di and forwards them to the __builtin_ia32_vpcom*q builtin named after the same predicate. */ 690 1.1 mrg 691 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 692 1.1 mrg _mm_comlt_epi64(__m128i __A, __m128i 
__B) 693 1.1 mrg { 694 1.1 mrg return (__m128i) __builtin_ia32_vpcomltq ((__v2di)__A, (__v2di)__B); 695 1.1 mrg } 696 1.1 mrg 697 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 698 1.1 mrg _mm_comle_epi64(__m128i __A, __m128i __B) 699 1.1 mrg { 700 1.1 mrg return (__m128i) __builtin_ia32_vpcomleq ((__v2di)__A, (__v2di)__B); 701 1.1 mrg } 702 1.1 mrg 703 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 704 1.1 mrg _mm_comgt_epi64(__m128i __A, __m128i __B) 705 1.1 mrg { 706 1.1 mrg return (__m128i) __builtin_ia32_vpcomgtq ((__v2di)__A, (__v2di)__B); 707 1.1 mrg } 708 1.1 mrg 709 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 710 1.1 mrg _mm_comge_epi64(__m128i __A, __m128i __B) 711 1.1 mrg { 712 1.1 mrg return (__m128i) __builtin_ia32_vpcomgeq ((__v2di)__A, (__v2di)__B); 713 1.1 mrg } 714 1.1 mrg 715 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 716 1.1 mrg _mm_comeq_epi64(__m128i __A, __m128i __B) 717 1.1 mrg { 718 1.1 mrg return (__m128i) __builtin_ia32_vpcomeqq ((__v2di)__A, (__v2di)__B); 719 1.1 mrg } 720 1.1 mrg 721 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 722 1.1 mrg _mm_comneq_epi64(__m128i __A, __m128i __B) 723 1.1 mrg { 724 1.1 mrg return (__m128i) __builtin_ia32_vpcomneqq ((__v2di)__A, (__v2di)__B); 725 1.1 mrg } 726 1.1 mrg 727 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 728 1.1 mrg _mm_comfalse_epi64(__m128i __A, __m128i __B) 729 1.1 mrg { 730 1.1 mrg return (__m128i) __builtin_ia32_vpcomfalseq ((__v2di)__A, (__v2di)__B); 731 1.1 mrg } 732 1.1 mrg 733 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 734 1.1 mrg _mm_comtrue_epi64(__m128i __A, __m128i __B) 735 1.1 mrg { 736 1.1 mrg return 
(__m128i) __builtin_ia32_vpcomtrueq ((__v2di)__A, (__v2di)__B); 737 1.1 mrg } 738 1.1 mrg 739 1.1 mrg /* FRCZ: wrappers around the __builtin_ia32_vfrcz* builtins. The packed forms forward __A directly; the scalar forms (_ss, _sd) apply the builtin to __B and merge that result with __A via __builtin_ia32_movss / __builtin_ia32_movsd. */ 740 1.1 mrg 741 1.1 mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 742 1.1 mrg _mm_frcz_ps (__m128 __A) 743 1.1 mrg { 744 1.1 mrg return (__m128) __builtin_ia32_vfrczps ((__v4sf)__A); 745 1.1 mrg } 746 1.1 mrg 747 1.1 mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 748 1.1 mrg _mm_frcz_pd (__m128d __A) 749 1.1 mrg { 750 1.1 mrg return (__m128d) __builtin_ia32_vfrczpd ((__v2df)__A); 751 1.1 mrg } 752 1.1 mrg 753 1.1 mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 754 1.1 mrg _mm_frcz_ss (__m128 __A, __m128 __B) 755 1.1 mrg { 756 1.3 mrg return (__m128) __builtin_ia32_movss ((__v4sf)__A, 757 1.3 mrg (__v4sf) 758 1.3 mrg __builtin_ia32_vfrczss ((__v4sf)__B)); 759 1.1 mrg } 760 1.1 mrg 761 1.1 mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 762 1.1 mrg _mm_frcz_sd (__m128d __A, __m128d __B) 763 1.1 mrg { 764 1.3 mrg return (__m128d) __builtin_ia32_movsd ((__v2df)__A, 765 1.3 mrg (__v2df) 766 1.3 mrg __builtin_ia32_vfrczsd ((__v2df)__B)); 767 1.1 mrg } 768 1.1 mrg 769 1.1 mrg extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 770 1.1 mrg _mm256_frcz_ps (__m256 __A) 771 1.1 mrg { 772 1.1 mrg return (__m256) __builtin_ia32_vfrczps256 ((__v8sf)__A); 773 1.1 mrg } 774 1.1 mrg 775 1.1 mrg extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 776 1.1 mrg _mm256_frcz_pd (__m256d __A) 777 1.1 mrg { 778 1.1 mrg return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df)__A); 779 1.1 mrg } 780 1.1 mrg 781 1.1 mrg /* PERMIL2: wrappers around the __builtin_ia32_vpermil2{pd,ps}[256] builtins. With __OPTIMIZE__ defined they are always-inline functions that pass __I straight through; otherwise they are macros that cast I to int. NOTE(review): presumably the macro fallback exists because the builtin requires I to be a compile-time constant -- confirm against the GCC builtin documentation. */ 782 1.1 mrg 783 1.1 mrg #ifdef __OPTIMIZE__ 784 1.1 mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 785 1.1 mrg _mm_permute2_pd (__m128d __X, 
__m128d __Y, __m128i __C, const int __I) 786 1.1 mrg { 787 1.1 mrg return (__m128d) __builtin_ia32_vpermil2pd ((__v2df)__X, 788 1.1 mrg (__v2df)__Y, 789 1.1 mrg (__v2di)__C, 790 1.1 mrg __I); 791 1.1 mrg } 792 1.1 mrg 793 1.1 mrg extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 794 1.1 mrg _mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I) 795 1.1 mrg { 796 1.1 mrg return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)__X, 797 1.1 mrg (__v4df)__Y, 798 1.1 mrg (__v4di)__C, 799 1.1 mrg __I); 800 1.1 mrg } 801 1.1 mrg 802 1.1 mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 803 1.1 mrg _mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I) 804 1.1 mrg { 805 1.1 mrg return (__m128) __builtin_ia32_vpermil2ps ((__v4sf)__X, 806 1.1 mrg (__v4sf)__Y, 807 1.1 mrg (__v4si)__C, 808 1.1 mrg __I); 809 1.1 mrg } 810 1.1 mrg 811 1.1 mrg extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 812 1.1 mrg _mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I) 813 1.1 mrg { 814 1.1 mrg return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)__X, 815 1.1 mrg (__v8sf)__Y, 816 1.1 mrg (__v8si)__C, 817 1.1 mrg __I); 818 1.1 mrg } 819 1.1 mrg #else 820 1.1 mrg #define _mm_permute2_pd(X, Y, C, I) \ 821 1.1 mrg ((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X), \ 822 1.1 mrg (__v2df)(__m128d)(Y), \ 823 1.10 mrg (__v2di)(__m128i)(C), \ 824 1.1 mrg (int)(I))) 825 1.1 mrg 826 1.1 mrg #define _mm256_permute2_pd(X, Y, C, I) \ 827 1.1 mrg ((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X), \ 828 1.1 mrg (__v4df)(__m256d)(Y), \ 829 1.10 mrg (__v4di)(__m256i)(C), \ 830 1.1 mrg (int)(I))) 831 1.1 mrg 832 1.1 mrg #define _mm_permute2_ps(X, Y, C, I) \ 833 1.1 mrg ((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X), \ 834 1.1 mrg (__v4sf)(__m128)(Y), \ 835 1.10 mrg (__v4si)(__m128i)(C), \ 836 1.1 mrg 
(int)(I))) 837 1.1 mrg 838 1.1 mrg #define _mm256_permute2_ps(X, Y, C, I) \ 839 1.1 mrg ((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X), \ 840 1.1 mrg (__v8sf)(__m256)(Y), \ 841 1.10 mrg (__v8si)(__m256i)(C), \ 842 1.1 mrg (int)(I))) 843 1.1 mrg #endif /* __OPTIMIZE__ */ 844 1.1 mrg /* If this header enabled the xop target itself (it defines __DISABLE_XOP__ when __XOP__ was not already defined), restore the previous target options here. */ 845 1.5 mrg #ifdef __DISABLE_XOP__ 846 1.5 mrg #undef __DISABLE_XOP__ 847 1.5 mrg #pragma GCC pop_options 848 1.5 mrg #endif /* __DISABLE_XOP__ */ 849 1.1 mrg 850 1.1 mrg #endif /* _XOPMMINTRIN_H_INCLUDED */ 851