xopintrin.h revision 1.12 1 1.12 mrg /* Copyright (C) 2007-2022 Free Software Foundation, Inc.
2 1.1 mrg
3 1.1 mrg This file is part of GCC.
4 1.1 mrg
5 1.1 mrg GCC is free software; you can redistribute it and/or modify
6 1.1 mrg it under the terms of the GNU General Public License as published by
7 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
8 1.1 mrg any later version.
9 1.1 mrg
10 1.1 mrg GCC is distributed in the hope that it will be useful,
11 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 1.1 mrg GNU General Public License for more details.
14 1.1 mrg
15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
16 1.1 mrg permissions described in the GCC Runtime Library Exception, version
17 1.1 mrg 3.1, as published by the Free Software Foundation.
18 1.1 mrg
19 1.1 mrg You should have received a copy of the GNU General Public License and
20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 1.1 mrg <http://www.gnu.org/licenses/>. */
23 1.1 mrg
24 1.1 mrg #ifndef _X86INTRIN_H_INCLUDED
25 1.1 mrg # error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
26 1.1 mrg #endif
27 1.1 mrg
28 1.1 mrg #ifndef _XOPMMINTRIN_H_INCLUDED
29 1.1 mrg #define _XOPMMINTRIN_H_INCLUDED
30 1.1 mrg
31 1.5 mrg #include <fma4intrin.h>
32 1.5 mrg
33 1.1 mrg #ifndef __XOP__
34 1.5 mrg #pragma GCC push_options
35 1.5 mrg #pragma GCC target("xop")
36 1.5 mrg #define __DISABLE_XOP__
37 1.5 mrg #endif /* __XOP__ */
38 1.1 mrg
39 1.8 mrg /* Integer multiply/add instructions. */
40 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
41 1.1 mrg _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
42 1.1 mrg {
43 1.1 mrg return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C);
44 1.1 mrg }
45 1.1 mrg
46 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
47 1.1 mrg _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
48 1.1 mrg {
49 1.1 mrg return (__m128i) __builtin_ia32_vpmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
50 1.1 mrg }
51 1.1 mrg
52 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
53 1.1 mrg _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
54 1.1 mrg {
55 1.1 mrg return (__m128i) __builtin_ia32_vpmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
56 1.1 mrg }
57 1.1 mrg
58 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
59 1.1 mrg _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
60 1.1 mrg {
61 1.1 mrg return (__m128i) __builtin_ia32_vpmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
62 1.1 mrg }
63 1.1 mrg
64 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
65 1.1 mrg _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
66 1.1 mrg {
67 1.1 mrg return (__m128i) __builtin_ia32_vpmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
68 1.1 mrg }
69 1.1 mrg
70 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
71 1.1 mrg _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
72 1.1 mrg {
73 1.1 mrg return (__m128i) __builtin_ia32_vpmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
74 1.1 mrg }
75 1.1 mrg
76 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
77 1.1 mrg _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
78 1.1 mrg {
79 1.1 mrg return (__m128i) __builtin_ia32_vpmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
80 1.1 mrg }
81 1.1 mrg
82 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
83 1.1 mrg _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
84 1.1 mrg {
85 1.1 mrg return (__m128i) __builtin_ia32_vpmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
86 1.1 mrg }
87 1.1 mrg
88 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
89 1.1 mrg _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
90 1.1 mrg {
91 1.1 mrg return (__m128i) __builtin_ia32_vpmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
92 1.1 mrg }
93 1.1 mrg
94 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
95 1.1 mrg _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
96 1.1 mrg {
97 1.1 mrg return (__m128i) __builtin_ia32_vpmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
98 1.1 mrg }
99 1.1 mrg
100 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
101 1.1 mrg _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
102 1.1 mrg {
103 1.1 mrg return (__m128i) __builtin_ia32_vpmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
104 1.1 mrg }
105 1.1 mrg
106 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
107 1.1 mrg _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
108 1.1 mrg {
109 1.1 mrg return (__m128i) __builtin_ia32_vpmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
110 1.1 mrg }
111 1.1 mrg
112 1.1 mrg /* Packed Integer Horizontal Add and Subtract */
113 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
114 1.1 mrg _mm_haddw_epi8(__m128i __A)
115 1.1 mrg {
116 1.1 mrg return (__m128i) __builtin_ia32_vphaddbw ((__v16qi)__A);
117 1.1 mrg }
118 1.1 mrg
119 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
120 1.1 mrg _mm_haddd_epi8(__m128i __A)
121 1.1 mrg {
122 1.1 mrg return (__m128i) __builtin_ia32_vphaddbd ((__v16qi)__A);
123 1.1 mrg }
124 1.1 mrg
125 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
126 1.1 mrg _mm_haddq_epi8(__m128i __A)
127 1.1 mrg {
128 1.1 mrg return (__m128i) __builtin_ia32_vphaddbq ((__v16qi)__A);
129 1.1 mrg }
130 1.1 mrg
131 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
132 1.1 mrg _mm_haddd_epi16(__m128i __A)
133 1.1 mrg {
134 1.1 mrg return (__m128i) __builtin_ia32_vphaddwd ((__v8hi)__A);
135 1.1 mrg }
136 1.1 mrg
137 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
138 1.1 mrg _mm_haddq_epi16(__m128i __A)
139 1.1 mrg {
140 1.1 mrg return (__m128i) __builtin_ia32_vphaddwq ((__v8hi)__A);
141 1.1 mrg }
142 1.1 mrg
143 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
144 1.1 mrg _mm_haddq_epi32(__m128i __A)
145 1.1 mrg {
146 1.1 mrg return (__m128i) __builtin_ia32_vphadddq ((__v4si)__A);
147 1.1 mrg }
148 1.1 mrg
149 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
150 1.1 mrg _mm_haddw_epu8(__m128i __A)
151 1.1 mrg {
152 1.1 mrg return (__m128i) __builtin_ia32_vphaddubw ((__v16qi)__A);
153 1.1 mrg }
154 1.1 mrg
155 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
156 1.1 mrg _mm_haddd_epu8(__m128i __A)
157 1.1 mrg {
158 1.1 mrg return (__m128i) __builtin_ia32_vphaddubd ((__v16qi)__A);
159 1.1 mrg }
160 1.1 mrg
161 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
162 1.1 mrg _mm_haddq_epu8(__m128i __A)
163 1.1 mrg {
164 1.1 mrg return (__m128i) __builtin_ia32_vphaddubq ((__v16qi)__A);
165 1.1 mrg }
166 1.1 mrg
167 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
168 1.1 mrg _mm_haddd_epu16(__m128i __A)
169 1.1 mrg {
170 1.1 mrg return (__m128i) __builtin_ia32_vphadduwd ((__v8hi)__A);
171 1.1 mrg }
172 1.1 mrg
173 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
174 1.1 mrg _mm_haddq_epu16(__m128i __A)
175 1.1 mrg {
176 1.1 mrg return (__m128i) __builtin_ia32_vphadduwq ((__v8hi)__A);
177 1.1 mrg }
178 1.1 mrg
179 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
180 1.1 mrg _mm_haddq_epu32(__m128i __A)
181 1.1 mrg {
182 1.1 mrg return (__m128i) __builtin_ia32_vphaddudq ((__v4si)__A);
183 1.1 mrg }
184 1.1 mrg
185 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
186 1.1 mrg _mm_hsubw_epi8(__m128i __A)
187 1.1 mrg {
188 1.1 mrg return (__m128i) __builtin_ia32_vphsubbw ((__v16qi)__A);
189 1.1 mrg }
190 1.1 mrg
191 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
192 1.1 mrg _mm_hsubd_epi16(__m128i __A)
193 1.1 mrg {
194 1.1 mrg return (__m128i) __builtin_ia32_vphsubwd ((__v8hi)__A);
195 1.1 mrg }
196 1.1 mrg
197 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
198 1.1 mrg _mm_hsubq_epi32(__m128i __A)
199 1.1 mrg {
200 1.1 mrg return (__m128i) __builtin_ia32_vphsubdq ((__v4si)__A);
201 1.1 mrg }
202 1.1 mrg
203 1.1 mrg /* Vector conditional move and permute */
204 1.1 mrg
205 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
206 1.1 mrg _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
207 1.1 mrg {
208 1.1 mrg return (__m128i) __builtin_ia32_vpcmov (__A, __B, __C);
209 1.1 mrg }
210 1.1 mrg
211 1.11 mrg extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
212 1.11 mrg _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
213 1.11 mrg {
214 1.11 mrg return (__m256i) __builtin_ia32_vpcmov256 (__A, __B, __C);
215 1.11 mrg }
216 1.11 mrg
217 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
218 1.1 mrg _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
219 1.1 mrg {
220 1.1 mrg return (__m128i) __builtin_ia32_vpperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
221 1.1 mrg }
222 1.1 mrg
223 1.1 mrg /* Packed Integer Rotates and Shifts
224 1.1 mrg Rotates - Non-Immediate form */
225 1.1 mrg
226 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
227 1.1 mrg _mm_rot_epi8(__m128i __A, __m128i __B)
228 1.1 mrg {
229 1.1 mrg return (__m128i) __builtin_ia32_vprotb ((__v16qi)__A, (__v16qi)__B);
230 1.1 mrg }
231 1.1 mrg
232 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
233 1.1 mrg _mm_rot_epi16(__m128i __A, __m128i __B)
234 1.1 mrg {
235 1.1 mrg return (__m128i) __builtin_ia32_vprotw ((__v8hi)__A, (__v8hi)__B);
236 1.1 mrg }
237 1.1 mrg
238 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
239 1.1 mrg _mm_rot_epi32(__m128i __A, __m128i __B)
240 1.1 mrg {
241 1.1 mrg return (__m128i) __builtin_ia32_vprotd ((__v4si)__A, (__v4si)__B);
242 1.1 mrg }
243 1.1 mrg
244 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
245 1.1 mrg _mm_rot_epi64(__m128i __A, __m128i __B)
246 1.1 mrg {
247 1.1 mrg return (__m128i) __builtin_ia32_vprotq ((__v2di)__A, (__v2di)__B);
248 1.1 mrg }
249 1.1 mrg
250 1.1 mrg /* Rotates - Immediate form */
251 1.1 mrg
252 1.1 mrg #ifdef __OPTIMIZE__
253 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
254 1.1 mrg _mm_roti_epi8(__m128i __A, const int __B)
255 1.1 mrg {
256 1.1 mrg return (__m128i) __builtin_ia32_vprotbi ((__v16qi)__A, __B);
257 1.1 mrg }
258 1.1 mrg
259 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
260 1.1 mrg _mm_roti_epi16(__m128i __A, const int __B)
261 1.1 mrg {
262 1.1 mrg return (__m128i) __builtin_ia32_vprotwi ((__v8hi)__A, __B);
263 1.1 mrg }
264 1.1 mrg
265 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
266 1.1 mrg _mm_roti_epi32(__m128i __A, const int __B)
267 1.1 mrg {
268 1.1 mrg return (__m128i) __builtin_ia32_vprotdi ((__v4si)__A, __B);
269 1.1 mrg }
270 1.1 mrg
271 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
272 1.1 mrg _mm_roti_epi64(__m128i __A, const int __B)
273 1.1 mrg {
274 1.1 mrg return (__m128i) __builtin_ia32_vprotqi ((__v2di)__A, __B);
275 1.1 mrg }
276 1.1 mrg #else
/* Macro forms of the immediate rotates, used when __OPTIMIZE__ is not
   defined.  Each one casts V to __m128i and then to the element vector
   type, casts IMM to int, and casts the builtin's result to __m128i.  */
#define _mm_roti_epi8(V, IMM) \
  ((__m128i) __builtin_ia32_vprotbi ((__v16qi)(__m128i)(V), (int)(IMM)))
#define _mm_roti_epi16(V, IMM) \
  ((__m128i) __builtin_ia32_vprotwi ((__v8hi)(__m128i)(V), (int)(IMM)))
#define _mm_roti_epi32(V, IMM) \
  ((__m128i) __builtin_ia32_vprotdi ((__v4si)(__m128i)(V), (int)(IMM)))
#define _mm_roti_epi64(V, IMM) \
  ((__m128i) __builtin_ia32_vprotqi ((__v2di)(__m128i)(V), (int)(IMM)))
285 1.1 mrg #endif
286 1.1 mrg
287 1.1 mrg /* Shifts */
288 1.1 mrg
289 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
290 1.1 mrg _mm_shl_epi8(__m128i __A, __m128i __B)
291 1.1 mrg {
292 1.1 mrg return (__m128i) __builtin_ia32_vpshlb ((__v16qi)__A, (__v16qi)__B);
293 1.1 mrg }
294 1.1 mrg
295 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
296 1.1 mrg _mm_shl_epi16(__m128i __A, __m128i __B)
297 1.1 mrg {
298 1.1 mrg return (__m128i) __builtin_ia32_vpshlw ((__v8hi)__A, (__v8hi)__B);
299 1.1 mrg }
300 1.1 mrg
301 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
302 1.1 mrg _mm_shl_epi32(__m128i __A, __m128i __B)
303 1.1 mrg {
304 1.1 mrg return (__m128i) __builtin_ia32_vpshld ((__v4si)__A, (__v4si)__B);
305 1.1 mrg }
306 1.1 mrg
307 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
308 1.1 mrg _mm_shl_epi64(__m128i __A, __m128i __B)
309 1.1 mrg {
310 1.1 mrg return (__m128i) __builtin_ia32_vpshlq ((__v2di)__A, (__v2di)__B);
311 1.1 mrg }
312 1.1 mrg
313 1.1 mrg
314 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
315 1.1 mrg _mm_sha_epi8(__m128i __A, __m128i __B)
316 1.1 mrg {
317 1.1 mrg return (__m128i) __builtin_ia32_vpshab ((__v16qi)__A, (__v16qi)__B);
318 1.1 mrg }
319 1.1 mrg
320 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
321 1.1 mrg _mm_sha_epi16(__m128i __A, __m128i __B)
322 1.1 mrg {
323 1.1 mrg return (__m128i) __builtin_ia32_vpshaw ((__v8hi)__A, (__v8hi)__B);
324 1.1 mrg }
325 1.1 mrg
326 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
327 1.1 mrg _mm_sha_epi32(__m128i __A, __m128i __B)
328 1.1 mrg {
329 1.1 mrg return (__m128i) __builtin_ia32_vpshad ((__v4si)__A, (__v4si)__B);
330 1.1 mrg }
331 1.1 mrg
332 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
333 1.1 mrg _mm_sha_epi64(__m128i __A, __m128i __B)
334 1.1 mrg {
335 1.1 mrg return (__m128i) __builtin_ia32_vpshaq ((__v2di)__A, (__v2di)__B);
336 1.1 mrg }
337 1.1 mrg
338 1.1 mrg /* Compare and Predicate Generation
339 1.8 mrg pcom (integer, unsigned bytes) */
340 1.1 mrg
341 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
342 1.1 mrg _mm_comlt_epu8(__m128i __A, __m128i __B)
343 1.1 mrg {
344 1.1 mrg return (__m128i) __builtin_ia32_vpcomltub ((__v16qi)__A, (__v16qi)__B);
345 1.1 mrg }
346 1.1 mrg
347 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
348 1.1 mrg _mm_comle_epu8(__m128i __A, __m128i __B)
349 1.1 mrg {
350 1.1 mrg return (__m128i) __builtin_ia32_vpcomleub ((__v16qi)__A, (__v16qi)__B);
351 1.1 mrg }
352 1.1 mrg
353 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
354 1.1 mrg _mm_comgt_epu8(__m128i __A, __m128i __B)
355 1.1 mrg {
356 1.1 mrg return (__m128i) __builtin_ia32_vpcomgtub ((__v16qi)__A, (__v16qi)__B);
357 1.1 mrg }
358 1.1 mrg
359 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
360 1.1 mrg _mm_comge_epu8(__m128i __A, __m128i __B)
361 1.1 mrg {
362 1.1 mrg return (__m128i) __builtin_ia32_vpcomgeub ((__v16qi)__A, (__v16qi)__B);
363 1.1 mrg }
364 1.1 mrg
365 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
366 1.1 mrg _mm_comeq_epu8(__m128i __A, __m128i __B)
367 1.1 mrg {
368 1.1 mrg return (__m128i) __builtin_ia32_vpcomequb ((__v16qi)__A, (__v16qi)__B);
369 1.1 mrg }
370 1.1 mrg
371 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
372 1.1 mrg _mm_comneq_epu8(__m128i __A, __m128i __B)
373 1.1 mrg {
374 1.1 mrg return (__m128i) __builtin_ia32_vpcomnequb ((__v16qi)__A, (__v16qi)__B);
375 1.1 mrg }
376 1.1 mrg
377 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
378 1.1 mrg _mm_comfalse_epu8(__m128i __A, __m128i __B)
379 1.1 mrg {
380 1.1 mrg return (__m128i) __builtin_ia32_vpcomfalseub ((__v16qi)__A, (__v16qi)__B);
381 1.1 mrg }
382 1.1 mrg
383 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
384 1.1 mrg _mm_comtrue_epu8(__m128i __A, __m128i __B)
385 1.1 mrg {
386 1.1 mrg return (__m128i) __builtin_ia32_vpcomtrueub ((__v16qi)__A, (__v16qi)__B);
387 1.1 mrg }
388 1.1 mrg
389 1.8 mrg /*pcom (integer, unsigned words) */
390 1.1 mrg
391 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
392 1.1 mrg _mm_comlt_epu16(__m128i __A, __m128i __B)
393 1.1 mrg {
394 1.1 mrg return (__m128i) __builtin_ia32_vpcomltuw ((__v8hi)__A, (__v8hi)__B);
395 1.1 mrg }
396 1.1 mrg
397 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
398 1.1 mrg _mm_comle_epu16(__m128i __A, __m128i __B)
399 1.1 mrg {
400 1.1 mrg return (__m128i) __builtin_ia32_vpcomleuw ((__v8hi)__A, (__v8hi)__B);
401 1.1 mrg }
402 1.1 mrg
403 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
404 1.1 mrg _mm_comgt_epu16(__m128i __A, __m128i __B)
405 1.1 mrg {
406 1.1 mrg return (__m128i) __builtin_ia32_vpcomgtuw ((__v8hi)__A, (__v8hi)__B);
407 1.1 mrg }
408 1.1 mrg
409 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
410 1.1 mrg _mm_comge_epu16(__m128i __A, __m128i __B)
411 1.1 mrg {
412 1.1 mrg return (__m128i) __builtin_ia32_vpcomgeuw ((__v8hi)__A, (__v8hi)__B);
413 1.1 mrg }
414 1.1 mrg
415 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
416 1.1 mrg _mm_comeq_epu16(__m128i __A, __m128i __B)
417 1.1 mrg {
418 1.1 mrg return (__m128i) __builtin_ia32_vpcomequw ((__v8hi)__A, (__v8hi)__B);
419 1.1 mrg }
420 1.1 mrg
421 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
422 1.1 mrg _mm_comneq_epu16(__m128i __A, __m128i __B)
423 1.1 mrg {
424 1.1 mrg return (__m128i) __builtin_ia32_vpcomnequw ((__v8hi)__A, (__v8hi)__B);
425 1.1 mrg }
426 1.1 mrg
427 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
428 1.1 mrg _mm_comfalse_epu16(__m128i __A, __m128i __B)
429 1.1 mrg {
430 1.1 mrg return (__m128i) __builtin_ia32_vpcomfalseuw ((__v8hi)__A, (__v8hi)__B);
431 1.1 mrg }
432 1.1 mrg
433 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
434 1.1 mrg _mm_comtrue_epu16(__m128i __A, __m128i __B)
435 1.1 mrg {
436 1.1 mrg return (__m128i) __builtin_ia32_vpcomtrueuw ((__v8hi)__A, (__v8hi)__B);
437 1.1 mrg }
438 1.1 mrg
439 1.8 mrg /*pcom (integer, unsigned double words) */
440 1.1 mrg
441 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
442 1.1 mrg _mm_comlt_epu32(__m128i __A, __m128i __B)
443 1.1 mrg {
444 1.1 mrg return (__m128i) __builtin_ia32_vpcomltud ((__v4si)__A, (__v4si)__B);
445 1.1 mrg }
446 1.1 mrg
447 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
448 1.1 mrg _mm_comle_epu32(__m128i __A, __m128i __B)
449 1.1 mrg {
450 1.1 mrg return (__m128i) __builtin_ia32_vpcomleud ((__v4si)__A, (__v4si)__B);
451 1.1 mrg }
452 1.1 mrg
453 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
454 1.1 mrg _mm_comgt_epu32(__m128i __A, __m128i __B)
455 1.1 mrg {
456 1.1 mrg return (__m128i) __builtin_ia32_vpcomgtud ((__v4si)__A, (__v4si)__B);
457 1.1 mrg }
458 1.1 mrg
459 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
460 1.1 mrg _mm_comge_epu32(__m128i __A, __m128i __B)
461 1.1 mrg {
462 1.1 mrg return (__m128i) __builtin_ia32_vpcomgeud ((__v4si)__A, (__v4si)__B);
463 1.1 mrg }
464 1.1 mrg
465 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
466 1.1 mrg _mm_comeq_epu32(__m128i __A, __m128i __B)
467 1.1 mrg {
468 1.1 mrg return (__m128i) __builtin_ia32_vpcomequd ((__v4si)__A, (__v4si)__B);
469 1.1 mrg }
470 1.1 mrg
471 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
472 1.1 mrg _mm_comneq_epu32(__m128i __A, __m128i __B)
473 1.1 mrg {
474 1.1 mrg return (__m128i) __builtin_ia32_vpcomnequd ((__v4si)__A, (__v4si)__B);
475 1.1 mrg }
476 1.1 mrg
477 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
478 1.1 mrg _mm_comfalse_epu32(__m128i __A, __m128i __B)
479 1.1 mrg {
480 1.1 mrg return (__m128i) __builtin_ia32_vpcomfalseud ((__v4si)__A, (__v4si)__B);
481 1.1 mrg }
482 1.1 mrg
483 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
484 1.1 mrg _mm_comtrue_epu32(__m128i __A, __m128i __B)
485 1.1 mrg {
486 1.1 mrg return (__m128i) __builtin_ia32_vpcomtrueud ((__v4si)__A, (__v4si)__B);
487 1.1 mrg }
488 1.1 mrg
489 1.8 mrg /*pcom (integer, unsigned quad words) */
490 1.1 mrg
491 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
492 1.1 mrg _mm_comlt_epu64(__m128i __A, __m128i __B)
493 1.1 mrg {
494 1.1 mrg return (__m128i) __builtin_ia32_vpcomltuq ((__v2di)__A, (__v2di)__B);
495 1.1 mrg }
496 1.1 mrg
497 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
498 1.1 mrg _mm_comle_epu64(__m128i __A, __m128i __B)
499 1.1 mrg {
500 1.1 mrg return (__m128i) __builtin_ia32_vpcomleuq ((__v2di)__A, (__v2di)__B);
501 1.1 mrg }
502 1.1 mrg
503 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
504 1.1 mrg _mm_comgt_epu64(__m128i __A, __m128i __B)
505 1.1 mrg {
506 1.1 mrg return (__m128i) __builtin_ia32_vpcomgtuq ((__v2di)__A, (__v2di)__B);
507 1.1 mrg }
508 1.1 mrg
509 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
510 1.1 mrg _mm_comge_epu64(__m128i __A, __m128i __B)
511 1.1 mrg {
512 1.1 mrg return (__m128i) __builtin_ia32_vpcomgeuq ((__v2di)__A, (__v2di)__B);
513 1.1 mrg }
514 1.1 mrg
515 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
516 1.1 mrg _mm_comeq_epu64(__m128i __A, __m128i __B)
517 1.1 mrg {
518 1.1 mrg return (__m128i) __builtin_ia32_vpcomequq ((__v2di)__A, (__v2di)__B);
519 1.1 mrg }
520 1.1 mrg
521 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
522 1.1 mrg _mm_comneq_epu64(__m128i __A, __m128i __B)
523 1.1 mrg {
524 1.1 mrg return (__m128i) __builtin_ia32_vpcomnequq ((__v2di)__A, (__v2di)__B);
525 1.1 mrg }
526 1.1 mrg
527 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
528 1.1 mrg _mm_comfalse_epu64(__m128i __A, __m128i __B)
529 1.1 mrg {
530 1.1 mrg return (__m128i) __builtin_ia32_vpcomfalseuq ((__v2di)__A, (__v2di)__B);
531 1.1 mrg }
532 1.1 mrg
533 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
534 1.1 mrg _mm_comtrue_epu64(__m128i __A, __m128i __B)
535 1.1 mrg {
536 1.1 mrg return (__m128i) __builtin_ia32_vpcomtrueuq ((__v2di)__A, (__v2di)__B);
537 1.1 mrg }
538 1.1 mrg
539 1.1 mrg /*pcom (integer, signed bytes) */
540 1.1 mrg
541 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
542 1.1 mrg _mm_comlt_epi8(__m128i __A, __m128i __B)
543 1.1 mrg {
544 1.1 mrg return (__m128i) __builtin_ia32_vpcomltb ((__v16qi)__A, (__v16qi)__B);
545 1.1 mrg }
546 1.1 mrg
547 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
548 1.1 mrg _mm_comle_epi8(__m128i __A, __m128i __B)
549 1.1 mrg {
550 1.1 mrg return (__m128i) __builtin_ia32_vpcomleb ((__v16qi)__A, (__v16qi)__B);
551 1.1 mrg }
552 1.1 mrg
553 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
554 1.1 mrg _mm_comgt_epi8(__m128i __A, __m128i __B)
555 1.1 mrg {
556 1.1 mrg return (__m128i) __builtin_ia32_vpcomgtb ((__v16qi)__A, (__v16qi)__B);
557 1.1 mrg }
558 1.1 mrg
559 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
560 1.1 mrg _mm_comge_epi8(__m128i __A, __m128i __B)
561 1.1 mrg {
562 1.1 mrg return (__m128i) __builtin_ia32_vpcomgeb ((__v16qi)__A, (__v16qi)__B);
563 1.1 mrg }
564 1.1 mrg
565 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
566 1.1 mrg _mm_comeq_epi8(__m128i __A, __m128i __B)
567 1.1 mrg {
568 1.1 mrg return (__m128i) __builtin_ia32_vpcomeqb ((__v16qi)__A, (__v16qi)__B);
569 1.1 mrg }
570 1.1 mrg
571 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
572 1.1 mrg _mm_comneq_epi8(__m128i __A, __m128i __B)
573 1.1 mrg {
574 1.1 mrg return (__m128i) __builtin_ia32_vpcomneqb ((__v16qi)__A, (__v16qi)__B);
575 1.1 mrg }
576 1.1 mrg
577 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
578 1.1 mrg _mm_comfalse_epi8(__m128i __A, __m128i __B)
579 1.1 mrg {
580 1.1 mrg return (__m128i) __builtin_ia32_vpcomfalseb ((__v16qi)__A, (__v16qi)__B);
581 1.1 mrg }
582 1.1 mrg
583 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
584 1.1 mrg _mm_comtrue_epi8(__m128i __A, __m128i __B)
585 1.1 mrg {
586 1.1 mrg return (__m128i) __builtin_ia32_vpcomtrueb ((__v16qi)__A, (__v16qi)__B);
587 1.1 mrg }
588 1.1 mrg
589 1.1 mrg /*pcom (integer, signed words) */
590 1.1 mrg
591 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
592 1.1 mrg _mm_comlt_epi16(__m128i __A, __m128i __B)
593 1.1 mrg {
594 1.1 mrg return (__m128i) __builtin_ia32_vpcomltw ((__v8hi)__A, (__v8hi)__B);
595 1.1 mrg }
596 1.1 mrg
597 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
598 1.1 mrg _mm_comle_epi16(__m128i __A, __m128i __B)
599 1.1 mrg {
600 1.1 mrg return (__m128i) __builtin_ia32_vpcomlew ((__v8hi)__A, (__v8hi)__B);
601 1.1 mrg }
602 1.1 mrg
603 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
604 1.1 mrg _mm_comgt_epi16(__m128i __A, __m128i __B)
605 1.1 mrg {
606 1.1 mrg return (__m128i) __builtin_ia32_vpcomgtw ((__v8hi)__A, (__v8hi)__B);
607 1.1 mrg }
608 1.1 mrg
609 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
610 1.1 mrg _mm_comge_epi16(__m128i __A, __m128i __B)
611 1.1 mrg {
612 1.1 mrg return (__m128i) __builtin_ia32_vpcomgew ((__v8hi)__A, (__v8hi)__B);
613 1.1 mrg }
614 1.1 mrg
615 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
616 1.1 mrg _mm_comeq_epi16(__m128i __A, __m128i __B)
617 1.1 mrg {
618 1.1 mrg return (__m128i) __builtin_ia32_vpcomeqw ((__v8hi)__A, (__v8hi)__B);
619 1.1 mrg }
620 1.1 mrg
621 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
622 1.1 mrg _mm_comneq_epi16(__m128i __A, __m128i __B)
623 1.1 mrg {
624 1.1 mrg return (__m128i) __builtin_ia32_vpcomneqw ((__v8hi)__A, (__v8hi)__B);
625 1.1 mrg }
626 1.1 mrg
627 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
628 1.1 mrg _mm_comfalse_epi16(__m128i __A, __m128i __B)
629 1.1 mrg {
630 1.1 mrg return (__m128i) __builtin_ia32_vpcomfalsew ((__v8hi)__A, (__v8hi)__B);
631 1.1 mrg }
632 1.1 mrg
633 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
634 1.1 mrg _mm_comtrue_epi16(__m128i __A, __m128i __B)
635 1.1 mrg {
636 1.1 mrg return (__m128i) __builtin_ia32_vpcomtruew ((__v8hi)__A, (__v8hi)__B);
637 1.1 mrg }
638 1.1 mrg
639 1.1 mrg /*pcom (integer, signed double words) */
640 1.1 mrg
641 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
642 1.1 mrg _mm_comlt_epi32(__m128i __A, __m128i __B)
643 1.1 mrg {
644 1.1 mrg return (__m128i) __builtin_ia32_vpcomltd ((__v4si)__A, (__v4si)__B);
645 1.1 mrg }
646 1.1 mrg
647 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
648 1.1 mrg _mm_comle_epi32(__m128i __A, __m128i __B)
649 1.1 mrg {
650 1.1 mrg return (__m128i) __builtin_ia32_vpcomled ((__v4si)__A, (__v4si)__B);
651 1.1 mrg }
652 1.1 mrg
653 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
654 1.1 mrg _mm_comgt_epi32(__m128i __A, __m128i __B)
655 1.1 mrg {
656 1.1 mrg return (__m128i) __builtin_ia32_vpcomgtd ((__v4si)__A, (__v4si)__B);
657 1.1 mrg }
658 1.1 mrg
659 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
660 1.1 mrg _mm_comge_epi32(__m128i __A, __m128i __B)
661 1.1 mrg {
662 1.1 mrg return (__m128i) __builtin_ia32_vpcomged ((__v4si)__A, (__v4si)__B);
663 1.1 mrg }
664 1.1 mrg
665 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
666 1.1 mrg _mm_comeq_epi32(__m128i __A, __m128i __B)
667 1.1 mrg {
668 1.1 mrg return (__m128i) __builtin_ia32_vpcomeqd ((__v4si)__A, (__v4si)__B);
669 1.1 mrg }
670 1.1 mrg
671 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
672 1.1 mrg _mm_comneq_epi32(__m128i __A, __m128i __B)
673 1.1 mrg {
674 1.1 mrg return (__m128i) __builtin_ia32_vpcomneqd ((__v4si)__A, (__v4si)__B);
675 1.1 mrg }
676 1.1 mrg
677 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
678 1.1 mrg _mm_comfalse_epi32(__m128i __A, __m128i __B)
679 1.1 mrg {
680 1.1 mrg return (__m128i) __builtin_ia32_vpcomfalsed ((__v4si)__A, (__v4si)__B);
681 1.1 mrg }
682 1.1 mrg
683 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
684 1.1 mrg _mm_comtrue_epi32(__m128i __A, __m128i __B)
685 1.1 mrg {
686 1.1 mrg return (__m128i) __builtin_ia32_vpcomtrued ((__v4si)__A, (__v4si)__B);
687 1.1 mrg }
688 1.1 mrg
689 1.1 mrg /*pcom (integer, signed quad words) */
690 1.1 mrg
691 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
692 1.1 mrg _mm_comlt_epi64(__m128i __A, __m128i __B)
693 1.1 mrg {
694 1.1 mrg return (__m128i) __builtin_ia32_vpcomltq ((__v2di)__A, (__v2di)__B);
695 1.1 mrg }
696 1.1 mrg
697 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
698 1.1 mrg _mm_comle_epi64(__m128i __A, __m128i __B)
699 1.1 mrg {
700 1.1 mrg return (__m128i) __builtin_ia32_vpcomleq ((__v2di)__A, (__v2di)__B);
701 1.1 mrg }
702 1.1 mrg
703 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
704 1.1 mrg _mm_comgt_epi64(__m128i __A, __m128i __B)
705 1.1 mrg {
706 1.1 mrg return (__m128i) __builtin_ia32_vpcomgtq ((__v2di)__A, (__v2di)__B);
707 1.1 mrg }
708 1.1 mrg
709 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
710 1.1 mrg _mm_comge_epi64(__m128i __A, __m128i __B)
711 1.1 mrg {
712 1.1 mrg return (__m128i) __builtin_ia32_vpcomgeq ((__v2di)__A, (__v2di)__B);
713 1.1 mrg }
714 1.1 mrg
715 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
716 1.1 mrg _mm_comeq_epi64(__m128i __A, __m128i __B)
717 1.1 mrg {
718 1.1 mrg return (__m128i) __builtin_ia32_vpcomeqq ((__v2di)__A, (__v2di)__B);
719 1.1 mrg }
720 1.1 mrg
721 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
722 1.1 mrg _mm_comneq_epi64(__m128i __A, __m128i __B)
723 1.1 mrg {
724 1.1 mrg return (__m128i) __builtin_ia32_vpcomneqq ((__v2di)__A, (__v2di)__B);
725 1.1 mrg }
726 1.1 mrg
727 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
728 1.1 mrg _mm_comfalse_epi64(__m128i __A, __m128i __B)
729 1.1 mrg {
730 1.1 mrg return (__m128i) __builtin_ia32_vpcomfalseq ((__v2di)__A, (__v2di)__B);
731 1.1 mrg }
732 1.1 mrg
733 1.1 mrg extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
734 1.1 mrg _mm_comtrue_epi64(__m128i __A, __m128i __B)
735 1.1 mrg {
736 1.1 mrg return (__m128i) __builtin_ia32_vpcomtrueq ((__v2di)__A, (__v2di)__B);
737 1.1 mrg }
738 1.1 mrg
739 1.1 mrg /* FRCZ */
740 1.1 mrg
741 1.1 mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
742 1.1 mrg _mm_frcz_ps (__m128 __A)
743 1.1 mrg {
744 1.1 mrg return (__m128) __builtin_ia32_vfrczps ((__v4sf)__A);
745 1.1 mrg }
746 1.1 mrg
747 1.1 mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
748 1.1 mrg _mm_frcz_pd (__m128d __A)
749 1.1 mrg {
750 1.1 mrg return (__m128d) __builtin_ia32_vfrczpd ((__v2df)__A);
751 1.1 mrg }
752 1.1 mrg
753 1.1 mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
754 1.1 mrg _mm_frcz_ss (__m128 __A, __m128 __B)
755 1.1 mrg {
756 1.3 mrg return (__m128) __builtin_ia32_movss ((__v4sf)__A,
757 1.3 mrg (__v4sf)
758 1.3 mrg __builtin_ia32_vfrczss ((__v4sf)__B));
759 1.1 mrg }
760 1.1 mrg
761 1.1 mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
762 1.1 mrg _mm_frcz_sd (__m128d __A, __m128d __B)
763 1.1 mrg {
764 1.3 mrg return (__m128d) __builtin_ia32_movsd ((__v2df)__A,
765 1.3 mrg (__v2df)
766 1.3 mrg __builtin_ia32_vfrczsd ((__v2df)__B));
767 1.1 mrg }
768 1.1 mrg
769 1.1 mrg extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
770 1.1 mrg _mm256_frcz_ps (__m256 __A)
771 1.1 mrg {
772 1.1 mrg return (__m256) __builtin_ia32_vfrczps256 ((__v8sf)__A);
773 1.1 mrg }
774 1.1 mrg
775 1.1 mrg extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
776 1.1 mrg _mm256_frcz_pd (__m256d __A)
777 1.1 mrg {
778 1.1 mrg return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df)__A);
779 1.1 mrg }
780 1.1 mrg
781 1.1 mrg /* PERMIL2 */
782 1.1 mrg
783 1.1 mrg #ifdef __OPTIMIZE__
784 1.1 mrg extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
785 1.1 mrg _mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int __I)
786 1.1 mrg {
787 1.1 mrg return (__m128d) __builtin_ia32_vpermil2pd ((__v2df)__X,
788 1.1 mrg (__v2df)__Y,
789 1.1 mrg (__v2di)__C,
790 1.1 mrg __I);
791 1.1 mrg }
792 1.1 mrg
793 1.1 mrg extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
794 1.1 mrg _mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I)
795 1.1 mrg {
796 1.1 mrg return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)__X,
797 1.1 mrg (__v4df)__Y,
798 1.1 mrg (__v4di)__C,
799 1.1 mrg __I);
800 1.1 mrg }
801 1.1 mrg
802 1.1 mrg extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
803 1.1 mrg _mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I)
804 1.1 mrg {
805 1.1 mrg return (__m128) __builtin_ia32_vpermil2ps ((__v4sf)__X,
806 1.1 mrg (__v4sf)__Y,
807 1.1 mrg (__v4si)__C,
808 1.1 mrg __I);
809 1.1 mrg }
810 1.1 mrg
811 1.1 mrg extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
812 1.1 mrg _mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
813 1.1 mrg {
814 1.1 mrg return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)__X,
815 1.1 mrg (__v8sf)__Y,
816 1.1 mrg (__v8si)__C,
817 1.1 mrg __I);
818 1.1 mrg }
819 1.1 mrg #else
/* Macro form of _mm_permute2_pd, selected when __OPTIMIZE__ is not
   defined.  X and Y are cast through __m128d to __v2df, C through
   __m128i to __v2di, and I to int before reaching the vpermil2pd
   builtin.  Presumably a macro is used so that I stays a literal
   constant at the call site -- confirm against the builtin's
   requirements.  */
#define _mm_permute2_pd(X, Y, C, I) \
  ((__m128d) __builtin_ia32_vpermil2pd ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (__v2di) (__m128i) (C), \
     (int) (I)))
825 1.1 mrg
/* Macro form of _mm256_permute2_pd, selected when __OPTIMIZE__ is not
   defined.  X and Y are cast through __m256d to __v4df, C through
   __m256i to __v4di, and I to int before reaching the vpermil2pd256
   builtin.  Presumably a macro is used so that I stays a literal
   constant at the call site -- confirm against the builtin's
   requirements.  */
#define _mm256_permute2_pd(X, Y, C, I) \
  ((__m256d) __builtin_ia32_vpermil2pd256 ( \
     (__v4df) (__m256d) (X), \
     (__v4df) (__m256d) (Y), \
     (__v4di) (__m256i) (C), \
     (int) (I)))
831 1.1 mrg
/* Macro form of _mm_permute2_ps, selected when __OPTIMIZE__ is not
   defined.  X and Y are cast through __m128 to __v4sf, C through
   __m128i to __v4si, and I to int before reaching the vpermil2ps
   builtin.  Presumably a macro is used so that I stays a literal
   constant at the call site -- confirm against the builtin's
   requirements.  */
#define _mm_permute2_ps(X, Y, C, I) \
  ((__m128) __builtin_ia32_vpermil2ps ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (__v4si) (__m128i) (C), \
     (int) (I)))
837 1.1 mrg
/* Macro form of _mm256_permute2_ps, selected when __OPTIMIZE__ is not
   defined.  X and Y are cast through __m256 to __v8sf, C through
   __m256i to __v8si, and I to int before reaching the vpermil2ps256
   builtin.  Presumably a macro is used so that I stays a literal
   constant at the call site -- confirm against the builtin's
   requirements.  */
#define _mm256_permute2_ps(X, Y, C, I) \
  ((__m256) __builtin_ia32_vpermil2ps256 ( \
     (__v8sf) (__m256) (X), \
     (__v8sf) (__m256) (Y), \
     (__v8si) (__m256i) (C), \
     (int) (I)))
843 1.1 mrg #endif /* __OPTIMIZE__ */
844 1.1 mrg
845 1.5 mrg #ifdef __DISABLE_XOP__
846 1.5 mrg #undef __DISABLE_XOP__
847 1.5 mrg #pragma GCC pop_options
848 1.5 mrg #endif /* __DISABLE_XOP__ */
849 1.1 mrg
850 1.1 mrg #endif /* _XOPMMINTRIN_H_INCLUDED */
851