avx512fp16intrin.h revision 1.1.1.1 1 1.1 mrg /* Copyright (C) 2019-2022 Free Software Foundation, Inc.
2 1.1 mrg
3 1.1 mrg This file is part of GCC.
4 1.1 mrg
5 1.1 mrg GCC is free software; you can redistribute it and/or modify
6 1.1 mrg it under the terms of the GNU General Public License as published by
7 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
8 1.1 mrg any later version.
9 1.1 mrg
10 1.1 mrg GCC is distributed in the hope that it will be useful,
11 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 1.1 mrg GNU General Public License for more details.
14 1.1 mrg
15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
16 1.1 mrg permissions described in the GCC Runtime Library Exception, version
17 1.1 mrg 3.1, as published by the Free Software Foundation.
18 1.1 mrg
19 1.1 mrg You should have received a copy of the GNU General Public License and
20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 1.1 mrg <http://www.gnu.org/licenses/>. */
23 1.1 mrg
24 1.1 mrg #ifndef _IMMINTRIN_H_INCLUDED
25 1.1 mrg #error "Never use <avx512fp16intrin.h> directly; include <immintrin.h> instead."
26 1.1 mrg #endif
27 1.1 mrg
28 1.1 mrg #ifndef __AVX512FP16INTRIN_H_INCLUDED
29 1.1 mrg #define __AVX512FP16INTRIN_H_INCLUDED
30 1.1 mrg
31 1.1 mrg #ifndef __AVX512FP16__
32 1.1 mrg #pragma GCC push_options
33 1.1 mrg #pragma GCC target("avx512fp16")
34 1.1 mrg #define __DISABLE_AVX512FP16__
35 1.1 mrg #endif /* __AVX512FP16__ */
36 1.1 mrg
/* Internal data types for implementing the intrinsics.  These are the
   raw vector-of-_Float16 types used in casts and builtin calls.  */
typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16)));
typedef _Float16 __v16hf __attribute__ ((__vector_size__ (32)));
typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__));
typedef _Float16 __m256h __attribute__ ((__vector_size__ (32), __may_alias__));
typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__));

/* Unaligned version of the same type; __aligned__ (1) lets these be
   loaded from / stored to any address without alignment faults.  */
typedef _Float16 __m128h_u __attribute__ ((__vector_size__ (16), \
					   __may_alias__, __aligned__ (1)));
typedef _Float16 __m256h_u __attribute__ ((__vector_size__ (32), \
					   __may_alias__, __aligned__ (1)));
typedef _Float16 __m512h_u __attribute__ ((__vector_size__ (64), \
					   __may_alias__, __aligned__ (1)));
55 1.1 mrg
/* Create a 128-bit vector from 8 _Float16 values; __A7 is the
   highest-numbered element, __A0 becomes element 0.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_ph (_Float16 __A7, _Float16 __A6, _Float16 __A5,
	    _Float16 __A4, _Float16 __A3, _Float16 __A2,
	    _Float16 __A1, _Float16 __A0)
{
  /* Initializer lists elements in memory (ascending) order, hence the
     reversal relative to the parameter list.  */
  return __extension__ (__m128h)(__v8hf){ __A0, __A1, __A2, __A3,
					  __A4, __A5, __A6, __A7 };
}
65 1.1 mrg
/* Create a 256-bit vector from 16 _Float16 values, highest element
   first (__A15), element 0 last (__A0).  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_ph (_Float16 __A15, _Float16 __A14, _Float16 __A13,
	       _Float16 __A12, _Float16 __A11, _Float16 __A10,
	       _Float16 __A9, _Float16 __A8, _Float16 __A7,
	       _Float16 __A6, _Float16 __A5, _Float16 __A4,
	       _Float16 __A3, _Float16 __A2, _Float16 __A1,
	       _Float16 __A0)
{
  /* Initializer is in ascending element order.  */
  return __extension__ (__m256h)(__v16hf){ __A0, __A1, __A2, __A3,
					   __A4, __A5, __A6, __A7,
					   __A8, __A9, __A10, __A11,
					   __A12, __A13, __A14, __A15 };
}
80 1.1 mrg
/* Create a 512-bit vector from 32 _Float16 values, highest element
   first (__A31), element 0 last (__A0).  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_ph (_Float16 __A31, _Float16 __A30, _Float16 __A29,
	       _Float16 __A28, _Float16 __A27, _Float16 __A26,
	       _Float16 __A25, _Float16 __A24, _Float16 __A23,
	       _Float16 __A22, _Float16 __A21, _Float16 __A20,
	       _Float16 __A19, _Float16 __A18, _Float16 __A17,
	       _Float16 __A16, _Float16 __A15, _Float16 __A14,
	       _Float16 __A13, _Float16 __A12, _Float16 __A11,
	       _Float16 __A10, _Float16 __A9, _Float16 __A8,
	       _Float16 __A7, _Float16 __A6, _Float16 __A5,
	       _Float16 __A4, _Float16 __A3, _Float16 __A2,
	       _Float16 __A1, _Float16 __A0)
{
  /* Initializer is in ascending element order.  */
  return __extension__ (__m512h)(__v32hf){ __A0, __A1, __A2, __A3,
					   __A4, __A5, __A6, __A7,
					   __A8, __A9, __A10, __A11,
					   __A12, __A13, __A14, __A15,
					   __A16, __A17, __A18, __A19,
					   __A20, __A21, __A22, __A23,
					   __A24, __A25, __A26, __A27,
					   __A28, __A29, __A30, __A31 };
}
104 1.1 mrg
105 1.1 mrg /* Create vectors of elements in the reversed order from _mm_set_ph,
106 1.1 mrg _mm256_set_ph and _mm512_set_ph functions. */
107 1.1 mrg
108 1.1 mrg extern __inline __m128h
109 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
110 1.1 mrg _mm_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
111 1.1 mrg _Float16 __A3, _Float16 __A4, _Float16 __A5,
112 1.1 mrg _Float16 __A6, _Float16 __A7)
113 1.1 mrg {
114 1.1 mrg return _mm_set_ph (__A7, __A6, __A5, __A4, __A3, __A2, __A1, __A0);
115 1.1 mrg }
116 1.1 mrg
117 1.1 mrg extern __inline __m256h
118 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
119 1.1 mrg _mm256_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
120 1.1 mrg _Float16 __A3, _Float16 __A4, _Float16 __A5,
121 1.1 mrg _Float16 __A6, _Float16 __A7, _Float16 __A8,
122 1.1 mrg _Float16 __A9, _Float16 __A10, _Float16 __A11,
123 1.1 mrg _Float16 __A12, _Float16 __A13, _Float16 __A14,
124 1.1 mrg _Float16 __A15)
125 1.1 mrg {
126 1.1 mrg return _mm256_set_ph (__A15, __A14, __A13, __A12, __A11, __A10, __A9,
127 1.1 mrg __A8, __A7, __A6, __A5, __A4, __A3, __A2, __A1,
128 1.1 mrg __A0);
129 1.1 mrg }
130 1.1 mrg
131 1.1 mrg extern __inline __m512h
132 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
133 1.1 mrg _mm512_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
134 1.1 mrg _Float16 __A3, _Float16 __A4, _Float16 __A5,
135 1.1 mrg _Float16 __A6, _Float16 __A7, _Float16 __A8,
136 1.1 mrg _Float16 __A9, _Float16 __A10, _Float16 __A11,
137 1.1 mrg _Float16 __A12, _Float16 __A13, _Float16 __A14,
138 1.1 mrg _Float16 __A15, _Float16 __A16, _Float16 __A17,
139 1.1 mrg _Float16 __A18, _Float16 __A19, _Float16 __A20,
140 1.1 mrg _Float16 __A21, _Float16 __A22, _Float16 __A23,
141 1.1 mrg _Float16 __A24, _Float16 __A25, _Float16 __A26,
142 1.1 mrg _Float16 __A27, _Float16 __A28, _Float16 __A29,
143 1.1 mrg _Float16 __A30, _Float16 __A31)
144 1.1 mrg
145 1.1 mrg {
146 1.1 mrg return _mm512_set_ph (__A31, __A30, __A29, __A28, __A27, __A26, __A25,
147 1.1 mrg __A24, __A23, __A22, __A21, __A20, __A19, __A18,
148 1.1 mrg __A17, __A16, __A15, __A14, __A13, __A12, __A11,
149 1.1 mrg __A10, __A9, __A8, __A7, __A6, __A5, __A4, __A3,
150 1.1 mrg __A2, __A1, __A0);
151 1.1 mrg }
152 1.1 mrg
/* Broadcast _Float16 to vector: every element of the result is __A.  */

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_ph (_Float16 __A)
{
  return _mm_set_ph (__A, __A, __A, __A, __A, __A, __A, __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_ph (_Float16 __A)
{
  return _mm256_set_ph (__A, __A, __A, __A, __A, __A, __A, __A,
			__A, __A, __A, __A, __A, __A, __A, __A);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_ph (_Float16 __A)
{
  return _mm512_set_ph (__A, __A, __A, __A, __A, __A, __A, __A,
			__A, __A, __A, __A, __A, __A, __A, __A,
			__A, __A, __A, __A, __A, __A, __A, __A,
			__A, __A, __A, __A, __A, __A, __A, __A);
}
179 1.1 mrg
180 1.1 mrg /* Create a vector with all zeros. */
181 1.1 mrg
182 1.1 mrg extern __inline __m128h
183 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
184 1.1 mrg _mm_setzero_ph (void)
185 1.1 mrg {
186 1.1 mrg return _mm_set1_ph (0.0f);
187 1.1 mrg }
188 1.1 mrg
189 1.1 mrg extern __inline __m256h
190 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
191 1.1 mrg _mm256_setzero_ph (void)
192 1.1 mrg {
193 1.1 mrg return _mm256_set1_ph (0.0f);
194 1.1 mrg }
195 1.1 mrg
196 1.1 mrg extern __inline __m512h
197 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
198 1.1 mrg _mm512_setzero_ph (void)
199 1.1 mrg {
200 1.1 mrg return _mm512_set1_ph (0.0f);
201 1.1 mrg }
202 1.1 mrg
/* Return a vector with undefined contents.  The self-initialization
   idiom tells the compiler no particular value is required, so no
   instruction need be emitted; the pragmas silence -Winit-self.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_undefined_ph (void)
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Winit-self"
  __m128h __Y = __Y;
#pragma GCC diagnostic pop
  return __Y;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_undefined_ph (void)
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Winit-self"
  __m256h __Y = __Y;
#pragma GCC diagnostic pop
  return __Y;
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_ph (void)
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Winit-self"
  __m512h __Y = __Y;
#pragma GCC diagnostic pop
  return __Y;
}
235 1.1 mrg
/* Extract element 0 (the lowest _Float16) of a vector.  */
extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsh_h (__m128h __A)
{
  return __A[0];
}

extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtsh_h (__m256h __A)
{
  return __A[0];
}

extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsh_h (__m512h __A)
{
  return __A[0];
}
256 1.1 mrg
/* Bit-cast a 512-bit FP16 vector to other 512-bit vector types.
   No instructions are generated; only the type changes.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph_ps (__m512h __a)
{
  return (__m512) __a;
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph_pd (__m512h __a)
{
  return (__m512d) __a;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph_si512 (__m512h __a)
{
  return (__m512i) __a;
}
277 1.1 mrg
/* Return the low 128 bits of a 512-bit vector.  The union reinterprets
   the 512-bit value as an array of 128-bit chunks without violating
   strict aliasing.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph512_ph128 (__m512h __A)
{
  union
  {
    __m128h __a[4];
    __m512h __v;
  } __u = { .__v = __A };
  return __u.__a[0];
}

/* Return the low 256 bits of a 512-bit vector.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph512_ph256 (__m512h __A)
{
  union
  {
    __m256h __a[2];
    __m512h __v;
  } __u = { .__v = __A };
  return __u.__a[0];
}

/* Widen a 128-bit vector to 512 bits; the upper 384 bits are left
   uninitialized (undefined).  Use _mm512_zextph128_ph512 to zero them.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph128_ph512 (__m128h __A)
{
  union
  {
    __m128h __a[4];
    __m512h __v;
  } __u;
  __u.__a[0] = __A;
  return __u.__v;
}

/* Widen a 256-bit vector to 512 bits; the upper 256 bits are left
   uninitialized (undefined).  Use _mm512_zextph256_ph512 to zero them.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph256_ph512 (__m256h __A)
{
  union
  {
    __m256h __a[2];
    __m512h __v;
  } __u;
  __u.__a[0] = __A;
  return __u.__v;
}
327 1.1 mrg
/* Widen a 128-bit vector to 512 bits with the upper bits zeroed, by
   inserting it into the low lane of an all-zero vector.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_zextph128_ph512 (__m128h __A)
{
  return (__m512h) _mm512_insertf32x4 (_mm512_setzero_ps (),
				       (__m128) __A, 0);
}

/* Widen a 256-bit vector to 512 bits with the upper bits zeroed.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_zextph256_ph512 (__m256h __A)
{
  return (__m512h) _mm512_insertf64x4 (_mm512_setzero_pd (),
				       (__m256d) __A, 0);
}
343 1.1 mrg
/* Bit-cast other 512-bit vector types to an FP16 vector.  No
   instructions are generated; only the type changes.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps_ph (__m512 __a)
{
  return (__m512h) __a;
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd_ph (__m512d __a)
{
  return (__m512h) __a;
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi512_ph (__m512i __a)
{
  return (__m512h) __a;
}
364 1.1 mrg
/* Create a vector with element 0 as F and the rest zero.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_sh (_Float16 __F)
{
  /* _mm_set_ph takes the highest element first, so __F is last.  */
  return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, __F);
}

/* Create a vector with element 0 as *P and the rest zero.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_sh (void const *__P)
{
  return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
		     *(_Float16 const *) __P);
}
381 1.1 mrg
/* Aligned loads: __P must be suitably aligned for the vector width
   (64/32/16 bytes respectively).  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_ph (void const *__P)
{
  return *(const __m512h *) __P;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_ph (void const *__P)
{
  return *(const __m256h *) __P;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_ph (void const *__P)
{
  return *(const __m128h *) __P;
}

/* Unaligned loads: the __m*h_u types carry __aligned__ (1), so any
   address is accepted.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_loadu_ph (void const *__P)
{
  return *(const __m512h_u *) __P;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_ph (void const *__P)
{
  return *(const __m256h_u *) __P;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_ph (void const *__P)
{
  return *(const __m128h_u *) __P;
}
423 1.1 mrg
/* Stores the lower _Float16 value.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_sh (void *__P, __m128h __A)
{
  *(_Float16 *) __P = ((__v8hf)__A)[0];
}

/* Aligned stores: __P must be suitably aligned for the vector width.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_ph (void *__P, __m512h __A)
{
  *(__m512h *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_ph (void *__P, __m256h __A)
{
  *(__m256h *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_ph (void *__P, __m128h __A)
{
  *(__m128h *) __P = __A;
}

/* Unaligned stores via the __aligned__ (1) vector types.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_storeu_ph (void *__P, __m512h __A)
{
  *(__m512h_u *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_ph (void *__P, __m256h __A)
{
  *(__m256h_u *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeu_ph (void *__P, __m128h __A)
{
  *(__m128h_u *) __P = __A;
}
473 1.1 mrg
/* Absolute value of each _Float16 element: clear the sign bit of every
   16-bit lane by ANDing with 0x7FFF replicated across each 32-bit
   word.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_abs_ph (__m512h __A)
{
  return (__m512h) _mm512_and_epi32 ( _mm512_set1_epi32 (0x7FFF7FFF),
				      (__m512i) __A);
}
481 1.1 mrg
/* Intrinsics v[add,sub,mul,div]ph.  The unmasked forms use native
   vector arithmetic; the mask forms take (src, mask, a, b) and write
   a OP b into lanes selected by the mask, src elsewhere; the maskz
   forms zero the unselected lanes.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_ph (__m512h __A, __m512h __B)
{
  return (__m512h) ((__v32hf) __A + (__v32hf) __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
{
  return __builtin_ia32_addph512_mask (__C, __D, __A, __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_ph (__mmask32 __A, __m512h __B, __m512h __C)
{
  return __builtin_ia32_addph512_mask (__B, __C,
				       _mm512_setzero_ph (), __A);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_ph (__m512h __A, __m512h __B)
{
  return (__m512h) ((__v32hf) __A - (__v32hf) __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
{
  return __builtin_ia32_subph512_mask (__C, __D, __A, __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_ph (__mmask32 __A, __m512h __B, __m512h __C)
{
  return __builtin_ia32_subph512_mask (__B, __C,
				       _mm512_setzero_ph (), __A);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_ph (__m512h __A, __m512h __B)
{
  return (__m512h) ((__v32hf) __A * (__v32hf) __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
{
  return __builtin_ia32_mulph512_mask (__C, __D, __A, __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_ph (__mmask32 __A, __m512h __B, __m512h __C)
{
  return __builtin_ia32_mulph512_mask (__B, __C,
				       _mm512_setzero_ph (), __A);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_ph (__m512h __A, __m512h __B)
{
  return (__m512h) ((__v32hf) __A / (__v32hf) __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
{
  return __builtin_ia32_divph512_mask (__C, __D, __A, __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_ph (__mmask32 __A, __m512h __B, __m512h __C)
{
  return __builtin_ia32_divph512_mask (__B, __C,
				       _mm512_setzero_ph (), __A);
}
570 1.1 mrg
/* Rounding-control variants of the packed add/sub/mul/div intrinsics.
   The rounding-mode operand must be a compile-time constant, so the
   inline functions are only usable under __OPTIMIZE__ (where constant
   propagation is guaranteed); otherwise equivalent macros are used.  */
#ifdef __OPTIMIZE__
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_ph (__m512h __A, __m512h __B, const int __C)
{
  return __builtin_ia32_addph512_mask_round (__A, __B,
					     _mm512_setzero_ph (),
					     (__mmask32) -1, __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
			  __m512h __D, const int __E)
{
  return __builtin_ia32_addph512_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
			   const int __D)
{
  return __builtin_ia32_addph512_mask_round (__B, __C,
					     _mm512_setzero_ph (),
					     __A, __D);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C)
{
  return __builtin_ia32_subph512_mask_round (__A, __B,
					     _mm512_setzero_ph (),
					     (__mmask32) -1, __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
			  __m512h __D, const int __E)
{
  return __builtin_ia32_subph512_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
			   const int __D)
{
  return __builtin_ia32_subph512_mask_round (__B, __C,
					     _mm512_setzero_ph (),
					     __A, __D);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C)
{
  return __builtin_ia32_mulph512_mask_round (__A, __B,
					     _mm512_setzero_ph (),
					     (__mmask32) -1, __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
			  __m512h __D, const int __E)
{
  return __builtin_ia32_mulph512_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
			   const int __D)
{
  return __builtin_ia32_mulph512_mask_round (__B, __C,
					     _mm512_setzero_ph (),
					     __A, __D);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_round_ph (__m512h __A, __m512h __B, const int __C)
{
  return __builtin_ia32_divph512_mask_round (__A, __B,
					     _mm512_setzero_ph (),
					     (__mmask32) -1, __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
			  __m512h __D, const int __E)
{
  return __builtin_ia32_divph512_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
			   const int __D)
{
  return __builtin_ia32_divph512_mask_round (__B, __C,
					     _mm512_setzero_ph (),
					     __A, __D);
}
#else
/* Macro fallbacks for -O0: same argument order as the functions above.  */
#define _mm512_add_round_ph(A, B, C)					\
  ((__m512h)__builtin_ia32_addph512_mask_round((A), (B),		\
					       _mm512_setzero_ph (),	\
					       (__mmask32)-1, (C)))

#define _mm512_mask_add_round_ph(A, B, C, D, E)				\
  ((__m512h)__builtin_ia32_addph512_mask_round((C), (D), (A), (B), (E)))

#define _mm512_maskz_add_round_ph(A, B, C, D)				\
  ((__m512h)__builtin_ia32_addph512_mask_round((B), (C),		\
					       _mm512_setzero_ph (),	\
					       (A), (D)))

#define _mm512_sub_round_ph(A, B, C)					\
  ((__m512h)__builtin_ia32_subph512_mask_round((A), (B),		\
					       _mm512_setzero_ph (),	\
					       (__mmask32)-1, (C)))

#define _mm512_mask_sub_round_ph(A, B, C, D, E)				\
  ((__m512h)__builtin_ia32_subph512_mask_round((C), (D), (A), (B), (E)))

#define _mm512_maskz_sub_round_ph(A, B, C, D)				\
  ((__m512h)__builtin_ia32_subph512_mask_round((B), (C),		\
					       _mm512_setzero_ph (),	\
					       (A), (D)))

#define _mm512_mul_round_ph(A, B, C)					\
  ((__m512h)__builtin_ia32_mulph512_mask_round((A), (B),		\
					       _mm512_setzero_ph (),	\
					       (__mmask32)-1, (C)))

#define _mm512_mask_mul_round_ph(A, B, C, D, E)				\
  ((__m512h)__builtin_ia32_mulph512_mask_round((C), (D), (A), (B), (E)))

#define _mm512_maskz_mul_round_ph(A, B, C, D)				\
  ((__m512h)__builtin_ia32_mulph512_mask_round((B), (C),		\
					       _mm512_setzero_ph (),	\
					       (A), (D)))

#define _mm512_div_round_ph(A, B, C)					\
  ((__m512h)__builtin_ia32_divph512_mask_round((A), (B),		\
					       _mm512_setzero_ph (),	\
					       (__mmask32)-1, (C)))

#define _mm512_mask_div_round_ph(A, B, C, D, E)				\
  ((__m512h)__builtin_ia32_divph512_mask_round((C), (D), (A), (B), (E)))

#define _mm512_maskz_div_round_ph(A, B, C, D)				\
  ((__m512h)__builtin_ia32_divph512_mask_round((B), (C),		\
					       _mm512_setzero_ph (),	\
					       (A), (D)))
#endif /* __OPTIMIZE__ */
732 1.1 mrg
733 1.1 mrg extern __inline __m512h
734 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
735 1.1 mrg _mm512_conj_pch (__m512h __A)
736 1.1 mrg {
737 1.1 mrg return (__m512h) _mm512_xor_epi32 ((__m512i) __A, _mm512_set1_epi32 (1<<31));
738 1.1 mrg }
739 1.1 mrg
/* Masked conjugate: lanes selected by __U get the conjugate of __A,
   the rest keep __W.  The 32-bit complex pairs are moved with a masked
   vmovaps, hence the 16-bit mask and __v16sf casts.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_conj_pch (__m512h __W, __mmask16 __U, __m512h __A)
{
  return (__m512h)
    __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
				   (__v16sf) __W,
				   (__mmask16) __U);
}

/* Zero-masked conjugate: unselected lanes are zeroed.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_conj_pch (__mmask16 __U, __m512h __A)
{
  return (__m512h)
    __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
				   (__v16sf) _mm512_setzero_ps (),
				   (__mmask16) __U);
}
759 1.1 mrg
760 1.1 mrg /* Intrinsics of v[add,sub,mul,div]sh. */
761 1.1 mrg extern __inline __m128h
762 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
763 1.1 mrg _mm_add_sh (__m128h __A, __m128h __B)
764 1.1 mrg {
765 1.1 mrg __A[0] += __B[0];
766 1.1 mrg return __A;
767 1.1 mrg }
768 1.1 mrg
769 1.1 mrg extern __inline __m128h
770 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
771 1.1 mrg _mm_mask_add_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
772 1.1 mrg {
773 1.1 mrg return __builtin_ia32_addsh_mask (__C, __D, __A, __B);
774 1.1 mrg }
775 1.1 mrg
776 1.1 mrg extern __inline __m128h
777 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
778 1.1 mrg _mm_maskz_add_sh (__mmask8 __A, __m128h __B, __m128h __C)
779 1.1 mrg {
780 1.1 mrg return __builtin_ia32_addsh_mask (__B, __C, _mm_setzero_ph (),
781 1.1 mrg __A);
782 1.1 mrg }
783 1.1 mrg
784 1.1 mrg extern __inline __m128h
785 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786 1.1 mrg _mm_sub_sh (__m128h __A, __m128h __B)
787 1.1 mrg {
788 1.1 mrg __A[0] -= __B[0];
789 1.1 mrg return __A;
790 1.1 mrg }
791 1.1 mrg
792 1.1 mrg extern __inline __m128h
793 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
794 1.1 mrg _mm_mask_sub_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
795 1.1 mrg {
796 1.1 mrg return __builtin_ia32_subsh_mask (__C, __D, __A, __B);
797 1.1 mrg }
798 1.1 mrg
799 1.1 mrg extern __inline __m128h
800 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
801 1.1 mrg _mm_maskz_sub_sh (__mmask8 __A, __m128h __B, __m128h __C)
802 1.1 mrg {
803 1.1 mrg return __builtin_ia32_subsh_mask (__B, __C, _mm_setzero_ph (),
804 1.1 mrg __A);
805 1.1 mrg }
806 1.1 mrg
807 1.1 mrg extern __inline __m128h
808 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
809 1.1 mrg _mm_mul_sh (__m128h __A, __m128h __B)
810 1.1 mrg {
811 1.1 mrg __A[0] *= __B[0];
812 1.1 mrg return __A;
813 1.1 mrg }
814 1.1 mrg
815 1.1 mrg extern __inline __m128h
816 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
817 1.1 mrg _mm_mask_mul_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
818 1.1 mrg {
819 1.1 mrg return __builtin_ia32_mulsh_mask (__C, __D, __A, __B);
820 1.1 mrg }
821 1.1 mrg
822 1.1 mrg extern __inline __m128h
823 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
824 1.1 mrg _mm_maskz_mul_sh (__mmask8 __A, __m128h __B, __m128h __C)
825 1.1 mrg {
826 1.1 mrg return __builtin_ia32_mulsh_mask (__B, __C, _mm_setzero_ph (), __A);
827 1.1 mrg }
828 1.1 mrg
829 1.1 mrg extern __inline __m128h
830 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
831 1.1 mrg _mm_div_sh (__m128h __A, __m128h __B)
832 1.1 mrg {
833 1.1 mrg __A[0] /= __B[0];
834 1.1 mrg return __A;
835 1.1 mrg }
836 1.1 mrg
837 1.1 mrg extern __inline __m128h
838 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
839 1.1 mrg _mm_mask_div_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
840 1.1 mrg {
841 1.1 mrg return __builtin_ia32_divsh_mask (__C, __D, __A, __B);
842 1.1 mrg }
843 1.1 mrg
844 1.1 mrg extern __inline __m128h
845 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
846 1.1 mrg _mm_maskz_div_sh (__mmask8 __A, __m128h __B, __m128h __C)
847 1.1 mrg {
848 1.1 mrg return __builtin_ia32_divsh_mask (__B, __C, _mm_setzero_ph (),
849 1.1 mrg __A);
850 1.1 mrg }
851 1.1 mrg
852 1.1 mrg #ifdef __OPTIMIZE__
853 1.1 mrg extern __inline __m128h
854 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
855 1.1 mrg _mm_add_round_sh (__m128h __A, __m128h __B, const int __C)
856 1.1 mrg {
857 1.1 mrg return __builtin_ia32_addsh_mask_round (__A, __B,
858 1.1 mrg _mm_setzero_ph (),
859 1.1 mrg (__mmask8) -1, __C);
860 1.1 mrg }
861 1.1 mrg
862 1.1 mrg extern __inline __m128h
863 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
864 1.1 mrg _mm_mask_add_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
865 1.1 mrg __m128h __D, const int __E)
866 1.1 mrg {
867 1.1 mrg return __builtin_ia32_addsh_mask_round (__C, __D, __A, __B, __E);
868 1.1 mrg }
869 1.1 mrg
870 1.1 mrg extern __inline __m128h
871 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
872 1.1 mrg _mm_maskz_add_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
873 1.1 mrg const int __D)
874 1.1 mrg {
875 1.1 mrg return __builtin_ia32_addsh_mask_round (__B, __C,
876 1.1 mrg _mm_setzero_ph (),
877 1.1 mrg __A, __D);
878 1.1 mrg }
879 1.1 mrg
880 1.1 mrg extern __inline __m128h
881 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882 1.1 mrg _mm_sub_round_sh (__m128h __A, __m128h __B, const int __C)
883 1.1 mrg {
884 1.1 mrg return __builtin_ia32_subsh_mask_round (__A, __B,
885 1.1 mrg _mm_setzero_ph (),
886 1.1 mrg (__mmask8) -1, __C);
887 1.1 mrg }
888 1.1 mrg
889 1.1 mrg extern __inline __m128h
890 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
891 1.1 mrg _mm_mask_sub_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
892 1.1 mrg __m128h __D, const int __E)
893 1.1 mrg {
894 1.1 mrg return __builtin_ia32_subsh_mask_round (__C, __D, __A, __B, __E);
895 1.1 mrg }
896 1.1 mrg
897 1.1 mrg extern __inline __m128h
898 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
899 1.1 mrg _mm_maskz_sub_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
900 1.1 mrg const int __D)
901 1.1 mrg {
902 1.1 mrg return __builtin_ia32_subsh_mask_round (__B, __C,
903 1.1 mrg _mm_setzero_ph (),
904 1.1 mrg __A, __D);
905 1.1 mrg }
906 1.1 mrg
907 1.1 mrg extern __inline __m128h
908 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
909 1.1 mrg _mm_mul_round_sh (__m128h __A, __m128h __B, const int __C)
910 1.1 mrg {
911 1.1 mrg return __builtin_ia32_mulsh_mask_round (__A, __B,
912 1.1 mrg _mm_setzero_ph (),
913 1.1 mrg (__mmask8) -1, __C);
914 1.1 mrg }
915 1.1 mrg
916 1.1 mrg extern __inline __m128h
917 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
918 1.1 mrg _mm_mask_mul_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
919 1.1 mrg __m128h __D, const int __E)
920 1.1 mrg {
921 1.1 mrg return __builtin_ia32_mulsh_mask_round (__C, __D, __A, __B, __E);
922 1.1 mrg }
923 1.1 mrg
924 1.1 mrg extern __inline __m128h
925 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
926 1.1 mrg _mm_maskz_mul_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
927 1.1 mrg const int __D)
928 1.1 mrg {
929 1.1 mrg return __builtin_ia32_mulsh_mask_round (__B, __C,
930 1.1 mrg _mm_setzero_ph (),
931 1.1 mrg __A, __D);
932 1.1 mrg }
933 1.1 mrg
934 1.1 mrg extern __inline __m128h
935 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
936 1.1 mrg _mm_div_round_sh (__m128h __A, __m128h __B, const int __C)
937 1.1 mrg {
938 1.1 mrg return __builtin_ia32_divsh_mask_round (__A, __B,
939 1.1 mrg _mm_setzero_ph (),
940 1.1 mrg (__mmask8) -1, __C);
941 1.1 mrg }
942 1.1 mrg
943 1.1 mrg extern __inline __m128h
944 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
945 1.1 mrg _mm_mask_div_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
946 1.1 mrg __m128h __D, const int __E)
947 1.1 mrg {
948 1.1 mrg return __builtin_ia32_divsh_mask_round (__C, __D, __A, __B, __E);
949 1.1 mrg }
950 1.1 mrg
951 1.1 mrg extern __inline __m128h
952 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
953 1.1 mrg _mm_maskz_div_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
954 1.1 mrg const int __D)
955 1.1 mrg {
956 1.1 mrg return __builtin_ia32_divsh_mask_round (__B, __C,
957 1.1 mrg _mm_setzero_ph (),
958 1.1 mrg __A, __D);
959 1.1 mrg }
960 1.1 mrg #else
961 1.1 mrg #define _mm_add_round_sh(A, B, C) \
962 1.1 mrg ((__m128h)__builtin_ia32_addsh_mask_round ((A), (B), \
963 1.1 mrg _mm_setzero_ph (), \
964 1.1 mrg (__mmask8)-1, (C)))
965 1.1 mrg
966 1.1 mrg #define _mm_mask_add_round_sh(A, B, C, D, E) \
967 1.1 mrg ((__m128h)__builtin_ia32_addsh_mask_round ((C), (D), (A), (B), (E)))
968 1.1 mrg
969 1.1 mrg #define _mm_maskz_add_round_sh(A, B, C, D) \
970 1.1 mrg ((__m128h)__builtin_ia32_addsh_mask_round ((B), (C), \
971 1.1 mrg _mm_setzero_ph (), \
972 1.1 mrg (A), (D)))
973 1.1 mrg
974 1.1 mrg #define _mm_sub_round_sh(A, B, C) \
975 1.1 mrg ((__m128h)__builtin_ia32_subsh_mask_round ((A), (B), \
976 1.1 mrg _mm_setzero_ph (), \
977 1.1 mrg (__mmask8)-1, (C)))
978 1.1 mrg
979 1.1 mrg #define _mm_mask_sub_round_sh(A, B, C, D, E) \
980 1.1 mrg ((__m128h)__builtin_ia32_subsh_mask_round ((C), (D), (A), (B), (E)))
981 1.1 mrg
982 1.1 mrg #define _mm_maskz_sub_round_sh(A, B, C, D) \
983 1.1 mrg ((__m128h)__builtin_ia32_subsh_mask_round ((B), (C), \
984 1.1 mrg _mm_setzero_ph (), \
985 1.1 mrg (A), (D)))
986 1.1 mrg
987 1.1 mrg #define _mm_mul_round_sh(A, B, C) \
988 1.1 mrg ((__m128h)__builtin_ia32_mulsh_mask_round ((A), (B), \
989 1.1 mrg _mm_setzero_ph (), \
990 1.1 mrg (__mmask8)-1, (C)))
991 1.1 mrg
992 1.1 mrg #define _mm_mask_mul_round_sh(A, B, C, D, E) \
993 1.1 mrg ((__m128h)__builtin_ia32_mulsh_mask_round ((C), (D), (A), (B), (E)))
994 1.1 mrg
995 1.1 mrg #define _mm_maskz_mul_round_sh(A, B, C, D) \
996 1.1 mrg ((__m128h)__builtin_ia32_mulsh_mask_round ((B), (C), \
997 1.1 mrg _mm_setzero_ph (), \
998 1.1 mrg (A), (D)))
999 1.1 mrg
1000 1.1 mrg #define _mm_div_round_sh(A, B, C) \
1001 1.1 mrg ((__m128h)__builtin_ia32_divsh_mask_round ((A), (B), \
1002 1.1 mrg _mm_setzero_ph (), \
1003 1.1 mrg (__mmask8)-1, (C)))
1004 1.1 mrg
1005 1.1 mrg #define _mm_mask_div_round_sh(A, B, C, D, E) \
1006 1.1 mrg ((__m128h)__builtin_ia32_divsh_mask_round ((C), (D), (A), (B), (E)))
1007 1.1 mrg
1008 1.1 mrg #define _mm_maskz_div_round_sh(A, B, C, D) \
1009 1.1 mrg ((__m128h)__builtin_ia32_divsh_mask_round ((B), (C), \
1010 1.1 mrg _mm_setzero_ph (), \
1011 1.1 mrg (A), (D)))
1012 1.1 mrg #endif /* __OPTIMIZE__ */
1013 1.1 mrg
1014 1.1 mrg /* Intrinsic vmaxph vminph. */
1015 1.1 mrg extern __inline __m512h
1016 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1017 1.1 mrg _mm512_max_ph (__m512h __A, __m512h __B)
1018 1.1 mrg {
1019 1.1 mrg return __builtin_ia32_maxph512_mask (__A, __B,
1020 1.1 mrg _mm512_setzero_ph (),
1021 1.1 mrg (__mmask32) -1);
1022 1.1 mrg }
1023 1.1 mrg
1024 1.1 mrg extern __inline __m512h
1025 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1026 1.1 mrg _mm512_mask_max_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
1027 1.1 mrg {
1028 1.1 mrg return __builtin_ia32_maxph512_mask (__C, __D, __A, __B);
1029 1.1 mrg }
1030 1.1 mrg
1031 1.1 mrg extern __inline __m512h
1032 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1033 1.1 mrg _mm512_maskz_max_ph (__mmask32 __A, __m512h __B, __m512h __C)
1034 1.1 mrg {
1035 1.1 mrg return __builtin_ia32_maxph512_mask (__B, __C,
1036 1.1 mrg _mm512_setzero_ph (), __A);
1037 1.1 mrg }
1038 1.1 mrg
1039 1.1 mrg extern __inline __m512h
1040 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1041 1.1 mrg _mm512_min_ph (__m512h __A, __m512h __B)
1042 1.1 mrg {
1043 1.1 mrg return __builtin_ia32_minph512_mask (__A, __B,
1044 1.1 mrg _mm512_setzero_ph (),
1045 1.1 mrg (__mmask32) -1);
1046 1.1 mrg }
1047 1.1 mrg
1048 1.1 mrg extern __inline __m512h
1049 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1050 1.1 mrg _mm512_mask_min_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
1051 1.1 mrg {
1052 1.1 mrg return __builtin_ia32_minph512_mask (__C, __D, __A, __B);
1053 1.1 mrg }
1054 1.1 mrg
1055 1.1 mrg extern __inline __m512h
1056 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1057 1.1 mrg _mm512_maskz_min_ph (__mmask32 __A, __m512h __B, __m512h __C)
1058 1.1 mrg {
1059 1.1 mrg return __builtin_ia32_minph512_mask (__B, __C,
1060 1.1 mrg _mm512_setzero_ph (), __A);
1061 1.1 mrg }
1062 1.1 mrg
1063 1.1 mrg #ifdef __OPTIMIZE__
1064 1.1 mrg extern __inline __m512h
1065 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1066 1.1 mrg _mm512_max_round_ph (__m512h __A, __m512h __B, const int __C)
1067 1.1 mrg {
1068 1.1 mrg return __builtin_ia32_maxph512_mask_round (__A, __B,
1069 1.1 mrg _mm512_setzero_ph (),
1070 1.1 mrg (__mmask32) -1, __C);
1071 1.1 mrg }
1072 1.1 mrg
1073 1.1 mrg extern __inline __m512h
1074 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1075 1.1 mrg _mm512_mask_max_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
1076 1.1 mrg __m512h __D, const int __E)
1077 1.1 mrg {
1078 1.1 mrg return __builtin_ia32_maxph512_mask_round (__C, __D, __A, __B, __E);
1079 1.1 mrg }
1080 1.1 mrg
1081 1.1 mrg extern __inline __m512h
1082 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1083 1.1 mrg _mm512_maskz_max_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
1084 1.1 mrg const int __D)
1085 1.1 mrg {
1086 1.1 mrg return __builtin_ia32_maxph512_mask_round (__B, __C,
1087 1.1 mrg _mm512_setzero_ph (),
1088 1.1 mrg __A, __D);
1089 1.1 mrg }
1090 1.1 mrg
1091 1.1 mrg extern __inline __m512h
1092 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1093 1.1 mrg _mm512_min_round_ph (__m512h __A, __m512h __B, const int __C)
1094 1.1 mrg {
1095 1.1 mrg return __builtin_ia32_minph512_mask_round (__A, __B,
1096 1.1 mrg _mm512_setzero_ph (),
1097 1.1 mrg (__mmask32) -1, __C);
1098 1.1 mrg }
1099 1.1 mrg
1100 1.1 mrg extern __inline __m512h
1101 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1102 1.1 mrg _mm512_mask_min_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
1103 1.1 mrg __m512h __D, const int __E)
1104 1.1 mrg {
1105 1.1 mrg return __builtin_ia32_minph512_mask_round (__C, __D, __A, __B, __E);
1106 1.1 mrg }
1107 1.1 mrg
1108 1.1 mrg extern __inline __m512h
1109 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1110 1.1 mrg _mm512_maskz_min_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
1111 1.1 mrg const int __D)
1112 1.1 mrg {
1113 1.1 mrg return __builtin_ia32_minph512_mask_round (__B, __C,
1114 1.1 mrg _mm512_setzero_ph (),
1115 1.1 mrg __A, __D);
1116 1.1 mrg }
1117 1.1 mrg
1118 1.1 mrg #else
1119 1.1 mrg #define _mm512_max_round_ph(A, B, C) \
1120 1.1 mrg (__builtin_ia32_maxph512_mask_round ((A), (B), \
1121 1.1 mrg _mm512_setzero_ph (), \
1122 1.1 mrg (__mmask32)-1, (C)))
1123 1.1 mrg
1124 1.1 mrg #define _mm512_mask_max_round_ph(A, B, C, D, E) \
1125 1.1 mrg (__builtin_ia32_maxph512_mask_round ((C), (D), (A), (B), (E)))
1126 1.1 mrg
1127 1.1 mrg #define _mm512_maskz_max_round_ph(A, B, C, D) \
1128 1.1 mrg (__builtin_ia32_maxph512_mask_round ((B), (C), \
1129 1.1 mrg _mm512_setzero_ph (), \
1130 1.1 mrg (A), (D)))
1131 1.1 mrg
1132 1.1 mrg #define _mm512_min_round_ph(A, B, C) \
1133 1.1 mrg (__builtin_ia32_minph512_mask_round ((A), (B), \
1134 1.1 mrg _mm512_setzero_ph (), \
1135 1.1 mrg (__mmask32)-1, (C)))
1136 1.1 mrg
1137 1.1 mrg #define _mm512_mask_min_round_ph(A, B, C, D, E) \
1138 1.1 mrg (__builtin_ia32_minph512_mask_round ((C), (D), (A), (B), (E)))
1139 1.1 mrg
1140 1.1 mrg #define _mm512_maskz_min_round_ph(A, B, C, D) \
1141 1.1 mrg (__builtin_ia32_minph512_mask_round ((B), (C), \
1142 1.1 mrg _mm512_setzero_ph (), \
1143 1.1 mrg (A), (D)))
1144 1.1 mrg #endif /* __OPTIMIZE__ */
1145 1.1 mrg
1146 1.1 mrg /* Intrinsic vmaxsh vminsh. */
1147 1.1 mrg extern __inline __m128h
1148 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1149 1.1 mrg _mm_max_sh (__m128h __A, __m128h __B)
1150 1.1 mrg {
1151 1.1 mrg __A[0] = __A[0] > __B[0] ? __A[0] : __B[0];
1152 1.1 mrg return __A;
1153 1.1 mrg }
1154 1.1 mrg
1155 1.1 mrg extern __inline __m128h
1156 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1157 1.1 mrg _mm_mask_max_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
1158 1.1 mrg {
1159 1.1 mrg return __builtin_ia32_maxsh_mask (__C, __D, __A, __B);
1160 1.1 mrg }
1161 1.1 mrg
1162 1.1 mrg extern __inline __m128h
1163 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1164 1.1 mrg _mm_maskz_max_sh (__mmask8 __A, __m128h __B, __m128h __C)
1165 1.1 mrg {
1166 1.1 mrg return __builtin_ia32_maxsh_mask (__B, __C, _mm_setzero_ph (),
1167 1.1 mrg __A);
1168 1.1 mrg }
1169 1.1 mrg
1170 1.1 mrg extern __inline __m128h
1171 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1172 1.1 mrg _mm_min_sh (__m128h __A, __m128h __B)
1173 1.1 mrg {
1174 1.1 mrg __A[0] = __A[0] < __B[0] ? __A[0] : __B[0];
1175 1.1 mrg return __A;
1176 1.1 mrg }
1177 1.1 mrg
1178 1.1 mrg extern __inline __m128h
1179 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1180 1.1 mrg _mm_mask_min_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
1181 1.1 mrg {
1182 1.1 mrg return __builtin_ia32_minsh_mask (__C, __D, __A, __B);
1183 1.1 mrg }
1184 1.1 mrg
1185 1.1 mrg extern __inline __m128h
1186 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1187 1.1 mrg _mm_maskz_min_sh (__mmask8 __A, __m128h __B, __m128h __C)
1188 1.1 mrg {
1189 1.1 mrg return __builtin_ia32_minsh_mask (__B, __C, _mm_setzero_ph (),
1190 1.1 mrg __A);
1191 1.1 mrg }
1192 1.1 mrg
1193 1.1 mrg #ifdef __OPTIMIZE__
1194 1.1 mrg extern __inline __m128h
1195 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1196 1.1 mrg _mm_max_round_sh (__m128h __A, __m128h __B, const int __C)
1197 1.1 mrg {
1198 1.1 mrg return __builtin_ia32_maxsh_mask_round (__A, __B,
1199 1.1 mrg _mm_setzero_ph (),
1200 1.1 mrg (__mmask8) -1, __C);
1201 1.1 mrg }
1202 1.1 mrg
1203 1.1 mrg extern __inline __m128h
1204 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1205 1.1 mrg _mm_mask_max_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
1206 1.1 mrg __m128h __D, const int __E)
1207 1.1 mrg {
1208 1.1 mrg return __builtin_ia32_maxsh_mask_round (__C, __D, __A, __B, __E);
1209 1.1 mrg }
1210 1.1 mrg
1211 1.1 mrg extern __inline __m128h
1212 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1213 1.1 mrg _mm_maskz_max_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
1214 1.1 mrg const int __D)
1215 1.1 mrg {
1216 1.1 mrg return __builtin_ia32_maxsh_mask_round (__B, __C,
1217 1.1 mrg _mm_setzero_ph (),
1218 1.1 mrg __A, __D);
1219 1.1 mrg }
1220 1.1 mrg
1221 1.1 mrg extern __inline __m128h
1222 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1223 1.1 mrg _mm_min_round_sh (__m128h __A, __m128h __B, const int __C)
1224 1.1 mrg {
1225 1.1 mrg return __builtin_ia32_minsh_mask_round (__A, __B,
1226 1.1 mrg _mm_setzero_ph (),
1227 1.1 mrg (__mmask8) -1, __C);
1228 1.1 mrg }
1229 1.1 mrg
1230 1.1 mrg extern __inline __m128h
1231 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1232 1.1 mrg _mm_mask_min_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
1233 1.1 mrg __m128h __D, const int __E)
1234 1.1 mrg {
1235 1.1 mrg return __builtin_ia32_minsh_mask_round (__C, __D, __A, __B, __E);
1236 1.1 mrg }
1237 1.1 mrg
1238 1.1 mrg extern __inline __m128h
1239 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1240 1.1 mrg _mm_maskz_min_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
1241 1.1 mrg const int __D)
1242 1.1 mrg {
1243 1.1 mrg return __builtin_ia32_minsh_mask_round (__B, __C,
1244 1.1 mrg _mm_setzero_ph (),
1245 1.1 mrg __A, __D);
1246 1.1 mrg }
1247 1.1 mrg
1248 1.1 mrg #else
1249 1.1 mrg #define _mm_max_round_sh(A, B, C) \
1250 1.1 mrg (__builtin_ia32_maxsh_mask_round ((A), (B), \
1251 1.1 mrg _mm_setzero_ph (), \
1252 1.1 mrg (__mmask8)-1, (C)))
1253 1.1 mrg
1254 1.1 mrg #define _mm_mask_max_round_sh(A, B, C, D, E) \
1255 1.1 mrg (__builtin_ia32_maxsh_mask_round ((C), (D), (A), (B), (E)))
1256 1.1 mrg
1257 1.1 mrg #define _mm_maskz_max_round_sh(A, B, C, D) \
1258 1.1 mrg (__builtin_ia32_maxsh_mask_round ((B), (C), \
1259 1.1 mrg _mm_setzero_ph (), \
1260 1.1 mrg (A), (D)))
1261 1.1 mrg
1262 1.1 mrg #define _mm_min_round_sh(A, B, C) \
1263 1.1 mrg (__builtin_ia32_minsh_mask_round ((A), (B), \
1264 1.1 mrg _mm_setzero_ph (), \
1265 1.1 mrg (__mmask8)-1, (C)))
1266 1.1 mrg
1267 1.1 mrg #define _mm_mask_min_round_sh(A, B, C, D, E) \
1268 1.1 mrg (__builtin_ia32_minsh_mask_round ((C), (D), (A), (B), (E)))
1269 1.1 mrg
1270 1.1 mrg #define _mm_maskz_min_round_sh(A, B, C, D) \
1271 1.1 mrg (__builtin_ia32_minsh_mask_round ((B), (C), \
1272 1.1 mrg _mm_setzero_ph (), \
1273 1.1 mrg (A), (D)))
1274 1.1 mrg
1275 1.1 mrg #endif /* __OPTIMIZE__ */
1276 1.1 mrg
1277 1.1 mrg /* vcmpph */
1278 1.1 mrg #ifdef __OPTIMIZE
1279 1.1 mrg extern __inline __mmask32
1280 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1281 1.1 mrg _mm512_cmp_ph_mask (__m512h __A, __m512h __B, const int __C)
1282 1.1 mrg {
1283 1.1 mrg return (__mmask32) __builtin_ia32_cmpph512_mask (__A, __B, __C,
1284 1.1 mrg (__mmask32) -1);
1285 1.1 mrg }
1286 1.1 mrg
1287 1.1 mrg extern __inline __mmask32
1288 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1289 1.1 mrg _mm512_mask_cmp_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
1290 1.1 mrg const int __D)
1291 1.1 mrg {
1292 1.1 mrg return (__mmask32) __builtin_ia32_cmpph512_mask (__B, __C, __D,
1293 1.1 mrg __A);
1294 1.1 mrg }
1295 1.1 mrg
1296 1.1 mrg extern __inline __mmask32
1297 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1298 1.1 mrg _mm512_cmp_round_ph_mask (__m512h __A, __m512h __B, const int __C,
1299 1.1 mrg const int __D)
1300 1.1 mrg {
1301 1.1 mrg return (__mmask32) __builtin_ia32_cmpph512_mask_round (__A, __B,
1302 1.1 mrg __C, (__mmask32) -1,
1303 1.1 mrg __D);
1304 1.1 mrg }
1305 1.1 mrg
1306 1.1 mrg extern __inline __mmask32
1307 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1308 1.1 mrg _mm512_mask_cmp_round_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
1309 1.1 mrg const int __D, const int __E)
1310 1.1 mrg {
1311 1.1 mrg return (__mmask32) __builtin_ia32_cmpph512_mask_round (__B, __C,
1312 1.1 mrg __D, __A,
1313 1.1 mrg __E);
1314 1.1 mrg }
1315 1.1 mrg
1316 1.1 mrg #else
1317 1.1 mrg #define _mm512_cmp_ph_mask(A, B, C) \
1318 1.1 mrg (__builtin_ia32_cmpph512_mask ((A), (B), (C), (-1)))
1319 1.1 mrg
1320 1.1 mrg #define _mm512_mask_cmp_ph_mask(A, B, C, D) \
1321 1.1 mrg (__builtin_ia32_cmpph512_mask ((B), (C), (D), (A)))
1322 1.1 mrg
1323 1.1 mrg #define _mm512_cmp_round_ph_mask(A, B, C, D) \
1324 1.1 mrg (__builtin_ia32_cmpph512_mask_round ((A), (B), (C), (-1), (D)))
1325 1.1 mrg
1326 1.1 mrg #define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E) \
1327 1.1 mrg (__builtin_ia32_cmpph512_mask_round ((B), (C), (D), (A), (E)))
1328 1.1 mrg
1329 1.1 mrg #endif /* __OPTIMIZE__ */
1330 1.1 mrg
1331 1.1 mrg /* Intrinsics vcmpsh. */
1332 1.1 mrg #ifdef __OPTIMIZE__
1333 1.1 mrg extern __inline __mmask8
1334 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1335 1.1 mrg _mm_cmp_sh_mask (__m128h __A, __m128h __B, const int __C)
1336 1.1 mrg {
1337 1.1 mrg return (__mmask8)
1338 1.1 mrg __builtin_ia32_cmpsh_mask_round (__A, __B,
1339 1.1 mrg __C, (__mmask8) -1,
1340 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1341 1.1 mrg }
1342 1.1 mrg
1343 1.1 mrg extern __inline __mmask8
1344 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1345 1.1 mrg _mm_mask_cmp_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
1346 1.1 mrg const int __D)
1347 1.1 mrg {
1348 1.1 mrg return (__mmask8)
1349 1.1 mrg __builtin_ia32_cmpsh_mask_round (__B, __C,
1350 1.1 mrg __D, __A,
1351 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1352 1.1 mrg }
1353 1.1 mrg
1354 1.1 mrg extern __inline __mmask8
1355 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1356 1.1 mrg _mm_cmp_round_sh_mask (__m128h __A, __m128h __B, const int __C,
1357 1.1 mrg const int __D)
1358 1.1 mrg {
1359 1.1 mrg return (__mmask8) __builtin_ia32_cmpsh_mask_round (__A, __B,
1360 1.1 mrg __C, (__mmask8) -1,
1361 1.1 mrg __D);
1362 1.1 mrg }
1363 1.1 mrg
1364 1.1 mrg extern __inline __mmask8
1365 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1366 1.1 mrg _mm_mask_cmp_round_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
1367 1.1 mrg const int __D, const int __E)
1368 1.1 mrg {
1369 1.1 mrg return (__mmask8) __builtin_ia32_cmpsh_mask_round (__B, __C,
1370 1.1 mrg __D, __A,
1371 1.1 mrg __E);
1372 1.1 mrg }
1373 1.1 mrg
1374 1.1 mrg #else
1375 1.1 mrg #define _mm_cmp_sh_mask(A, B, C) \
1376 1.1 mrg (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), \
1377 1.1 mrg (_MM_FROUND_CUR_DIRECTION)))
1378 1.1 mrg
1379 1.1 mrg #define _mm_mask_cmp_sh_mask(A, B, C, D) \
1380 1.1 mrg (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), \
1381 1.1 mrg (_MM_FROUND_CUR_DIRECTION)))
1382 1.1 mrg
1383 1.1 mrg #define _mm_cmp_round_sh_mask(A, B, C, D) \
1384 1.1 mrg (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), (D)))
1385 1.1 mrg
1386 1.1 mrg #define _mm_mask_cmp_round_sh_mask(A, B, C, D, E) \
1387 1.1 mrg (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), (E)))
1388 1.1 mrg
1389 1.1 mrg #endif /* __OPTIMIZE__ */
1390 1.1 mrg
1391 1.1 mrg /* Intrinsics vcomish. */
1392 1.1 mrg extern __inline int
1393 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1394 1.1 mrg _mm_comieq_sh (__m128h __A, __m128h __B)
1395 1.1 mrg {
1396 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OS,
1397 1.1 mrg (__mmask8) -1,
1398 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1399 1.1 mrg }
1400 1.1 mrg
1401 1.1 mrg extern __inline int
1402 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1403 1.1 mrg _mm_comilt_sh (__m128h __A, __m128h __B)
1404 1.1 mrg {
1405 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OS,
1406 1.1 mrg (__mmask8) -1,
1407 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1408 1.1 mrg }
1409 1.1 mrg
1410 1.1 mrg extern __inline int
1411 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1412 1.1 mrg _mm_comile_sh (__m128h __A, __m128h __B)
1413 1.1 mrg {
1414 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OS,
1415 1.1 mrg (__mmask8) -1,
1416 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1417 1.1 mrg }
1418 1.1 mrg
1419 1.1 mrg extern __inline int
1420 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1421 1.1 mrg _mm_comigt_sh (__m128h __A, __m128h __B)
1422 1.1 mrg {
1423 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OS,
1424 1.1 mrg (__mmask8) -1,
1425 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1426 1.1 mrg }
1427 1.1 mrg
1428 1.1 mrg extern __inline int
1429 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1430 1.1 mrg _mm_comige_sh (__m128h __A, __m128h __B)
1431 1.1 mrg {
1432 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OS,
1433 1.1 mrg (__mmask8) -1,
1434 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1435 1.1 mrg }
1436 1.1 mrg
1437 1.1 mrg extern __inline int
1438 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1439 1.1 mrg _mm_comineq_sh (__m128h __A, __m128h __B)
1440 1.1 mrg {
1441 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_US,
1442 1.1 mrg (__mmask8) -1,
1443 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1444 1.1 mrg }
1445 1.1 mrg
1446 1.1 mrg extern __inline int
1447 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1448 1.1 mrg _mm_ucomieq_sh (__m128h __A, __m128h __B)
1449 1.1 mrg {
1450 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OQ,
1451 1.1 mrg (__mmask8) -1,
1452 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1453 1.1 mrg }
1454 1.1 mrg
1455 1.1 mrg extern __inline int
1456 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1457 1.1 mrg _mm_ucomilt_sh (__m128h __A, __m128h __B)
1458 1.1 mrg {
1459 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OQ,
1460 1.1 mrg (__mmask8) -1,
1461 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1462 1.1 mrg }
1463 1.1 mrg
1464 1.1 mrg extern __inline int
1465 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1466 1.1 mrg _mm_ucomile_sh (__m128h __A, __m128h __B)
1467 1.1 mrg {
1468 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OQ,
1469 1.1 mrg (__mmask8) -1,
1470 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1471 1.1 mrg }
1472 1.1 mrg
1473 1.1 mrg extern __inline int
1474 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1475 1.1 mrg _mm_ucomigt_sh (__m128h __A, __m128h __B)
1476 1.1 mrg {
1477 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OQ,
1478 1.1 mrg (__mmask8) -1,
1479 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1480 1.1 mrg }
1481 1.1 mrg
1482 1.1 mrg extern __inline int
1483 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1484 1.1 mrg _mm_ucomige_sh (__m128h __A, __m128h __B)
1485 1.1 mrg {
1486 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OQ,
1487 1.1 mrg (__mmask8) -1,
1488 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1489 1.1 mrg }
1490 1.1 mrg
1491 1.1 mrg extern __inline int
1492 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1493 1.1 mrg _mm_ucomineq_sh (__m128h __A, __m128h __B)
1494 1.1 mrg {
1495 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_UQ,
1496 1.1 mrg (__mmask8) -1,
1497 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1498 1.1 mrg }
1499 1.1 mrg
1500 1.1 mrg #ifdef __OPTIMIZE__
1501 1.1 mrg extern __inline int
1502 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1503 1.1 mrg _mm_comi_sh (__m128h __A, __m128h __B, const int __P)
1504 1.1 mrg {
1505 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
1506 1.1 mrg (__mmask8) -1,
1507 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1508 1.1 mrg }
1509 1.1 mrg
1510 1.1 mrg extern __inline int
1511 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1512 1.1 mrg _mm_comi_round_sh (__m128h __A, __m128h __B, const int __P, const int __R)
1513 1.1 mrg {
1514 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
1515 1.1 mrg (__mmask8) -1,__R);
1516 1.1 mrg }
1517 1.1 mrg
1518 1.1 mrg #else
1519 1.1 mrg #define _mm_comi_round_sh(A, B, P, R) \
1520 1.1 mrg (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), (R)))
1521 1.1 mrg #define _mm_comi_sh(A, B, P) \
1522 1.1 mrg (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), \
1523 1.1 mrg _MM_FROUND_CUR_DIRECTION))
1524 1.1 mrg
1525 1.1 mrg #endif /* __OPTIMIZE__ */
1526 1.1 mrg
1527 1.1 mrg /* Intrinsics vsqrtph. */
1528 1.1 mrg extern __inline __m512h
1529 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1530 1.1 mrg _mm512_sqrt_ph (__m512h __A)
1531 1.1 mrg {
1532 1.1 mrg return __builtin_ia32_sqrtph512_mask_round (__A,
1533 1.1 mrg _mm512_setzero_ph(),
1534 1.1 mrg (__mmask32) -1,
1535 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1536 1.1 mrg }
1537 1.1 mrg
1538 1.1 mrg extern __inline __m512h
1539 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1540 1.1 mrg _mm512_mask_sqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
1541 1.1 mrg {
1542 1.1 mrg return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B,
1543 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1544 1.1 mrg }
1545 1.1 mrg
1546 1.1 mrg extern __inline __m512h
1547 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1548 1.1 mrg _mm512_maskz_sqrt_ph (__mmask32 __A, __m512h __B)
1549 1.1 mrg {
1550 1.1 mrg return __builtin_ia32_sqrtph512_mask_round (__B,
1551 1.1 mrg _mm512_setzero_ph (),
1552 1.1 mrg __A,
1553 1.1 mrg _MM_FROUND_CUR_DIRECTION);
1554 1.1 mrg }
1555 1.1 mrg
1556 1.1 mrg #ifdef __OPTIMIZE__
1557 1.1 mrg extern __inline __m512h
1558 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1559 1.1 mrg _mm512_sqrt_round_ph (__m512h __A, const int __B)
1560 1.1 mrg {
1561 1.1 mrg return __builtin_ia32_sqrtph512_mask_round (__A,
1562 1.1 mrg _mm512_setzero_ph(),
1563 1.1 mrg (__mmask32) -1, __B);
1564 1.1 mrg }
1565 1.1 mrg
1566 1.1 mrg extern __inline __m512h
1567 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1568 1.1 mrg _mm512_mask_sqrt_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
1569 1.1 mrg const int __D)
1570 1.1 mrg {
1571 1.1 mrg return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B, __D);
1572 1.1 mrg }
1573 1.1 mrg
1574 1.1 mrg extern __inline __m512h
1575 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1576 1.1 mrg _mm512_maskz_sqrt_round_ph (__mmask32 __A, __m512h __B, const int __C)
1577 1.1 mrg {
1578 1.1 mrg return __builtin_ia32_sqrtph512_mask_round (__B,
1579 1.1 mrg _mm512_setzero_ph (),
1580 1.1 mrg __A, __C);
1581 1.1 mrg }
1582 1.1 mrg
1583 1.1 mrg #else
1584 1.1 mrg #define _mm512_sqrt_round_ph(A, B) \
1585 1.1 mrg (__builtin_ia32_sqrtph512_mask_round ((A), \
1586 1.1 mrg _mm512_setzero_ph (), \
1587 1.1 mrg (__mmask32)-1, (B)))
1588 1.1 mrg
1589 1.1 mrg #define _mm512_mask_sqrt_round_ph(A, B, C, D) \
1590 1.1 mrg (__builtin_ia32_sqrtph512_mask_round ((C), (A), (B), (D)))
1591 1.1 mrg
1592 1.1 mrg #define _mm512_maskz_sqrt_round_ph(A, B, C) \
1593 1.1 mrg (__builtin_ia32_sqrtph512_mask_round ((B), \
1594 1.1 mrg _mm512_setzero_ph (), \
1595 1.1 mrg (A), (C)))
1596 1.1 mrg
1597 1.1 mrg #endif /* __OPTIMIZE__ */
1598 1.1 mrg
/* Intrinsics vrsqrtph.  */
/* Approximate reciprocal square root of each _Float16 element of __A.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt_ph (__m512h __A)
{
  return __builtin_ia32_rsqrtph512_mask (__A, _mm512_setzero_ph (),
					 (__mmask32) -1);
}

/* Merge-masking variant: elements with mask __B clear come from __A.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
{
  return __builtin_ia32_rsqrtph512_mask (__C, __A, __B);
}

/* Zero-masking variant: elements with mask __A clear are zeroed.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt_ph (__mmask32 __A, __m512h __B)
{
  return __builtin_ia32_rsqrtph512_mask (__B, _mm512_setzero_ph (),
					 __A);
}
1622 1.1 mrg
/* Intrinsics vrsqrtsh.  */
/* Approximate reciprocal square root of the low _Float16 element of __B;
   upper 7 elements are copied from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt_sh (__m128h __A, __m128h __B)
{
  return __builtin_ia32_rsqrtsh_mask (__B, __A, _mm_setzero_ph (),
				      (__mmask8) -1);
}

/* Merge-masking variant: low result element is taken from __A when bit 0
   of __B is clear; upper elements come from __C, operand is __D.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_rsqrtsh_mask (__D, __C, __A, __B);
}

/* Zero-masking variant: low element zeroed when bit 0 of __A is clear.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_rsqrtsh_mask (__C, __B, _mm_setzero_ph (),
				      __A);
}
1646 1.1 mrg
/* Intrinsics vsqrtsh.  */
/* Square root of the low _Float16 element of __B; upper 7 elements are
   copied from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_sh (__m128h __A, __m128h __B)
{
  return __builtin_ia32_sqrtsh_mask_round (__B, __A,
					   _mm_setzero_ph (),
					   (__mmask8) -1,
					   _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking variant: low result element taken from __A when bit 0 of
   __B is clear; upper elements come from __C, operand is __D.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
					   _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking variant: low element zeroed when bit 0 of __A is clear.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_sqrtsh_mask_round (__C, __B,
					   _mm_setzero_ph (),
					   __A, _MM_FROUND_CUR_DIRECTION);
}
1674 1.1 mrg
#ifdef __OPTIMIZE__
/* vsqrtsh with explicit rounding-mode immediate __C.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_round_sh (__m128h __A, __m128h __B, const int __C)
{
  return __builtin_ia32_sqrtsh_mask_round (__B, __A,
					   _mm_setzero_ph (),
					   (__mmask8) -1, __C);
}

/* Merge-masking variant with rounding immediate __E.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sqrt_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
			__m128h __D, const int __E)
{
  return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
					   __E);
}

/* Zero-masking variant with rounding immediate __D.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sqrt_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			 const int __D)
{
  return __builtin_ia32_sqrtsh_mask_round (__C, __B,
					   _mm_setzero_ph (),
					   __A, __D);
}

#else
/* Macro forms for -O0, where the immediates must reach the builtin
   verbatim.  */
#define _mm_sqrt_round_sh(A, B, C)				\
  (__builtin_ia32_sqrtsh_mask_round ((B), (A),			\
				     _mm_setzero_ph (),		\
				     (__mmask8)-1, (C)))

#define _mm_mask_sqrt_round_sh(A, B, C, D, E)			\
  (__builtin_ia32_sqrtsh_mask_round ((D), (C), (A), (B), (E)))

#define _mm_maskz_sqrt_round_sh(A, B, C, D)			\
  (__builtin_ia32_sqrtsh_mask_round ((C), (B),			\
				     _mm_setzero_ph (),		\
				     (A), (D)))

#endif /* __OPTIMIZE__ */
1719 1.1 mrg
/* Intrinsics vrcpph.  */
/* Approximate reciprocal of each _Float16 element of __A.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp_ph (__m512h __A)
{
  return __builtin_ia32_rcpph512_mask (__A, _mm512_setzero_ph (),
				       (__mmask32) -1);
}

/* Merge-masking variant: elements with mask __B clear come from __A.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp_ph (__m512h __A, __mmask32 __B, __m512h __C)
{
  return __builtin_ia32_rcpph512_mask (__C, __A, __B);
}

/* Zero-masking variant: elements with mask __A clear are zeroed.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp_ph (__mmask32 __A, __m512h __B)
{
  return __builtin_ia32_rcpph512_mask (__B, _mm512_setzero_ph (),
				       __A);
}
1743 1.1 mrg
/* Intrinsics vrcpsh.  */
/* Approximate reciprocal of the low _Float16 element of __B; upper 7
   elements are copied from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp_sh (__m128h __A, __m128h __B)
{
  return __builtin_ia32_rcpsh_mask (__B, __A, _mm_setzero_ph (),
				    (__mmask8) -1);
}
1752 1.1 mrg
1753 1.1 mrg extern __inline __m128h
1754 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1755 1.1 mrg _mm_mask_rcp_sh (__m128h __A, __mmask32 __B, __m128h __C, __m128h __D)
1756 1.1 mrg {
1757 1.1 mrg return __builtin_ia32_rcpsh_mask (__D, __C, __A, __B);
1758 1.1 mrg }
1759 1.1 mrg
1760 1.1 mrg extern __inline __m128h
1761 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1762 1.1 mrg _mm_maskz_rcp_sh (__mmask32 __A, __m128h __B, __m128h __C)
1763 1.1 mrg {
1764 1.1 mrg return __builtin_ia32_rcpsh_mask (__C, __B, _mm_setzero_ph (),
1765 1.1 mrg __A);
1766 1.1 mrg }
1767 1.1 mrg
/* Intrinsics vscalefph.  */
/* Scale each element of __A by 2^floor(__B[i]) (vscalefph).  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_scalef_ph (__m512h __A, __m512h __B)
{
  return __builtin_ia32_scalefph512_mask_round (__A, __B,
						_mm512_setzero_ph (),
						(__mmask32) -1,
						_MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking variant: elements with mask __B clear come from __A.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_scalef_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
{
  return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
						_MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking variant: elements with mask __A clear are zeroed.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_scalef_ph (__mmask32 __A, __m512h __B, __m512h __C)
{
  return __builtin_ia32_scalefph512_mask_round (__B, __C,
						_mm512_setzero_ph (),
						__A,
						_MM_FROUND_CUR_DIRECTION);
}
1796 1.1 mrg
#ifdef __OPTIMIZE__
/* vscalefph with explicit rounding-mode immediate __C.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_scalef_round_ph (__m512h __A, __m512h __B, const int __C)
{
  return __builtin_ia32_scalefph512_mask_round (__A, __B,
						_mm512_setzero_ph (),
						(__mmask32) -1, __C);
}

/* Merge-masking variant with rounding immediate __E.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_scalef_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
			     __m512h __D, const int __E)
{
  return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
						__E);
}

/* Zero-masking variant with rounding immediate __D.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_scalef_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
			      const int __D)
{
  return __builtin_ia32_scalefph512_mask_round (__B, __C,
						_mm512_setzero_ph (),
						__A, __D);
}

#else
/* Macro forms for -O0, where the immediates must reach the builtin
   verbatim.  */
#define _mm512_scalef_round_ph(A, B, C)				\
  (__builtin_ia32_scalefph512_mask_round ((A), (B),			\
					  _mm512_setzero_ph (),	\
					  (__mmask32)-1, (C)))

#define _mm512_mask_scalef_round_ph(A, B, C, D, E)			\
  (__builtin_ia32_scalefph512_mask_round ((C), (D), (A), (B), (E)))

#define _mm512_maskz_scalef_round_ph(A, B, C, D)			\
  (__builtin_ia32_scalefph512_mask_round ((B), (C),			\
					  _mm512_setzero_ph (),	\
					  (A), (D)))

#endif /* __OPTIMIZE__ */
1841 1.1 mrg
/* Intrinsics vscalefsh.  */
/* Scale the low _Float16 element of __A by 2^floor(__B[0]); upper 7
   elements are copied from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_sh (__m128h __A, __m128h __B)
{
  return __builtin_ia32_scalefsh_mask_round (__A, __B,
					     _mm_setzero_ph (),
					     (__mmask8) -1,
					     _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking variant: low result element taken from __A when bit 0 of
   __B is clear; operands are __C and __D.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_scalef_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
					     _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking variant: low element zeroed when bit 0 of __A is clear.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_scalef_sh (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_scalefsh_mask_round (__B, __C,
					     _mm_setzero_ph (),
					     __A,
					     _MM_FROUND_CUR_DIRECTION);
}
1870 1.1 mrg
#ifdef __OPTIMIZE__
/* vscalefsh with explicit rounding-mode immediate __C.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_round_sh (__m128h __A, __m128h __B, const int __C)
{
  return __builtin_ia32_scalefsh_mask_round (__A, __B,
					     _mm_setzero_ph (),
					     (__mmask8) -1, __C);
}

/* Merge-masking variant with rounding immediate __E.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_scalef_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
			  __m128h __D, const int __E)
{
  return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
					     __E);
}

/* Zero-masking variant with rounding immediate __D.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_scalef_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			   const int __D)
{
  return __builtin_ia32_scalefsh_mask_round (__B, __C,
					     _mm_setzero_ph (),
					     __A, __D);
}

#else
/* Macro forms for -O0, where the immediates must reach the builtin
   verbatim.  */
#define _mm_scalef_round_sh(A, B, C)				\
  (__builtin_ia32_scalefsh_mask_round ((A), (B),		\
				       _mm_setzero_ph (),	\
				       (__mmask8)-1, (C)))

#define _mm_mask_scalef_round_sh(A, B, C, D, E)			\
  (__builtin_ia32_scalefsh_mask_round ((C), (D), (A), (B), (E)))

#define _mm_maskz_scalef_round_sh(A, B, C, D)			\
  (__builtin_ia32_scalefsh_mask_round ((B), (C), _mm_setzero_ph (), \
				       (A), (D)))

#endif /* __OPTIMIZE__ */
1914 1.1 mrg
/* Intrinsics vreduceph.  */
#ifdef __OPTIMIZE__
/* Extract the reduced argument of each element of __A; imm8 __B selects
   the number of fraction bits kept and the rounding used (vreduceph).  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_ph (__m512h __A, int __B)
{
  return __builtin_ia32_reduceph512_mask_round (__A, __B,
						_mm512_setzero_ph (),
						(__mmask32) -1,
						_MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking variant: elements with mask __B clear come from __A.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_ph (__m512h __A, __mmask32 __B, __m512h __C, int __D)
{
  return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
						_MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking variant: elements with mask __A clear are zeroed.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_reduce_ph (__mmask32 __A, __m512h __B, int __C)
{
  return __builtin_ia32_reduceph512_mask_round (__B, __C,
						_mm512_setzero_ph (),
						__A,
						_MM_FROUND_CUR_DIRECTION);
}

/* Variants with explicit suppress-all-exceptions/rounding immediate.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_round_ph (__m512h __A, int __B, const int __C)
{
  return __builtin_ia32_reduceph512_mask_round (__A, __B,
						_mm512_setzero_ph (),
						(__mmask32) -1, __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
			     int __D, const int __E)
{
  return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
						__E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_reduce_round_ph (__mmask32 __A, __m512h __B, int __C,
			      const int __D)
{
  return __builtin_ia32_reduceph512_mask_round ((__B), (__C),
						_mm512_setzero_ph (),
						__A, __D);
}

#else
/* Macro forms for -O0, where the immediates must reach the builtin
   verbatim.  */
#define _mm512_reduce_ph(A, B)					\
  (__builtin_ia32_reduceph512_mask_round ((A), (B),		\
					  _mm512_setzero_ph (),	\
					  (__mmask32)-1,	\
					  _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_reduce_ph(A, B, C, D)			\
  (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B),	\
					  _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_reduce_ph(A, B, C)				\
  (__builtin_ia32_reduceph512_mask_round ((B), (C),		\
					  _mm512_setzero_ph (),	\
					  (A), _MM_FROUND_CUR_DIRECTION))

#define _mm512_reduce_round_ph(A, B, C)				\
  (__builtin_ia32_reduceph512_mask_round ((A), (B),		\
					  _mm512_setzero_ph (),	\
					  (__mmask32)-1, (C)))

#define _mm512_mask_reduce_round_ph(A, B, C, D, E)		\
  (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), (E)))

#define _mm512_maskz_reduce_round_ph(A, B, C, D)		\
  (__builtin_ia32_reduceph512_mask_round ((B), (C),		\
					  _mm512_setzero_ph (),	\
					  (A), (D)))

#endif /* __OPTIMIZE__ */
2003 1.1 mrg
/* Intrinsics vreducesh.  */
#ifdef __OPTIMIZE__
/* Extract the reduced argument of the low _Float16 element of __B with
   imm8 __C; upper 7 elements are copied from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_sh (__m128h __A, __m128h __B, int __C)
{
  return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
					     _mm_setzero_ph (),
					     (__mmask8) -1,
					     _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking variant: low result element taken from __A when bit 0 of
   __B is clear.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_sh (__m128h __A, __mmask8 __B, __m128h __C,
		    __m128h __D, int __E)
{
  return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A, __B,
					     _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking variant: low element zeroed when bit 0 of __A is clear.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
{
  return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
					     _mm_setzero_ph (), __A,
					     _MM_FROUND_CUR_DIRECTION);
}

/* Variants with explicit suppress-all-exceptions/rounding immediate.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
{
  return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
					     _mm_setzero_ph (),
					     (__mmask8) -1, __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
			  __m128h __D, int __E, const int __F)
{
  return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A,
					     __B, __F);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			   int __D, const int __E)
{
  return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
					     _mm_setzero_ph (),
					     __A, __E);
}

#else
/* Macro forms for -O0, where the immediates must reach the builtin
   verbatim.  */
#define _mm_reduce_sh(A, B, C)					\
  (__builtin_ia32_reducesh_mask_round ((A), (B), (C),		\
				       _mm_setzero_ph (),	\
				       (__mmask8)-1,		\
				       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_reduce_sh(A, B, C, D, E)			\
  (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B),	\
				       _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_reduce_sh(A, B, C, D)				\
  (__builtin_ia32_reducesh_mask_round ((B), (C), (D),		\
				       _mm_setzero_ph (),	\
				       (A), _MM_FROUND_CUR_DIRECTION))

#define _mm_reduce_round_sh(A, B, C, D)				\
  (__builtin_ia32_reducesh_mask_round ((A), (B), (C),		\
				       _mm_setzero_ph (),	\
				       (__mmask8)-1, (D)))

#define _mm_mask_reduce_round_sh(A, B, C, D, E, F)		\
  (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), (F)))

#define _mm_maskz_reduce_round_sh(A, B, C, D, E)		\
  (__builtin_ia32_reducesh_mask_round ((B), (C), (D),		\
				       _mm_setzero_ph (),	\
				       (A), (E)))

#endif /* __OPTIMIZE__ */
2092 1.1 mrg
/* Intrinsics vrndscaleph.  */
#ifdef __OPTIMIZE__
/* Round each element of __A to the number of fraction bits selected by
   imm8 __B (vrndscaleph).  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_ph (__m512h __A, int __B)
{
  return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
						  _mm512_setzero_ph (),
						  (__mmask32) -1,
						  _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking variant: elements with mask __B clear come from __A.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_ph (__m512h __A, __mmask32 __B,
			   __m512h __C, int __D)
{
  return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A, __B,
						  _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking variant: elements with mask __A clear are zeroed.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_ph (__mmask32 __A, __m512h __B, int __C)
{
  return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
						  _mm512_setzero_ph (),
						  __A,
						  _MM_FROUND_CUR_DIRECTION);
}

/* Variants with explicit suppress-all-exceptions/rounding immediate.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_round_ph (__m512h __A, int __B, const int __C)
{
  return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
						  _mm512_setzero_ph (),
						  (__mmask32) -1,
						  __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_round_ph (__m512h __A, __mmask32 __B,
				 __m512h __C, int __D, const int __E)
{
  return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A,
						  __B, __E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_round_ph (__mmask32 __A, __m512h __B, int __C,
				  const int __D)
{
  return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
						  _mm512_setzero_ph (),
						  __A, __D);
}

#else
/* Macro forms for -O0, where the immediates must reach the builtin
   verbatim.  */
#define _mm512_roundscale_ph(A, B)				\
  (__builtin_ia32_rndscaleph512_mask_round ((A), (B),		\
					    _mm512_setzero_ph (), \
					    (__mmask32)-1,	\
					    _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_ph(A, B, C, D)			\
  (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B),	\
					    _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_ph(A, B, C)			\
  (__builtin_ia32_rndscaleph512_mask_round ((B), (C),		\
					    _mm512_setzero_ph (), \
					    (A),		\
					    _MM_FROUND_CUR_DIRECTION))
#define _mm512_roundscale_round_ph(A, B, C)			\
  (__builtin_ia32_rndscaleph512_mask_round ((A), (B),		\
					    _mm512_setzero_ph (), \
					    (__mmask32)-1, (C)))

#define _mm512_mask_roundscale_round_ph(A, B, C, D, E)		\
  (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), (E)))

#define _mm512_maskz_roundscale_round_ph(A, B, C, D)		\
  (__builtin_ia32_rndscaleph512_mask_round ((B), (C),		\
					    _mm512_setzero_ph (), \
					    (A), (D)))

#endif /* __OPTIMIZE__ */
2183 1.1 mrg
/* Intrinsics vrndscalesh.  */
#ifdef __OPTIMIZE__
/* Round the low _Float16 element of __B per imm8 __C; upper 7 elements
   are copied from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_sh (__m128h __A, __m128h __B, int __C)
{
  return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
					       _mm_setzero_ph (),
					       (__mmask8) -1,
					       _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking variant: low result element taken from __A when bit 0 of
   __B is clear.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_sh (__m128h __A, __mmask8 __B, __m128h __C,
			__m128h __D, int __E)
{
  return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E, __A, __B,
					       _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking variant: low element zeroed when bit 0 of __A is clear.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
{
  return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
					       _mm_setzero_ph (), __A,
					       _MM_FROUND_CUR_DIRECTION);
}

/* Variants with explicit suppress-all-exceptions/rounding immediate.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
{
  return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
					       _mm_setzero_ph (),
					       (__mmask8) -1,
					       __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
			      __m128h __D, int __E, const int __F)
{
  return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E,
					       __A, __B, __F);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			       int __D, const int __E)
{
  return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
					       _mm_setzero_ph (),
					       __A, __E);
}

#else
/* Macro forms for -O0, where the immediates must reach the builtin
   verbatim.  */
#define _mm_roundscale_sh(A, B, C)				\
  (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C),		\
					 _mm_setzero_ph (),	\
					 (__mmask8)-1,		\
					 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_sh(A, B, C, D, E)			\
  (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), \
					 _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_sh(A, B, C, D)			\
  (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D),		\
					 _mm_setzero_ph (),	\
					 (A), _MM_FROUND_CUR_DIRECTION))

#define _mm_roundscale_round_sh(A, B, C, D)			\
  (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C),		\
					 _mm_setzero_ph (),	\
					 (__mmask8)-1, (D)))

#define _mm_mask_roundscale_round_sh(A, B, C, D, E, F)		\
  (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), (F)))

#define _mm_maskz_roundscale_round_sh(A, B, C, D, E)		\
  (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D),		\
					 _mm_setzero_ph (),	\
					 (A), (E)))

#endif /* __OPTIMIZE__ */
2273 1.1 mrg
/* Intrinsics vfpclasssh.  */
#ifdef __OPTIMIZE__
/* Test the low _Float16 element of __A for the FP classes selected by
   imm8 __imm (QNaN/zero/denormal/infinity/negative); returns a one-bit
   mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fpclass_sh_mask (__m128h __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm,
						   (__mmask8) -1);
}

/* As above, but the result is ANDed with write-mask __U.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fpclass_sh_mask (__mmask8 __U, __m128h __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm, __U);
}
2291 1.1 mrg #else
2292 1.1 mrg #define _mm_fpclass_sh_mask(X, C) \
2293 1.1 mrg ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X), \
2294 1.1 mrg (int) (C), (__mmask8) (-1))) \
2295 1.1 mrg
2296 1.1 mrg #define _mm_mask_fpclass_sh_mask(U, X, C) \
2297 1.1 mrg ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X), \
2298 1.1 mrg (int) (C), (__mmask8) (U)))
2299 1.1 mrg #endif /* __OPTIMIZE__ */
2300 1.1 mrg
/* Intrinsics vfpclassph.  */
#ifdef __OPTIMIZE__
/* Test each _Float16 element of __A for the FP classes selected by imm8
   __imm; the per-element results are ANDed with write-mask __U.  */
extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fpclass_ph_mask (__mmask32 __U, __m512h __A,
			     const int __imm)
{
  return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
						       __imm, __U);
}

/* Unmasked variant: classify all 32 elements.  */
extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fpclass_ph_mask (__m512h __A, const int __imm)
{
  return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
						       __imm,
						       (__mmask32) -1);
}

#else
/* Macro forms for -O0, where the imm8 must reach the builtin verbatim.  */
#define _mm512_mask_fpclass_ph_mask(u, x, c)				\
  ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
						 (int) (c),(__mmask32)(u)))

#define _mm512_fpclass_ph_mask(x, c)					\
  ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
						 (int) (c),(__mmask32)-1))
#endif /* __OPTIMIZE__ */
2330 1.1 mrg
/* Intrinsics vgetexpph, vgetexpsh.  */
/* Extract the biased-exponent-as-_Float16 of the low element of __B;
   upper 7 elements are copied from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getexp_sh (__m128h __A, __m128h __B)
{
  return (__m128h)
    __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
					(__v8hf) _mm_setzero_ph (),
					(__mmask8) -1,
					_MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking variant: low result element taken from __W when bit 0 of
   __U is clear.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getexp_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
{
  return (__m128h)
    __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
					(__v8hf) __W, (__mmask8) __U,
					_MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking variant: low element zeroed when bit 0 of __U is clear.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getexp_sh (__mmask8 __U, __m128h __A, __m128h __B)
{
  return (__m128h)
    __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
					(__v8hf) _mm_setzero_ph (),
					(__mmask8) __U,
					_MM_FROUND_CUR_DIRECTION);
}
2363 1.1 mrg
2364 1.1 mrg extern __inline __m512h
2365 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2366 1.1 mrg _mm512_getexp_ph (__m512h __A)
2367 1.1 mrg {
2368 1.1 mrg return (__m512h)
2369 1.1 mrg __builtin_ia32_getexpph512_mask ((__v32hf) __A,
2370 1.1 mrg (__v32hf) _mm512_setzero_ph (),
2371 1.1 mrg (__mmask32) -1, _MM_FROUND_CUR_DIRECTION);
2372 1.1 mrg }
2373 1.1 mrg
/* Packed getexp, merge-masking: __U selects lanes, __W is the
   pass-through operand.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getexp_ph (__m512h __W, __mmask32 __U, __m512h __A)
{
  return (__m512h)
    __builtin_ia32_getexpph512_mask ((__v32hf) __A, (__v32hf) __W,
				     (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
}
2382 1.1 mrg
/* Packed getexp, zero-masking: pass-through operand is a zero
   vector.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getexp_ph (__mmask32 __U, __m512h __A)
{
  return (__m512h)
    __builtin_ia32_getexpph512_mask ((__v32hf) __A,
				     (__v32hf) _mm512_setzero_ph (),
				     (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
}
2392 1.1 mrg
2393 1.1 mrg #ifdef __OPTIMIZE__
/* Scalar getexp with an explicit rounding-control argument __R
   (inline form lives under __OPTIMIZE__ so __R folds to a constant).
   NOTE(review): the zero-vector argument lacks the (__v8hf) cast the
   sibling functions use; __m128h converts implicitly.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getexp_round_sh (__m128h __A, __m128h __B, const int __R)
{
  return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
						       (__v8hf) __B,
						       _mm_setzero_ph (),
						       (__mmask8) -1,
						       __R);
}
2404 1.1 mrg
/* Scalar getexp, merge-masking, explicit rounding __R.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getexp_round_sh (__m128h __W, __mmask8 __U, __m128h __A,
			  __m128h __B, const int __R)
{
  return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
						       (__v8hf) __B,
						       (__v8hf) __W,
						       (__mmask8) __U, __R);
}
2415 1.1 mrg
/* Scalar getexp, zero-masking, explicit rounding __R.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getexp_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
			   const int __R)
{
  return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
						       (__v8hf) __B,
						       (__v8hf)
						       _mm_setzero_ph (),
						       (__mmask8) __U, __R);
}
2427 1.1 mrg
/* Packed getexp, explicit rounding __R; unmasked.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getexp_round_ph (__m512h __A, const int __R)
{
  return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
						    (__v32hf)
						    _mm512_setzero_ph (),
						    (__mmask32) -1, __R);
}
2437 1.1 mrg
/* Packed getexp, merge-masking, explicit rounding __R.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getexp_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
			     const int __R)
{
  return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
						    (__v32hf) __W,
						    (__mmask32) __U, __R);
}
2447 1.1 mrg
/* Packed getexp, zero-masking, explicit rounding __R.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getexp_round_ph (__mmask32 __U, __m512h __A, const int __R)
{
  return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
						    (__v32hf)
						    _mm512_setzero_ph (),
						    (__mmask32) __U, __R);
}
2457 1.1 mrg
2458 1.1 mrg #else
/* Macro form of _mm_getexp_round_sh for non-optimizing builds.
   The rounding argument is parenthesized for macro hygiene, matching
   the other *_round macros in this file.  */
#define _mm_getexp_round_sh(A, B, R) \
  ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A), \
					       (__v8hf)(__m128h)(B), \
					       (__v8hf)_mm_setzero_ph(), \
					       (__mmask8)-1, (R)))
2464 1.1 mrg
/* Macro form of _mm_mask_getexp_round_sh.  Every argument is cast and
   parenthesized and the whole expansion is wrapped in parentheses,
   matching the sibling *_round macros (the original expanded
   A/B/W/U/C bare, which breaks for expression arguments).  */
#define _mm_mask_getexp_round_sh(W, U, A, B, C) \
  ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A), \
					       (__v8hf)(__m128h)(B), \
					       (__v8hf)(__m128h)(W), \
					       (__mmask8)(U), (C)))
2467 1.1 mrg
/* Macro form of _mm_maskz_getexp_round_sh.  Arguments are cast and
   parenthesized and the expansion is wrapped in parentheses, matching
   the sibling *_round macros (the original expanded A/B/U/C bare).  */
#define _mm_maskz_getexp_round_sh(U, A, B, C) \
  ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A), \
					       (__v8hf)(__m128h)(B), \
					       (__v8hf)_mm_setzero_ph(), \
					       (__mmask8)(U), (C)))
2472 1.1 mrg
/* Macro forms of the 512-bit getexp round intrinsics for
   non-optimizing builds (unmasked / merge-masked / zero-masked).  */
#define _mm512_getexp_round_ph(A, R) \
  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
					    (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R))

#define _mm512_mask_getexp_round_ph(W, U, A, R) \
  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
					    (__v32hf)(__m512h)(W), (__mmask32)(U), R))

#define _mm512_maskz_getexp_round_ph(U, A, R) \
  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
					    (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R))
2484 1.1 mrg
2485 1.1 mrg #endif /* __OPTIMIZE__ */
2486 1.1 mrg
2487 1.1 mrg /* Intrinsics vgetmantph, vgetmantsh. */
2488 1.1 mrg #ifdef __OPTIMIZE__
/* Scalar getmant (vgetmantsh).  The builtin immediate packs the
   sign-control enum __D in bits 3:2 and the normalization interval
   __C in bits 1:0 ((__D << 2) | __C); unmasked, current rounding.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getmant_sh (__m128h __A, __m128h __B,
		_MM_MANTISSA_NORM_ENUM __C,
		_MM_MANTISSA_SIGN_ENUM __D)
{
  return (__m128h)
    __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
					 (__D << 2) | __C, _mm_setzero_ph (),
					 (__mmask8) -1,
					 _MM_FROUND_CUR_DIRECTION);
}
2501 1.1 mrg
/* Scalar getmant, merge-masking: __U is the write-mask, __W the
   pass-through operand; immediate is (__D << 2) | __C.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getmant_sh (__m128h __W, __mmask8 __U, __m128h __A,
		     __m128h __B, _MM_MANTISSA_NORM_ENUM __C,
		     _MM_MANTISSA_SIGN_ENUM __D)
{
  return (__m128h)
    __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
					 (__D << 2) | __C, (__v8hf) __W,
					 __U, _MM_FROUND_CUR_DIRECTION);
}
2513 1.1 mrg
/* Scalar getmant, zero-masking: pass-through operand is a zero
   vector.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getmant_sh (__mmask8 __U, __m128h __A, __m128h __B,
		      _MM_MANTISSA_NORM_ENUM __C,
		      _MM_MANTISSA_SIGN_ENUM __D)
{
  return (__m128h)
    __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
					 (__D << 2) | __C,
					 (__v8hf) _mm_setzero_ph(),
					 __U, _MM_FROUND_CUR_DIRECTION);
}
2526 1.1 mrg
/* Packed getmant on 32 FP16 elements (vgetmantph): immediate is
   (__C << 2) | __B; unmasked, current rounding direction.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getmant_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
		   _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
						     (__C << 2) | __B,
						     _mm512_setzero_ph (),
						     (__mmask32) -1,
						     _MM_FROUND_CUR_DIRECTION);
}
2538 1.1 mrg
/* Packed getmant, merge-masking form.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getmant_ph (__m512h __W, __mmask32 __U, __m512h __A,
			_MM_MANTISSA_NORM_ENUM __B,
			_MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
						     (__C << 2) | __B,
						     (__v32hf) __W, __U,
						     _MM_FROUND_CUR_DIRECTION);
}
2550 1.1 mrg
/* Packed getmant, zero-masking form.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getmant_ph (__mmask32 __U, __m512h __A,
			 _MM_MANTISSA_NORM_ENUM __B,
			 _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
						     (__C << 2) | __B,
						     (__v32hf)
						     _mm512_setzero_ph (),
						     __U,
						     _MM_FROUND_CUR_DIRECTION);
}
2564 1.1 mrg
/* Scalar getmant with explicit rounding __R; unmasked.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getmant_round_sh (__m128h __A, __m128h __B,
		      _MM_MANTISSA_NORM_ENUM __C,
		      _MM_MANTISSA_SIGN_ENUM __D, const int __R)
{
  return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
							(__v8hf) __B,
							(__D << 2) | __C,
							_mm_setzero_ph (),
							(__mmask8) -1,
							__R);
}
2578 1.1 mrg
/* Scalar getmant, merge-masking, explicit rounding __R.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getmant_round_sh (__m128h __W, __mmask8 __U, __m128h __A,
			   __m128h __B, _MM_MANTISSA_NORM_ENUM __C,
			   _MM_MANTISSA_SIGN_ENUM __D, const int __R)
{
  return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
							(__v8hf) __B,
							(__D << 2) | __C,
							(__v8hf) __W,
							__U, __R);
}
2591 1.1 mrg
/* Scalar getmant, zero-masking, explicit rounding __R.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getmant_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
			    _MM_MANTISSA_NORM_ENUM __C,
			    _MM_MANTISSA_SIGN_ENUM __D, const int __R)
{
  return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
							(__v8hf) __B,
							(__D << 2) | __C,
							(__v8hf)
							_mm_setzero_ph(),
							__U, __R);
}
2605 1.1 mrg
/* Packed getmant, explicit rounding __R; unmasked.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getmant_round_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
			 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
{
  return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
						     (__C << 2) | __B,
						     _mm512_setzero_ph (),
						     (__mmask32) -1, __R);
}
2616 1.1 mrg
/* Packed getmant, merge-masking, explicit rounding __R.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getmant_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
			      _MM_MANTISSA_NORM_ENUM __B,
			      _MM_MANTISSA_SIGN_ENUM __C, const int __R)
{
  return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
						     (__C << 2) | __B,
						     (__v32hf) __W, __U,
						     __R);
}
2628 1.1 mrg
/* Packed getmant, zero-masking, explicit rounding __R.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getmant_round_ph (__mmask32 __U, __m512h __A,
			       _MM_MANTISSA_NORM_ENUM __B,
			       _MM_MANTISSA_SIGN_ENUM __C, const int __R)
{
  return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
						     (__C << 2) | __B,
						     (__v32hf)
						     _mm512_setzero_ph (),
						     __U, __R);
}
2641 1.1 mrg
2642 1.1 mrg #else
/* Macro forms of the getmant intrinsics for non-optimizing builds.
   In each expansion the immediate packs the sign-control enum in
   bits 3:2 and the normalization interval in bits 1:0.  */
#define _mm512_getmant_ph(X, B, C)					\
  ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X),	\
					      (int)(((C)<<2) | (B)),	\
					      (__v32hf)(__m512h)	\
					      _mm512_setzero_ph(),	\
					      (__mmask32)-1,		\
					      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ph(W, U, X, B, C)				\
  ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X),	\
					      (int)(((C)<<2) | (B)),	\
					      (__v32hf)(__m512h)(W),	\
					      (__mmask32)(U),		\
					      _MM_FROUND_CUR_DIRECTION))


#define _mm512_maskz_getmant_ph(U, X, B, C)				\
  ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X),	\
					      (int)(((C)<<2) | (B)),	\
					      (__v32hf)(__m512h)	\
					      _mm512_setzero_ph(),	\
					      (__mmask32)(U),		\
					      _MM_FROUND_CUR_DIRECTION))

#define _mm_getmant_sh(X, Y, C, D)					\
  ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
						 (__v8hf)(__m128h)(Y),	\
						 (int)(((D)<<2) | (C)),	\
						 (__v8hf)(__m128h)	\
						 _mm_setzero_ph (),	\
						 (__mmask8)-1,		\
						 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_sh(W, U, X, Y, C, D)				\
  ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
						 (__v8hf)(__m128h)(Y),	\
						 (int)(((D)<<2) | (C)),	\
						 (__v8hf)(__m128h)(W),	\
						 (__mmask8)(U),		\
						 _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_sh(U, X, Y, C, D)				\
  ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
						 (__v8hf)(__m128h)(Y),	\
						 (int)(((D)<<2) | (C)),	\
						 (__v8hf)(__m128h)	\
						 _mm_setzero_ph(),	\
						 (__mmask8)(U),		\
						 _MM_FROUND_CUR_DIRECTION))

#define _mm512_getmant_round_ph(X, B, C, R)				\
  ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X),	\
					      (int)(((C)<<2) | (B)),	\
					      (__v32hf)(__m512h)	\
					      _mm512_setzero_ph(),	\
					      (__mmask32)-1,		\
					      (R)))

#define _mm512_mask_getmant_round_ph(W, U, X, B, C, R)			\
  ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X),	\
					      (int)(((C)<<2) | (B)),	\
					      (__v32hf)(__m512h)(W),	\
					      (__mmask32)(U),		\
					      (R)))


#define _mm512_maskz_getmant_round_ph(U, X, B, C, R)			\
  ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X),	\
					      (int)(((C)<<2) | (B)),	\
					      (__v32hf)(__m512h)	\
					      _mm512_setzero_ph(),	\
					      (__mmask32)(U),		\
					      (R)))

#define _mm_getmant_round_sh(X, Y, C, D, R)				\
  ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
						 (__v8hf)(__m128h)(Y),	\
						 (int)(((D)<<2) | (C)),	\
						 (__v8hf)(__m128h)	\
						 _mm_setzero_ph (),	\
						 (__mmask8)-1,		\
						 (R)))

#define _mm_mask_getmant_round_sh(W, U, X, Y, C, D, R)			\
  ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
						 (__v8hf)(__m128h)(Y),	\
						 (int)(((D)<<2) | (C)),	\
						 (__v8hf)(__m128h)(W),	\
						 (__mmask8)(U),		\
						 (R)))

#define _mm_maskz_getmant_round_sh(U, X, Y, C, D, R)			\
  ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
						 (__v8hf)(__m128h)(Y),	\
						 (int)(((D)<<2) | (C)),	\
						 (__v8hf)(__m128h)	\
						 _mm_setzero_ph(),	\
						 (__mmask8)(U),		\
						 (R)))
2742 1.1 mrg
2743 1.1 mrg #endif /* __OPTIMIZE__ */
2744 1.1 mrg
2745 1.1 mrg /* Intrinsics vmovw. */
/* Place the 16-bit value __A in the lowest word of a 128-bit integer
   vector and clear the remaining seven words.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi16_si128 (short __A)
{
  __v8hi __r = { __A, 0, 0, 0, 0, 0, 0, 0 };
  return (__m128i) __r;
}
2752 1.1 mrg
/* Return the lowest 16-bit element of __A as a (signed) short.  */
extern __inline short
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi128_si16 (__m128i __A)
{
  __v8hi __v = (__v8hi) __A;
  return __v[0];
}
2759 1.1 mrg
2760 1.1 mrg /* Intrinsics vmovsh. */
/* Masked scalar FP16 load (vmovsh): load one _Float16 from __C under
   write-mask __B, with __A as the pass-through operand passed to the
   builtin.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_load_sh (__m128h __A, __mmask8 __B, _Float16 const* __C)
{
  return __builtin_ia32_loadsh_mask (__C, __A, __B);
}
2767 1.1 mrg
/* Masked scalar FP16 load, zero-masking form: pass-through operand is
   a zero vector.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_load_sh (__mmask8 __A, _Float16 const* __B)
{
  return __builtin_ia32_loadsh_mask (__B, _mm_setzero_ph (), __A);
}
2774 1.1 mrg
/* Masked scalar FP16 store (vmovsh): store the low element of __C to
   *__A under write-mask __B.
   NOTE(review): the destination pointer is const-qualified even though
   this is a store; it matches the builtin's prototype here — confirm
   against upstream before changing.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_store_sh (_Float16 const* __A, __mmask8 __B, __m128h __C)
{
  __builtin_ia32_storesh_mask (__A, __C, __B);
}
2781 1.1 mrg
2782 1.1 mrg extern __inline __m128h
2783 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2784 1.1 mrg _mm_move_sh (__m128h __A, __m128h __B)
2785 1.1 mrg {
2786 1.1 mrg __A[0] = __B[0];
2787 1.1 mrg return __A;
2788 1.1 mrg }
2789 1.1 mrg
/* Masked scalar move (vmovsh): sources __C/__D, write-mask __B, with
   __A as the pass-through operand passed to the builtin.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_move_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_vmovsh_mask (__C, __D, __A, __B);
}
2796 1.1 mrg
/* Masked scalar move, zero-masking form: pass-through operand is a
   zero vector.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_move_sh (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_vmovsh_mask (__B, __C, _mm_setzero_ph (), __A);
}
2803 1.1 mrg
2804 1.1 mrg /* Intrinsics vcvtph2dq. */
/* Convert 16 FP16 elements to 32-bit signed integers (vcvtph2dq):
   unmasked, current rounding direction.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtph_epi32 (__m256h __A)
{
  return (__m512i)
    __builtin_ia32_vcvtph2dq512_mask_round (__A,
					    (__v16si)
					    _mm512_setzero_si512 (),
					    (__mmask16) -1,
					    _MM_FROUND_CUR_DIRECTION);
}
2816 1.1 mrg
/* vcvtph2dq, merge-masking: __B is the write-mask, __A the
   pass-through operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtph_epi32 (__m512i __A, __mmask16 __B, __m256h __C)
{
  return (__m512i)
    __builtin_ia32_vcvtph2dq512_mask_round (__C,
					    (__v16si) __A,
					    __B,
					    _MM_FROUND_CUR_DIRECTION);
}
2827 1.1 mrg
/* vcvtph2dq, zero-masking: pass-through operand is a zero vector.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtph_epi32 (__mmask16 __A, __m256h __B)
{
  return (__m512i)
    __builtin_ia32_vcvtph2dq512_mask_round (__B,
					    (__v16si)
					    _mm512_setzero_si512 (),
					    __A,
					    _MM_FROUND_CUR_DIRECTION);
}
2839 1.1 mrg
2840 1.1 mrg #ifdef __OPTIMIZE__
/* vcvtph2dq with explicit rounding __B; unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundph_epi32 (__m256h __A, int __B)
{
  return (__m512i)
    __builtin_ia32_vcvtph2dq512_mask_round (__A,
					    (__v16si)
					    _mm512_setzero_si512 (),
					    (__mmask16) -1,
					    __B);
}
2852 1.1 mrg
/* vcvtph2dq, merge-masking, explicit rounding __D.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundph_epi32 (__m512i __A, __mmask16 __B, __m256h __C, int __D)
{
  return (__m512i)
    __builtin_ia32_vcvtph2dq512_mask_round (__C,
					    (__v16si) __A,
					    __B,
					    __D);
}
2863 1.1 mrg
/* vcvtph2dq, zero-masking, explicit rounding __C.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C)
{
  return (__m512i)
    __builtin_ia32_vcvtph2dq512_mask_round (__B,
					    (__v16si)
					    _mm512_setzero_si512 (),
					    __A,
					    __C);
}
2875 1.1 mrg
2876 1.1 mrg #else
/* Macro forms of the vcvtph2dq round intrinsics for non-optimizing
   builds (unmasked / merge-masked / zero-masked).  */
#define _mm512_cvt_roundph_epi32(A, B)					\
  ((__m512i)								\
   __builtin_ia32_vcvtph2dq512_mask_round ((A),				\
					   (__v16si)			\
					   _mm512_setzero_si512 (),	\
					   (__mmask16)-1,		\
					   (B)))

#define _mm512_mask_cvt_roundph_epi32(A, B, C, D)			\
  ((__m512i)								\
   __builtin_ia32_vcvtph2dq512_mask_round ((C), (__v16si)(A), (B), (D)))

#define _mm512_maskz_cvt_roundph_epi32(A, B, C)				\
  ((__m512i)								\
   __builtin_ia32_vcvtph2dq512_mask_round ((B),				\
					   (__v16si)			\
					   _mm512_setzero_si512 (),	\
					   (A),				\
					   (C)))
2896 1.1 mrg
2897 1.1 mrg #endif /* __OPTIMIZE__ */
2898 1.1 mrg
2899 1.1 mrg /* Intrinsics vcvtph2udq. */
/* Convert 16 FP16 elements to 32-bit unsigned integers (vcvtph2udq):
   unmasked, current rounding direction.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtph_epu32 (__m256h __A)
{
  return (__m512i)
    __builtin_ia32_vcvtph2udq512_mask_round (__A,
					     (__v16si)
					     _mm512_setzero_si512 (),
					     (__mmask16) -1,
					     _MM_FROUND_CUR_DIRECTION);
}
2911 1.1 mrg
/* vcvtph2udq, merge-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtph_epu32 (__m512i __A, __mmask16 __B, __m256h __C)
{
  return (__m512i)
    __builtin_ia32_vcvtph2udq512_mask_round (__C,
					     (__v16si) __A,
					     __B,
					     _MM_FROUND_CUR_DIRECTION);
}
2922 1.1 mrg
/* vcvtph2udq, zero-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtph_epu32 (__mmask16 __A, __m256h __B)
{
  return (__m512i)
    __builtin_ia32_vcvtph2udq512_mask_round (__B,
					     (__v16si)
					     _mm512_setzero_si512 (),
					     __A,
					     _MM_FROUND_CUR_DIRECTION);
}
2934 1.1 mrg
2935 1.1 mrg #ifdef __OPTIMIZE__
/* vcvtph2udq with explicit rounding __B; unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundph_epu32 (__m256h __A, int __B)
{
  return (__m512i)
    __builtin_ia32_vcvtph2udq512_mask_round (__A,
					     (__v16si)
					     _mm512_setzero_si512 (),
					     (__mmask16) -1,
					     __B);
}
2947 1.1 mrg
/* vcvtph2udq, merge-masking, explicit rounding __D.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundph_epu32 (__m512i __A, __mmask16 __B, __m256h __C, int __D)
{
  return (__m512i)
    __builtin_ia32_vcvtph2udq512_mask_round (__C,
					     (__v16si) __A,
					     __B,
					     __D);
}
2958 1.1 mrg
/* vcvtph2udq, zero-masking, explicit rounding __C.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C)
{
  return (__m512i)
    __builtin_ia32_vcvtph2udq512_mask_round (__B,
					     (__v16si)
					     _mm512_setzero_si512 (),
					     __A,
					     __C);
}
2970 1.1 mrg
2971 1.1 mrg #else
/* Macro forms of the vcvtph2udq round intrinsics for non-optimizing
   builds.  */
#define _mm512_cvt_roundph_epu32(A, B)					\
  ((__m512i)								\
   __builtin_ia32_vcvtph2udq512_mask_round ((A),			\
					    (__v16si)			\
					    _mm512_setzero_si512 (),	\
					    (__mmask16)-1,		\
					    (B)))

#define _mm512_mask_cvt_roundph_epu32(A, B, C, D)			\
  ((__m512i)								\
   __builtin_ia32_vcvtph2udq512_mask_round ((C), (__v16si)(A), (B), (D)))

#define _mm512_maskz_cvt_roundph_epu32(A, B, C)				\
  ((__m512i)								\
   __builtin_ia32_vcvtph2udq512_mask_round ((B),			\
					    (__v16si)			\
					    _mm512_setzero_si512 (),	\
					    (A),			\
					    (C)))
2991 1.1 mrg
2992 1.1 mrg #endif /* __OPTIMIZE__ */
2993 1.1 mrg
2994 1.1 mrg /* Intrinsics vcvttph2dq. */
/* Truncating convert of 16 FP16 elements to 32-bit signed integers
   (vcvttph2dq): unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvttph_epi32 (__m256h __A)
{
  return (__m512i)
    __builtin_ia32_vcvttph2dq512_mask_round (__A,
					     (__v16si)
					     _mm512_setzero_si512 (),
					     (__mmask16) -1,
					     _MM_FROUND_CUR_DIRECTION);
}
3006 1.1 mrg
/* vcvttph2dq, merge-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvttph_epi32 (__m512i __A, __mmask16 __B, __m256h __C)
{
  return (__m512i)
    __builtin_ia32_vcvttph2dq512_mask_round (__C,
					     (__v16si) __A,
					     __B,
					     _MM_FROUND_CUR_DIRECTION);
}
3017 1.1 mrg
/* vcvttph2dq, zero-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvttph_epi32 (__mmask16 __A, __m256h __B)
{
  return (__m512i)
    __builtin_ia32_vcvttph2dq512_mask_round (__B,
					     (__v16si)
					     _mm512_setzero_si512 (),
					     __A,
					     _MM_FROUND_CUR_DIRECTION);
}
3029 1.1 mrg
3030 1.1 mrg #ifdef __OPTIMIZE__
/* vcvttph2dq with explicit rounding/SAE control __B; unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundph_epi32 (__m256h __A, int __B)
{
  return (__m512i)
    __builtin_ia32_vcvttph2dq512_mask_round (__A,
					     (__v16si)
					     _mm512_setzero_si512 (),
					     (__mmask16) -1,
					     __B);
}
3042 1.1 mrg
/* vcvttph2dq, merge-masking, explicit rounding/SAE control __D.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundph_epi32 (__m512i __A, __mmask16 __B,
				__m256h __C, int __D)
{
  return (__m512i)
    __builtin_ia32_vcvttph2dq512_mask_round (__C,
					     (__v16si) __A,
					     __B,
					     __D);
}
3054 1.1 mrg
/* vcvttph2dq, zero-masking, explicit rounding/SAE control __C.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C)
{
  return (__m512i)
    __builtin_ia32_vcvttph2dq512_mask_round (__B,
					     (__v16si)
					     _mm512_setzero_si512 (),
					     __A,
					     __C);
}
3066 1.1 mrg
3067 1.1 mrg #else
/* Macro forms of the vcvttph2dq round intrinsics for non-optimizing
   builds.  */
#define _mm512_cvtt_roundph_epi32(A, B)					\
  ((__m512i)								\
   __builtin_ia32_vcvttph2dq512_mask_round ((A),			\
					    (__v16si)			\
					    (_mm512_setzero_si512 ()),	\
					    (__mmask16)(-1), (B)))

#define _mm512_mask_cvtt_roundph_epi32(A, B, C, D)			\
  ((__m512i)								\
   __builtin_ia32_vcvttph2dq512_mask_round ((C),			\
					    (__v16si)(A),		\
					    (B),			\
					    (D)))

#define _mm512_maskz_cvtt_roundph_epi32(A, B, C)			\
  ((__m512i)								\
   __builtin_ia32_vcvttph2dq512_mask_round ((B),			\
					    (__v16si)			\
					    _mm512_setzero_si512 (),	\
					    (A),			\
					    (C)))
3089 1.1 mrg
3090 1.1 mrg #endif /* __OPTIMIZE__ */
3091 1.1 mrg
3092 1.1 mrg /* Intrinsics vcvttph2udq. */
/* Truncating convert of 16 FP16 elements to 32-bit unsigned integers
   (vcvttph2udq): unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvttph_epu32 (__m256h __A)
{
  return (__m512i)
    __builtin_ia32_vcvttph2udq512_mask_round (__A,
					      (__v16si)
					      _mm512_setzero_si512 (),
					      (__mmask16) -1,
					      _MM_FROUND_CUR_DIRECTION);
}
3104 1.1 mrg
/* vcvttph2udq, merge-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvttph_epu32 (__m512i __A, __mmask16 __B, __m256h __C)
{
  return (__m512i)
    __builtin_ia32_vcvttph2udq512_mask_round (__C,
					      (__v16si) __A,
					      __B,
					      _MM_FROUND_CUR_DIRECTION);
}
3115 1.1 mrg
/* vcvttph2udq, zero-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvttph_epu32 (__mmask16 __A, __m256h __B)
{
  return (__m512i)
    __builtin_ia32_vcvttph2udq512_mask_round (__B,
					      (__v16si)
					      _mm512_setzero_si512 (),
					      __A,
					      _MM_FROUND_CUR_DIRECTION);
}
3127 1.1 mrg
3128 1.1 mrg #ifdef __OPTIMIZE__
/* vcvttph2udq with explicit rounding/SAE control __B; unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundph_epu32 (__m256h __A, int __B)
{
  return (__m512i)
    __builtin_ia32_vcvttph2udq512_mask_round (__A,
					      (__v16si)
					      _mm512_setzero_si512 (),
					      (__mmask16) -1,
					      __B);
}
3140 1.1 mrg
/* vcvttph2udq, merge-masking, explicit rounding/SAE control __D.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundph_epu32 (__m512i __A, __mmask16 __B,
				__m256h __C, int __D)
{
  return (__m512i)
    __builtin_ia32_vcvttph2udq512_mask_round (__C,
					      (__v16si) __A,
					      __B,
					      __D);
}
3152 1.1 mrg
/* vcvttph2udq, zero-masking, explicit rounding/SAE control __C.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C)
{
  return (__m512i)
    __builtin_ia32_vcvttph2udq512_mask_round (__B,
					      (__v16si)
					      _mm512_setzero_si512 (),
					      __A,
					      __C);
}
3164 1.1 mrg
3165 1.1 mrg #else
/* Macro forms of the vcvttph2udq round intrinsics for non-optimizing
   builds.  */
#define _mm512_cvtt_roundph_epu32(A, B)					\
  ((__m512i)								\
   __builtin_ia32_vcvttph2udq512_mask_round ((A),			\
					     (__v16si)			\
					     _mm512_setzero_si512 (),	\
					     (__mmask16)-1,		\
					     (B)))

#define _mm512_mask_cvtt_roundph_epu32(A, B, C, D)			\
  ((__m512i)								\
   __builtin_ia32_vcvttph2udq512_mask_round ((C),			\
					     (__v16si)(A),		\
					     (B),			\
					     (D)))

#define _mm512_maskz_cvtt_roundph_epu32(A, B, C)			\
  ((__m512i)								\
   __builtin_ia32_vcvttph2udq512_mask_round ((B),			\
					     (__v16si)			\
					     _mm512_setzero_si512 (),	\
					     (A),			\
					     (C)))
3188 1.1 mrg
3189 1.1 mrg #endif /* __OPTIMIZE__ */
3190 1.1 mrg
3191 1.1 mrg /* Intrinsics vcvtdq2ph. */
/* Convert 16 32-bit signed integers to FP16 (vcvtdq2ph): unmasked,
   current rounding direction.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi32_ph (__m512i __A)
{
  return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A,
						 _mm256_setzero_ph (),
						 (__mmask16) -1,
						 _MM_FROUND_CUR_DIRECTION);
}
3201 1.1 mrg
/* vcvtdq2ph, merge-masking: __B is the write-mask, __A the
   pass-through operand.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi32_ph (__m256h __A, __mmask16 __B, __m512i __C)
{
  return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C,
						 __A,
						 __B,
						 _MM_FROUND_CUR_DIRECTION);
}
3211 1.1 mrg
3212 1.1 mrg extern __inline __m256h
3213 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3214 1.1 mrg _mm512_maskz_cvtepi32_ph (__mmask16 __A, __m512i __B)
3215 1.1 mrg {
3216 1.1 mrg return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B,
3217 1.1 mrg _mm256_setzero_ph (),
3218 1.1 mrg __A,
3219 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3220 1.1 mrg }
3221 1.1 mrg
3222 1.1 mrg #ifdef __OPTIMIZE__
3223 1.1 mrg extern __inline __m256h
3224 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3225 1.1 mrg _mm512_cvt_roundepi32_ph (__m512i __A, int __B)
3226 1.1 mrg {
3227 1.1 mrg return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A,
3228 1.1 mrg _mm256_setzero_ph (),
3229 1.1 mrg (__mmask16) -1,
3230 1.1 mrg __B);
3231 1.1 mrg }
3232 1.1 mrg
3233 1.1 mrg extern __inline __m256h
3234 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3235 1.1 mrg _mm512_mask_cvt_roundepi32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D)
3236 1.1 mrg {
3237 1.1 mrg return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C,
3238 1.1 mrg __A,
3239 1.1 mrg __B,
3240 1.1 mrg __D);
3241 1.1 mrg }
3242 1.1 mrg
3243 1.1 mrg extern __inline __m256h
3244 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3245 1.1 mrg _mm512_maskz_cvt_roundepi32_ph (__mmask16 __A, __m512i __B, int __C)
3246 1.1 mrg {
3247 1.1 mrg return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B,
3248 1.1 mrg _mm256_setzero_ph (),
3249 1.1 mrg __A,
3250 1.1 mrg __C);
3251 1.1 mrg }
3252 1.1 mrg
3253 1.1 mrg #else
3254 1.1 mrg #define _mm512_cvt_roundepi32_ph(A, B) \
3255 1.1 mrg (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(A), \
3256 1.1 mrg _mm256_setzero_ph (), \
3257 1.1 mrg (__mmask16)-1, \
3258 1.1 mrg (B)))
3259 1.1 mrg
3260 1.1 mrg #define _mm512_mask_cvt_roundepi32_ph(A, B, C, D) \
3261 1.1 mrg (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(C), \
3262 1.1 mrg (A), \
3263 1.1 mrg (B), \
3264 1.1 mrg (D)))
3265 1.1 mrg
3266 1.1 mrg #define _mm512_maskz_cvt_roundepi32_ph(A, B, C) \
3267 1.1 mrg (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(B), \
3268 1.1 mrg _mm256_setzero_ph (), \
3269 1.1 mrg (A), \
3270 1.1 mrg (C)))
3271 1.1 mrg
3272 1.1 mrg #endif /* __OPTIMIZE__ */
3273 1.1 mrg
3274 1.1 mrg /* Intrinsics vcvtudq2ph. */
3275 1.1 mrg extern __inline __m256h
3276 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3277 1.1 mrg _mm512_cvtepu32_ph (__m512i __A)
3278 1.1 mrg {
3279 1.1 mrg return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A,
3280 1.1 mrg _mm256_setzero_ph (),
3281 1.1 mrg (__mmask16) -1,
3282 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3283 1.1 mrg }
3284 1.1 mrg
3285 1.1 mrg extern __inline __m256h
3286 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3287 1.1 mrg _mm512_mask_cvtepu32_ph (__m256h __A, __mmask16 __B, __m512i __C)
3288 1.1 mrg {
3289 1.1 mrg return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C,
3290 1.1 mrg __A,
3291 1.1 mrg __B,
3292 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3293 1.1 mrg }
3294 1.1 mrg
3295 1.1 mrg extern __inline __m256h
3296 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3297 1.1 mrg _mm512_maskz_cvtepu32_ph (__mmask16 __A, __m512i __B)
3298 1.1 mrg {
3299 1.1 mrg return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B,
3300 1.1 mrg _mm256_setzero_ph (),
3301 1.1 mrg __A,
3302 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3303 1.1 mrg }
3304 1.1 mrg
3305 1.1 mrg #ifdef __OPTIMIZE__
3306 1.1 mrg extern __inline __m256h
3307 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3308 1.1 mrg _mm512_cvt_roundepu32_ph (__m512i __A, int __B)
3309 1.1 mrg {
3310 1.1 mrg return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A,
3311 1.1 mrg _mm256_setzero_ph (),
3312 1.1 mrg (__mmask16) -1,
3313 1.1 mrg __B);
3314 1.1 mrg }
3315 1.1 mrg
3316 1.1 mrg extern __inline __m256h
3317 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3318 1.1 mrg _mm512_mask_cvt_roundepu32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D)
3319 1.1 mrg {
3320 1.1 mrg return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C,
3321 1.1 mrg __A,
3322 1.1 mrg __B,
3323 1.1 mrg __D);
3324 1.1 mrg }
3325 1.1 mrg
3326 1.1 mrg extern __inline __m256h
3327 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3328 1.1 mrg _mm512_maskz_cvt_roundepu32_ph (__mmask16 __A, __m512i __B, int __C)
3329 1.1 mrg {
3330 1.1 mrg return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B,
3331 1.1 mrg _mm256_setzero_ph (),
3332 1.1 mrg __A,
3333 1.1 mrg __C);
3334 1.1 mrg }
3335 1.1 mrg
3336 1.1 mrg #else
3337 1.1 mrg #define _mm512_cvt_roundepu32_ph(A, B) \
3338 1.1 mrg (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)(A), \
3339 1.1 mrg _mm256_setzero_ph (), \
3340 1.1 mrg (__mmask16)-1, \
3341 1.1 mrg B))
3342 1.1 mrg
3343 1.1 mrg #define _mm512_mask_cvt_roundepu32_ph(A, B, C, D) \
3344 1.1 mrg (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)C, \
3345 1.1 mrg A, \
3346 1.1 mrg B, \
3347 1.1 mrg D))
3348 1.1 mrg
3349 1.1 mrg #define _mm512_maskz_cvt_roundepu32_ph(A, B, C) \
3350 1.1 mrg (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)B, \
3351 1.1 mrg _mm256_setzero_ph (), \
3352 1.1 mrg A, \
3353 1.1 mrg C))
3354 1.1 mrg
3355 1.1 mrg #endif /* __OPTIMIZE__ */
3356 1.1 mrg
3357 1.1 mrg /* Intrinsics vcvtph2qq. */
3358 1.1 mrg extern __inline __m512i
3359 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3360 1.1 mrg _mm512_cvtph_epi64 (__m128h __A)
3361 1.1 mrg {
3362 1.1 mrg return __builtin_ia32_vcvtph2qq512_mask_round (__A,
3363 1.1 mrg _mm512_setzero_si512 (),
3364 1.1 mrg (__mmask8) -1,
3365 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3366 1.1 mrg }
3367 1.1 mrg
3368 1.1 mrg extern __inline __m512i
3369 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3370 1.1 mrg _mm512_mask_cvtph_epi64 (__m512i __A, __mmask8 __B, __m128h __C)
3371 1.1 mrg {
3372 1.1 mrg return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B,
3373 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3374 1.1 mrg }
3375 1.1 mrg
3376 1.1 mrg extern __inline __m512i
3377 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3378 1.1 mrg _mm512_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
3379 1.1 mrg {
3380 1.1 mrg return __builtin_ia32_vcvtph2qq512_mask_round (__B,
3381 1.1 mrg _mm512_setzero_si512 (),
3382 1.1 mrg __A,
3383 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3384 1.1 mrg }
3385 1.1 mrg
3386 1.1 mrg #ifdef __OPTIMIZE__
3387 1.1 mrg extern __inline __m512i
3388 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3389 1.1 mrg _mm512_cvt_roundph_epi64 (__m128h __A, int __B)
3390 1.1 mrg {
3391 1.1 mrg return __builtin_ia32_vcvtph2qq512_mask_round (__A,
3392 1.1 mrg _mm512_setzero_si512 (),
3393 1.1 mrg (__mmask8) -1,
3394 1.1 mrg __B);
3395 1.1 mrg }
3396 1.1 mrg
3397 1.1 mrg extern __inline __m512i
3398 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3399 1.1 mrg _mm512_mask_cvt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
3400 1.1 mrg {
3401 1.1 mrg return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B, __D);
3402 1.1 mrg }
3403 1.1 mrg
3404 1.1 mrg extern __inline __m512i
3405 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3406 1.1 mrg _mm512_maskz_cvt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C)
3407 1.1 mrg {
3408 1.1 mrg return __builtin_ia32_vcvtph2qq512_mask_round (__B,
3409 1.1 mrg _mm512_setzero_si512 (),
3410 1.1 mrg __A,
3411 1.1 mrg __C);
3412 1.1 mrg }
3413 1.1 mrg
3414 1.1 mrg #else
3415 1.1 mrg #define _mm512_cvt_roundph_epi64(A, B) \
3416 1.1 mrg (__builtin_ia32_vcvtph2qq512_mask_round ((A), \
3417 1.1 mrg _mm512_setzero_si512 (), \
3418 1.1 mrg (__mmask8)-1, \
3419 1.1 mrg (B)))
3420 1.1 mrg
3421 1.1 mrg #define _mm512_mask_cvt_roundph_epi64(A, B, C, D) \
3422 1.1 mrg (__builtin_ia32_vcvtph2qq512_mask_round ((C), (A), (B), (D)))
3423 1.1 mrg
3424 1.1 mrg #define _mm512_maskz_cvt_roundph_epi64(A, B, C) \
3425 1.1 mrg (__builtin_ia32_vcvtph2qq512_mask_round ((B), \
3426 1.1 mrg _mm512_setzero_si512 (), \
3427 1.1 mrg (A), \
3428 1.1 mrg (C)))
3429 1.1 mrg
3430 1.1 mrg #endif /* __OPTIMIZE__ */
3431 1.1 mrg
3432 1.1 mrg /* Intrinsics vcvtph2uqq. */
3433 1.1 mrg extern __inline __m512i
3434 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3435 1.1 mrg _mm512_cvtph_epu64 (__m128h __A)
3436 1.1 mrg {
3437 1.1 mrg return __builtin_ia32_vcvtph2uqq512_mask_round (__A,
3438 1.1 mrg _mm512_setzero_si512 (),
3439 1.1 mrg (__mmask8) -1,
3440 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3441 1.1 mrg }
3442 1.1 mrg
3443 1.1 mrg extern __inline __m512i
3444 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3445 1.1 mrg _mm512_mask_cvtph_epu64 (__m512i __A, __mmask8 __B, __m128h __C)
3446 1.1 mrg {
3447 1.1 mrg return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B,
3448 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3449 1.1 mrg }
3450 1.1 mrg
3451 1.1 mrg extern __inline __m512i
3452 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3453 1.1 mrg _mm512_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
3454 1.1 mrg {
3455 1.1 mrg return __builtin_ia32_vcvtph2uqq512_mask_round (__B,
3456 1.1 mrg _mm512_setzero_si512 (),
3457 1.1 mrg __A,
3458 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3459 1.1 mrg }
3460 1.1 mrg
3461 1.1 mrg #ifdef __OPTIMIZE__
3462 1.1 mrg
3463 1.1 mrg extern __inline __m512i
3464 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3465 1.1 mrg _mm512_cvt_roundph_epu64 (__m128h __A, int __B)
3466 1.1 mrg {
3467 1.1 mrg return __builtin_ia32_vcvtph2uqq512_mask_round (__A,
3468 1.1 mrg _mm512_setzero_si512 (),
3469 1.1 mrg (__mmask8) -1,
3470 1.1 mrg __B);
3471 1.1 mrg }
3472 1.1 mrg
3473 1.1 mrg extern __inline __m512i
3474 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3475 1.1 mrg _mm512_mask_cvt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
3476 1.1 mrg {
3477 1.1 mrg return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B, __D);
3478 1.1 mrg }
3479 1.1 mrg
3480 1.1 mrg extern __inline __m512i
3481 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3482 1.1 mrg _mm512_maskz_cvt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C)
3483 1.1 mrg {
3484 1.1 mrg return __builtin_ia32_vcvtph2uqq512_mask_round (__B,
3485 1.1 mrg _mm512_setzero_si512 (),
3486 1.1 mrg __A,
3487 1.1 mrg __C);
3488 1.1 mrg }
3489 1.1 mrg
3490 1.1 mrg #else
3491 1.1 mrg #define _mm512_cvt_roundph_epu64(A, B) \
3492 1.1 mrg (__builtin_ia32_vcvtph2uqq512_mask_round ((A), \
3493 1.1 mrg _mm512_setzero_si512 (), \
3494 1.1 mrg (__mmask8)-1, \
3495 1.1 mrg (B)))
3496 1.1 mrg
3497 1.1 mrg #define _mm512_mask_cvt_roundph_epu64(A, B, C, D) \
3498 1.1 mrg (__builtin_ia32_vcvtph2uqq512_mask_round ((C), (A), (B), (D)))
3499 1.1 mrg
3500 1.1 mrg #define _mm512_maskz_cvt_roundph_epu64(A, B, C) \
3501 1.1 mrg (__builtin_ia32_vcvtph2uqq512_mask_round ((B), \
3502 1.1 mrg _mm512_setzero_si512 (), \
3503 1.1 mrg (A), \
3504 1.1 mrg (C)))
3505 1.1 mrg
3506 1.1 mrg #endif /* __OPTIMIZE__ */
3507 1.1 mrg
3508 1.1 mrg /* Intrinsics vcvttph2qq. */
3509 1.1 mrg extern __inline __m512i
3510 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3511 1.1 mrg _mm512_cvttph_epi64 (__m128h __A)
3512 1.1 mrg {
3513 1.1 mrg return __builtin_ia32_vcvttph2qq512_mask_round (__A,
3514 1.1 mrg _mm512_setzero_si512 (),
3515 1.1 mrg (__mmask8) -1,
3516 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3517 1.1 mrg }
3518 1.1 mrg
3519 1.1 mrg extern __inline __m512i
3520 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3521 1.1 mrg _mm512_mask_cvttph_epi64 (__m512i __A, __mmask8 __B, __m128h __C)
3522 1.1 mrg {
3523 1.1 mrg return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B,
3524 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3525 1.1 mrg }
3526 1.1 mrg
3527 1.1 mrg extern __inline __m512i
3528 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3529 1.1 mrg _mm512_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
3530 1.1 mrg {
3531 1.1 mrg return __builtin_ia32_vcvttph2qq512_mask_round (__B,
3532 1.1 mrg _mm512_setzero_si512 (),
3533 1.1 mrg __A,
3534 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3535 1.1 mrg }
3536 1.1 mrg
3537 1.1 mrg #ifdef __OPTIMIZE__
3538 1.1 mrg extern __inline __m512i
3539 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3540 1.1 mrg _mm512_cvtt_roundph_epi64 (__m128h __A, int __B)
3541 1.1 mrg {
3542 1.1 mrg return __builtin_ia32_vcvttph2qq512_mask_round (__A,
3543 1.1 mrg _mm512_setzero_si512 (),
3544 1.1 mrg (__mmask8) -1,
3545 1.1 mrg __B);
3546 1.1 mrg }
3547 1.1 mrg
3548 1.1 mrg extern __inline __m512i
3549 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3550 1.1 mrg _mm512_mask_cvtt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
3551 1.1 mrg {
3552 1.1 mrg return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B, __D);
3553 1.1 mrg }
3554 1.1 mrg
3555 1.1 mrg extern __inline __m512i
3556 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3557 1.1 mrg _mm512_maskz_cvtt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C)
3558 1.1 mrg {
3559 1.1 mrg return __builtin_ia32_vcvttph2qq512_mask_round (__B,
3560 1.1 mrg _mm512_setzero_si512 (),
3561 1.1 mrg __A,
3562 1.1 mrg __C);
3563 1.1 mrg }
3564 1.1 mrg
3565 1.1 mrg #else
3566 1.1 mrg #define _mm512_cvtt_roundph_epi64(A, B) \
3567 1.1 mrg (__builtin_ia32_vcvttph2qq512_mask_round ((A), \
3568 1.1 mrg _mm512_setzero_si512 (), \
3569 1.1 mrg (__mmask8)-1, \
3570 1.1 mrg (B)))
3571 1.1 mrg
3572 1.1 mrg #define _mm512_mask_cvtt_roundph_epi64(A, B, C, D) \
3573 1.1 mrg __builtin_ia32_vcvttph2qq512_mask_round ((C), (A), (B), (D))
3574 1.1 mrg
3575 1.1 mrg #define _mm512_maskz_cvtt_roundph_epi64(A, B, C) \
3576 1.1 mrg (__builtin_ia32_vcvttph2qq512_mask_round ((B), \
3577 1.1 mrg _mm512_setzero_si512 (), \
3578 1.1 mrg (A), \
3579 1.1 mrg (C)))
3580 1.1 mrg
3581 1.1 mrg #endif /* __OPTIMIZE__ */
3582 1.1 mrg
3583 1.1 mrg /* Intrinsics vcvttph2uqq. */
3584 1.1 mrg extern __inline __m512i
3585 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3586 1.1 mrg _mm512_cvttph_epu64 (__m128h __A)
3587 1.1 mrg {
3588 1.1 mrg return __builtin_ia32_vcvttph2uqq512_mask_round (__A,
3589 1.1 mrg _mm512_setzero_si512 (),
3590 1.1 mrg (__mmask8) -1,
3591 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3592 1.1 mrg }
3593 1.1 mrg
3594 1.1 mrg extern __inline __m512i
3595 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3596 1.1 mrg _mm512_mask_cvttph_epu64 (__m512i __A, __mmask8 __B, __m128h __C)
3597 1.1 mrg {
3598 1.1 mrg return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B,
3599 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3600 1.1 mrg }
3601 1.1 mrg
3602 1.1 mrg extern __inline __m512i
3603 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3604 1.1 mrg _mm512_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
3605 1.1 mrg {
3606 1.1 mrg return __builtin_ia32_vcvttph2uqq512_mask_round (__B,
3607 1.1 mrg _mm512_setzero_si512 (),
3608 1.1 mrg __A,
3609 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3610 1.1 mrg }
3611 1.1 mrg
3612 1.1 mrg #ifdef __OPTIMIZE__
3613 1.1 mrg extern __inline __m512i
3614 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3615 1.1 mrg _mm512_cvtt_roundph_epu64 (__m128h __A, int __B)
3616 1.1 mrg {
3617 1.1 mrg return __builtin_ia32_vcvttph2uqq512_mask_round (__A,
3618 1.1 mrg _mm512_setzero_si512 (),
3619 1.1 mrg (__mmask8) -1,
3620 1.1 mrg __B);
3621 1.1 mrg }
3622 1.1 mrg
3623 1.1 mrg extern __inline __m512i
3624 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3625 1.1 mrg _mm512_mask_cvtt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
3626 1.1 mrg {
3627 1.1 mrg return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B, __D);
3628 1.1 mrg }
3629 1.1 mrg
3630 1.1 mrg extern __inline __m512i
3631 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3632 1.1 mrg _mm512_maskz_cvtt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C)
3633 1.1 mrg {
3634 1.1 mrg return __builtin_ia32_vcvttph2uqq512_mask_round (__B,
3635 1.1 mrg _mm512_setzero_si512 (),
3636 1.1 mrg __A,
3637 1.1 mrg __C);
3638 1.1 mrg }
3639 1.1 mrg
3640 1.1 mrg #else
3641 1.1 mrg #define _mm512_cvtt_roundph_epu64(A, B) \
3642 1.1 mrg (__builtin_ia32_vcvttph2uqq512_mask_round ((A), \
3643 1.1 mrg _mm512_setzero_si512 (), \
3644 1.1 mrg (__mmask8)-1, \
3645 1.1 mrg (B)))
3646 1.1 mrg
3647 1.1 mrg #define _mm512_mask_cvtt_roundph_epu64(A, B, C, D) \
3648 1.1 mrg __builtin_ia32_vcvttph2uqq512_mask_round ((C), (A), (B), (D))
3649 1.1 mrg
3650 1.1 mrg #define _mm512_maskz_cvtt_roundph_epu64(A, B, C) \
3651 1.1 mrg (__builtin_ia32_vcvttph2uqq512_mask_round ((B), \
3652 1.1 mrg _mm512_setzero_si512 (), \
3653 1.1 mrg (A), \
3654 1.1 mrg (C)))
3655 1.1 mrg
3656 1.1 mrg #endif /* __OPTIMIZE__ */
3657 1.1 mrg
3658 1.1 mrg /* Intrinsics vcvtqq2ph. */
3659 1.1 mrg extern __inline __m128h
3660 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3661 1.1 mrg _mm512_cvtepi64_ph (__m512i __A)
3662 1.1 mrg {
3663 1.1 mrg return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A,
3664 1.1 mrg _mm_setzero_ph (),
3665 1.1 mrg (__mmask8) -1,
3666 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3667 1.1 mrg }
3668 1.1 mrg
3669 1.1 mrg extern __inline __m128h
3670 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3671 1.1 mrg _mm512_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m512i __C)
3672 1.1 mrg {
3673 1.1 mrg return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C,
3674 1.1 mrg __A,
3675 1.1 mrg __B,
3676 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3677 1.1 mrg }
3678 1.1 mrg
3679 1.1 mrg extern __inline __m128h
3680 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3681 1.1 mrg _mm512_maskz_cvtepi64_ph (__mmask8 __A, __m512i __B)
3682 1.1 mrg {
3683 1.1 mrg return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B,
3684 1.1 mrg _mm_setzero_ph (),
3685 1.1 mrg __A,
3686 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3687 1.1 mrg }
3688 1.1 mrg
3689 1.1 mrg #ifdef __OPTIMIZE__
3690 1.1 mrg extern __inline __m128h
3691 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3692 1.1 mrg _mm512_cvt_roundepi64_ph (__m512i __A, int __B)
3693 1.1 mrg {
3694 1.1 mrg return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A,
3695 1.1 mrg _mm_setzero_ph (),
3696 1.1 mrg (__mmask8) -1,
3697 1.1 mrg __B);
3698 1.1 mrg }
3699 1.1 mrg
3700 1.1 mrg extern __inline __m128h
3701 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3702 1.1 mrg _mm512_mask_cvt_roundepi64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D)
3703 1.1 mrg {
3704 1.1 mrg return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C,
3705 1.1 mrg __A,
3706 1.1 mrg __B,
3707 1.1 mrg __D);
3708 1.1 mrg }
3709 1.1 mrg
3710 1.1 mrg extern __inline __m128h
3711 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3712 1.1 mrg _mm512_maskz_cvt_roundepi64_ph (__mmask8 __A, __m512i __B, int __C)
3713 1.1 mrg {
3714 1.1 mrg return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B,
3715 1.1 mrg _mm_setzero_ph (),
3716 1.1 mrg __A,
3717 1.1 mrg __C);
3718 1.1 mrg }
3719 1.1 mrg
3720 1.1 mrg #else
3721 1.1 mrg #define _mm512_cvt_roundepi64_ph(A, B) \
3722 1.1 mrg (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(A), \
3723 1.1 mrg _mm_setzero_ph (), \
3724 1.1 mrg (__mmask8)-1, \
3725 1.1 mrg (B)))
3726 1.1 mrg
3727 1.1 mrg #define _mm512_mask_cvt_roundepi64_ph(A, B, C, D) \
3728 1.1 mrg (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(C), (A), (B), (D)))
3729 1.1 mrg
3730 1.1 mrg #define _mm512_maskz_cvt_roundepi64_ph(A, B, C) \
3731 1.1 mrg (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(B), \
3732 1.1 mrg _mm_setzero_ph (), \
3733 1.1 mrg (A), \
3734 1.1 mrg (C)))
3735 1.1 mrg
3736 1.1 mrg #endif /* __OPTIMIZE__ */
3737 1.1 mrg
3738 1.1 mrg /* Intrinsics vcvtuqq2ph. */
3739 1.1 mrg extern __inline __m128h
3740 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3741 1.1 mrg _mm512_cvtepu64_ph (__m512i __A)
3742 1.1 mrg {
3743 1.1 mrg return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A,
3744 1.1 mrg _mm_setzero_ph (),
3745 1.1 mrg (__mmask8) -1,
3746 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3747 1.1 mrg }
3748 1.1 mrg
3749 1.1 mrg extern __inline __m128h
3750 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3751 1.1 mrg _mm512_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m512i __C)
3752 1.1 mrg {
3753 1.1 mrg return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C,
3754 1.1 mrg __A,
3755 1.1 mrg __B,
3756 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3757 1.1 mrg }
3758 1.1 mrg
3759 1.1 mrg extern __inline __m128h
3760 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3761 1.1 mrg _mm512_maskz_cvtepu64_ph (__mmask8 __A, __m512i __B)
3762 1.1 mrg {
3763 1.1 mrg return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B,
3764 1.1 mrg _mm_setzero_ph (),
3765 1.1 mrg __A,
3766 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3767 1.1 mrg }
3768 1.1 mrg
3769 1.1 mrg #ifdef __OPTIMIZE__
3770 1.1 mrg extern __inline __m128h
3771 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3772 1.1 mrg _mm512_cvt_roundepu64_ph (__m512i __A, int __B)
3773 1.1 mrg {
3774 1.1 mrg return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A,
3775 1.1 mrg _mm_setzero_ph (),
3776 1.1 mrg (__mmask8) -1,
3777 1.1 mrg __B);
3778 1.1 mrg }
3779 1.1 mrg
3780 1.1 mrg extern __inline __m128h
3781 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3782 1.1 mrg _mm512_mask_cvt_roundepu64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D)
3783 1.1 mrg {
3784 1.1 mrg return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C,
3785 1.1 mrg __A,
3786 1.1 mrg __B,
3787 1.1 mrg __D);
3788 1.1 mrg }
3789 1.1 mrg
3790 1.1 mrg extern __inline __m128h
3791 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3792 1.1 mrg _mm512_maskz_cvt_roundepu64_ph (__mmask8 __A, __m512i __B, int __C)
3793 1.1 mrg {
3794 1.1 mrg return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B,
3795 1.1 mrg _mm_setzero_ph (),
3796 1.1 mrg __A,
3797 1.1 mrg __C);
3798 1.1 mrg }
3799 1.1 mrg
3800 1.1 mrg #else
3801 1.1 mrg #define _mm512_cvt_roundepu64_ph(A, B) \
3802 1.1 mrg (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(A), \
3803 1.1 mrg _mm_setzero_ph (), \
3804 1.1 mrg (__mmask8)-1, \
3805 1.1 mrg (B)))
3806 1.1 mrg
3807 1.1 mrg #define _mm512_mask_cvt_roundepu64_ph(A, B, C, D) \
3808 1.1 mrg (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(C), (A), (B), (D)))
3809 1.1 mrg
3810 1.1 mrg #define _mm512_maskz_cvt_roundepu64_ph(A, B, C) \
3811 1.1 mrg (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(B), \
3812 1.1 mrg _mm_setzero_ph (), \
3813 1.1 mrg (A), \
3814 1.1 mrg (C)))
3815 1.1 mrg
3816 1.1 mrg #endif /* __OPTIMIZE__ */
3817 1.1 mrg
3818 1.1 mrg /* Intrinsics vcvtph2w. */
3819 1.1 mrg extern __inline __m512i
3820 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3821 1.1 mrg _mm512_cvtph_epi16 (__m512h __A)
3822 1.1 mrg {
3823 1.1 mrg return (__m512i)
3824 1.1 mrg __builtin_ia32_vcvtph2w512_mask_round (__A,
3825 1.1 mrg (__v32hi)
3826 1.1 mrg _mm512_setzero_si512 (),
3827 1.1 mrg (__mmask32) -1,
3828 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3829 1.1 mrg }
3830 1.1 mrg
3831 1.1 mrg extern __inline __m512i
3832 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3833 1.1 mrg _mm512_mask_cvtph_epi16 (__m512i __A, __mmask32 __B, __m512h __C)
3834 1.1 mrg {
3835 1.1 mrg return (__m512i)
3836 1.1 mrg __builtin_ia32_vcvtph2w512_mask_round (__C,
3837 1.1 mrg (__v32hi) __A,
3838 1.1 mrg __B,
3839 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3840 1.1 mrg }
3841 1.1 mrg
3842 1.1 mrg extern __inline __m512i
3843 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3844 1.1 mrg _mm512_maskz_cvtph_epi16 (__mmask32 __A, __m512h __B)
3845 1.1 mrg {
3846 1.1 mrg return (__m512i)
3847 1.1 mrg __builtin_ia32_vcvtph2w512_mask_round (__B,
3848 1.1 mrg (__v32hi)
3849 1.1 mrg _mm512_setzero_si512 (),
3850 1.1 mrg __A,
3851 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3852 1.1 mrg }
3853 1.1 mrg
3854 1.1 mrg #ifdef __OPTIMIZE__
3855 1.1 mrg extern __inline __m512i
3856 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3857 1.1 mrg _mm512_cvt_roundph_epi16 (__m512h __A, int __B)
3858 1.1 mrg {
3859 1.1 mrg return (__m512i)
3860 1.1 mrg __builtin_ia32_vcvtph2w512_mask_round (__A,
3861 1.1 mrg (__v32hi)
3862 1.1 mrg _mm512_setzero_si512 (),
3863 1.1 mrg (__mmask32) -1,
3864 1.1 mrg __B);
3865 1.1 mrg }
3866 1.1 mrg
3867 1.1 mrg extern __inline __m512i
3868 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3869 1.1 mrg _mm512_mask_cvt_roundph_epi16 (__m512i __A, __mmask32 __B, __m512h __C, int __D)
3870 1.1 mrg {
3871 1.1 mrg return (__m512i)
3872 1.1 mrg __builtin_ia32_vcvtph2w512_mask_round (__C,
3873 1.1 mrg (__v32hi) __A,
3874 1.1 mrg __B,
3875 1.1 mrg __D);
3876 1.1 mrg }
3877 1.1 mrg
3878 1.1 mrg extern __inline __m512i
3879 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3880 1.1 mrg _mm512_maskz_cvt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C)
3881 1.1 mrg {
3882 1.1 mrg return (__m512i)
3883 1.1 mrg __builtin_ia32_vcvtph2w512_mask_round (__B,
3884 1.1 mrg (__v32hi)
3885 1.1 mrg _mm512_setzero_si512 (),
3886 1.1 mrg __A,
3887 1.1 mrg __C);
3888 1.1 mrg }
3889 1.1 mrg
3890 1.1 mrg #else
3891 1.1 mrg #define _mm512_cvt_roundph_epi16(A, B) \
3892 1.1 mrg ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((A), \
3893 1.1 mrg (__v32hi) \
3894 1.1 mrg _mm512_setzero_si512 (), \
3895 1.1 mrg (__mmask32)-1, \
3896 1.1 mrg (B)))
3897 1.1 mrg
3898 1.1 mrg #define _mm512_mask_cvt_roundph_epi16(A, B, C, D) \
3899 1.1 mrg ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((C), \
3900 1.1 mrg (__v32hi)(A), \
3901 1.1 mrg (B), \
3902 1.1 mrg (D)))
3903 1.1 mrg
3904 1.1 mrg #define _mm512_maskz_cvt_roundph_epi16(A, B, C) \
3905 1.1 mrg ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((B), \
3906 1.1 mrg (__v32hi) \
3907 1.1 mrg _mm512_setzero_si512 (), \
3908 1.1 mrg (A), \
3909 1.1 mrg (C)))
3910 1.1 mrg
3911 1.1 mrg #endif /* __OPTIMIZE__ */
3912 1.1 mrg
3913 1.1 mrg /* Intrinsics vcvtph2uw. */
3914 1.1 mrg extern __inline __m512i
3915 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3916 1.1 mrg _mm512_cvtph_epu16 (__m512h __A)
3917 1.1 mrg {
3918 1.1 mrg return (__m512i)
3919 1.1 mrg __builtin_ia32_vcvtph2uw512_mask_round (__A,
3920 1.1 mrg (__v32hi)
3921 1.1 mrg _mm512_setzero_si512 (),
3922 1.1 mrg (__mmask32) -1,
3923 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3924 1.1 mrg }
3925 1.1 mrg
3926 1.1 mrg extern __inline __m512i
3927 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3928 1.1 mrg _mm512_mask_cvtph_epu16 (__m512i __A, __mmask32 __B, __m512h __C)
3929 1.1 mrg {
3930 1.1 mrg return (__m512i)
3931 1.1 mrg __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B,
3932 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3933 1.1 mrg }
3934 1.1 mrg
3935 1.1 mrg extern __inline __m512i
3936 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3937 1.1 mrg _mm512_maskz_cvtph_epu16 (__mmask32 __A, __m512h __B)
3938 1.1 mrg {
3939 1.1 mrg return (__m512i)
3940 1.1 mrg __builtin_ia32_vcvtph2uw512_mask_round (__B,
3941 1.1 mrg (__v32hi)
3942 1.1 mrg _mm512_setzero_si512 (),
3943 1.1 mrg __A,
3944 1.1 mrg _MM_FROUND_CUR_DIRECTION);
3945 1.1 mrg }
3946 1.1 mrg
3947 1.1 mrg #ifdef __OPTIMIZE__
3948 1.1 mrg extern __inline __m512i
3949 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3950 1.1 mrg _mm512_cvt_roundph_epu16 (__m512h __A, int __B)
3951 1.1 mrg {
3952 1.1 mrg return (__m512i)
3953 1.1 mrg __builtin_ia32_vcvtph2uw512_mask_round (__A,
3954 1.1 mrg (__v32hi)
3955 1.1 mrg _mm512_setzero_si512 (),
3956 1.1 mrg (__mmask32) -1,
3957 1.1 mrg __B);
3958 1.1 mrg }
3959 1.1 mrg
3960 1.1 mrg extern __inline __m512i
3961 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3962 1.1 mrg _mm512_mask_cvt_roundph_epu16 (__m512i __A, __mmask32 __B, __m512h __C, int __D)
3963 1.1 mrg {
3964 1.1 mrg return (__m512i)
3965 1.1 mrg __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B, __D);
3966 1.1 mrg }
3967 1.1 mrg
3968 1.1 mrg extern __inline __m512i
3969 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3970 1.1 mrg _mm512_maskz_cvt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C)
3971 1.1 mrg {
3972 1.1 mrg return (__m512i)
3973 1.1 mrg __builtin_ia32_vcvtph2uw512_mask_round (__B,
3974 1.1 mrg (__v32hi)
3975 1.1 mrg _mm512_setzero_si512 (),
3976 1.1 mrg __A,
3977 1.1 mrg __C);
3978 1.1 mrg }
3979 1.1 mrg
3980 1.1 mrg #else
3981 1.1 mrg #define _mm512_cvt_roundph_epu16(A, B) \
3982 1.1 mrg ((__m512i) \
3983 1.1 mrg __builtin_ia32_vcvtph2uw512_mask_round ((A), \
3984 1.1 mrg (__v32hi) \
3985 1.1 mrg _mm512_setzero_si512 (), \
3986 1.1 mrg (__mmask32)-1, (B)))
3987 1.1 mrg
3988 1.1 mrg #define _mm512_mask_cvt_roundph_epu16(A, B, C, D) \
3989 1.1 mrg ((__m512i) \
3990 1.1 mrg __builtin_ia32_vcvtph2uw512_mask_round ((C), (__v32hi)(A), (B), (D)))
3991 1.1 mrg
3992 1.1 mrg #define _mm512_maskz_cvt_roundph_epu16(A, B, C) \
3993 1.1 mrg ((__m512i) \
3994 1.1 mrg __builtin_ia32_vcvtph2uw512_mask_round ((B), \
3995 1.1 mrg (__v32hi) \
3996 1.1 mrg _mm512_setzero_si512 (), \
3997 1.1 mrg (A), \
3998 1.1 mrg (C)))
3999 1.1 mrg
4000 1.1 mrg #endif /* __OPTIMIZE__ */
4001 1.1 mrg
4002 1.1 mrg /* Intrinsics vcvttph2w. */
4003 1.1 mrg extern __inline __m512i
4004 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4005 1.1 mrg _mm512_cvttph_epi16 (__m512h __A)
4006 1.1 mrg {
4007 1.1 mrg return (__m512i)
4008 1.1 mrg __builtin_ia32_vcvttph2w512_mask_round (__A,
4009 1.1 mrg (__v32hi)
4010 1.1 mrg _mm512_setzero_si512 (),
4011 1.1 mrg (__mmask32) -1,
4012 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4013 1.1 mrg }
4014 1.1 mrg
4015 1.1 mrg extern __inline __m512i
4016 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4017 1.1 mrg _mm512_mask_cvttph_epi16 (__m512i __A, __mmask32 __B, __m512h __C)
4018 1.1 mrg {
4019 1.1 mrg return (__m512i)
4020 1.1 mrg __builtin_ia32_vcvttph2w512_mask_round (__C,
4021 1.1 mrg (__v32hi) __A,
4022 1.1 mrg __B,
4023 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4024 1.1 mrg }
4025 1.1 mrg
4026 1.1 mrg extern __inline __m512i
4027 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4028 1.1 mrg _mm512_maskz_cvttph_epi16 (__mmask32 __A, __m512h __B)
4029 1.1 mrg {
4030 1.1 mrg return (__m512i)
4031 1.1 mrg __builtin_ia32_vcvttph2w512_mask_round (__B,
4032 1.1 mrg (__v32hi)
4033 1.1 mrg _mm512_setzero_si512 (),
4034 1.1 mrg __A,
4035 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4036 1.1 mrg }
4037 1.1 mrg
4038 1.1 mrg #ifdef __OPTIMIZE__
4039 1.1 mrg extern __inline __m512i
4040 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4041 1.1 mrg _mm512_cvtt_roundph_epi16 (__m512h __A, int __B)
4042 1.1 mrg {
4043 1.1 mrg return (__m512i)
4044 1.1 mrg __builtin_ia32_vcvttph2w512_mask_round (__A,
4045 1.1 mrg (__v32hi)
4046 1.1 mrg _mm512_setzero_si512 (),
4047 1.1 mrg (__mmask32) -1,
4048 1.1 mrg __B);
4049 1.1 mrg }
4050 1.1 mrg
4051 1.1 mrg extern __inline __m512i
4052 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4053 1.1 mrg _mm512_mask_cvtt_roundph_epi16 (__m512i __A, __mmask32 __B,
4054 1.1 mrg __m512h __C, int __D)
4055 1.1 mrg {
4056 1.1 mrg return (__m512i)
4057 1.1 mrg __builtin_ia32_vcvttph2w512_mask_round (__C,
4058 1.1 mrg (__v32hi) __A,
4059 1.1 mrg __B,
4060 1.1 mrg __D);
4061 1.1 mrg }
4062 1.1 mrg
4063 1.1 mrg extern __inline __m512i
4064 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4065 1.1 mrg _mm512_maskz_cvtt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C)
4066 1.1 mrg {
4067 1.1 mrg return (__m512i)
4068 1.1 mrg __builtin_ia32_vcvttph2w512_mask_round (__B,
4069 1.1 mrg (__v32hi)
4070 1.1 mrg _mm512_setzero_si512 (),
4071 1.1 mrg __A,
4072 1.1 mrg __C);
4073 1.1 mrg }
4074 1.1 mrg
4075 1.1 mrg #else
4076 1.1 mrg #define _mm512_cvtt_roundph_epi16(A, B) \
4077 1.1 mrg ((__m512i) \
4078 1.1 mrg __builtin_ia32_vcvttph2w512_mask_round ((A), \
4079 1.1 mrg (__v32hi) \
4080 1.1 mrg _mm512_setzero_si512 (), \
4081 1.1 mrg (__mmask32)-1, \
4082 1.1 mrg (B)))
4083 1.1 mrg
4084 1.1 mrg #define _mm512_mask_cvtt_roundph_epi16(A, B, C, D) \
4085 1.1 mrg ((__m512i) \
4086 1.1 mrg __builtin_ia32_vcvttph2w512_mask_round ((C), \
4087 1.1 mrg (__v32hi)(A), \
4088 1.1 mrg (B), \
4089 1.1 mrg (D)))
4090 1.1 mrg
4091 1.1 mrg #define _mm512_maskz_cvtt_roundph_epi16(A, B, C) \
4092 1.1 mrg ((__m512i) \
4093 1.1 mrg __builtin_ia32_vcvttph2w512_mask_round ((B), \
4094 1.1 mrg (__v32hi) \
4095 1.1 mrg _mm512_setzero_si512 (), \
4096 1.1 mrg (A), \
4097 1.1 mrg (C)))
4098 1.1 mrg
4099 1.1 mrg #endif /* __OPTIMIZE__ */
4100 1.1 mrg
4101 1.1 mrg /* Intrinsics vcvttph2uw. */
/* vcvttph2uw: truncating conversion of 32 packed _Float16 values to
   32 unsigned 16-bit integers, with merge (_mask_) and zero (_maskz_)
   masking forms.  */
4102 1.1 mrg extern __inline __m512i
4103 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4104 1.1 mrg _mm512_cvttph_epu16 (__m512h __A)
4105 1.1 mrg {
4106 1.1 mrg return (__m512i)
4107 1.1 mrg __builtin_ia32_vcvttph2uw512_mask_round (__A,
4108 1.1 mrg (__v32hi)
4109 1.1 mrg _mm512_setzero_si512 (),
4110 1.1 mrg (__mmask32) -1,
4111 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4112 1.1 mrg }
4113 1.1 mrg
4114 1.1 mrg extern __inline __m512i
4115 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4116 1.1 mrg _mm512_mask_cvttph_epu16 (__m512i __A, __mmask32 __B, __m512h __C)
4117 1.1 mrg {
4118 1.1 mrg return (__m512i)
4119 1.1 mrg __builtin_ia32_vcvttph2uw512_mask_round (__C,
4120 1.1 mrg (__v32hi) __A,
4121 1.1 mrg __B,
4122 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4123 1.1 mrg }
4124 1.1 mrg
4125 1.1 mrg extern __inline __m512i
4126 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4127 1.1 mrg _mm512_maskz_cvttph_epu16 (__mmask32 __A, __m512h __B)
4128 1.1 mrg {
4129 1.1 mrg return (__m512i)
4130 1.1 mrg __builtin_ia32_vcvttph2uw512_mask_round (__B,
4131 1.1 mrg (__v32hi)
4132 1.1 mrg _mm512_setzero_si512 (),
4133 1.1 mrg __A,
4134 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4135 1.1 mrg }
4136 1.1 mrg
/* Explicit-rounding variants of the vcvttph2uw conversions; inline
   functions under __OPTIMIZE__, macros otherwise (the rounding operand
   must fold to a constant).  */
4137 1.1 mrg #ifdef __OPTIMIZE__
4138 1.1 mrg extern __inline __m512i
4139 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4140 1.1 mrg _mm512_cvtt_roundph_epu16 (__m512h __A, int __B)
4141 1.1 mrg {
4142 1.1 mrg return (__m512i)
4143 1.1 mrg __builtin_ia32_vcvttph2uw512_mask_round (__A,
4144 1.1 mrg (__v32hi)
4145 1.1 mrg _mm512_setzero_si512 (),
4146 1.1 mrg (__mmask32) -1,
4147 1.1 mrg __B);
4148 1.1 mrg }
4149 1.1 mrg
4150 1.1 mrg extern __inline __m512i
4151 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4152 1.1 mrg _mm512_mask_cvtt_roundph_epu16 (__m512i __A, __mmask32 __B,
4153 1.1 mrg __m512h __C, int __D)
4154 1.1 mrg {
4155 1.1 mrg return (__m512i)
4156 1.1 mrg __builtin_ia32_vcvttph2uw512_mask_round (__C,
4157 1.1 mrg (__v32hi) __A,
4158 1.1 mrg __B,
4159 1.1 mrg __D);
4160 1.1 mrg }
4161 1.1 mrg
4162 1.1 mrg extern __inline __m512i
4163 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4164 1.1 mrg _mm512_maskz_cvtt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C)
4165 1.1 mrg {
4166 1.1 mrg return (__m512i)
4167 1.1 mrg __builtin_ia32_vcvttph2uw512_mask_round (__B,
4168 1.1 mrg (__v32hi)
4169 1.1 mrg _mm512_setzero_si512 (),
4170 1.1 mrg __A,
4171 1.1 mrg __C);
4172 1.1 mrg }
4173 1.1 mrg
4174 1.1 mrg #else
4175 1.1 mrg #define _mm512_cvtt_roundph_epu16(A, B) \
4176 1.1 mrg ((__m512i) \
4177 1.1 mrg __builtin_ia32_vcvttph2uw512_mask_round ((A), \
4178 1.1 mrg (__v32hi) \
4179 1.1 mrg _mm512_setzero_si512 (), \
4180 1.1 mrg (__mmask32)-1, \
4181 1.1 mrg (B)))
4182 1.1 mrg
4183 1.1 mrg #define _mm512_mask_cvtt_roundph_epu16(A, B, C, D) \
4184 1.1 mrg ((__m512i) \
4185 1.1 mrg __builtin_ia32_vcvttph2uw512_mask_round ((C), \
4186 1.1 mrg (__v32hi)(A), \
4187 1.1 mrg (B), \
4188 1.1 mrg (D)))
4189 1.1 mrg
4190 1.1 mrg #define _mm512_maskz_cvtt_roundph_epu16(A, B, C) \
4191 1.1 mrg ((__m512i) \
4192 1.1 mrg __builtin_ia32_vcvttph2uw512_mask_round ((B), \
4193 1.1 mrg (__v32hi) \
4194 1.1 mrg _mm512_setzero_si512 (), \
4195 1.1 mrg (A), \
4196 1.1 mrg (C)))
4197 1.1 mrg
4198 1.1 mrg #endif /* __OPTIMIZE__ */
4199 1.1 mrg
4200 1.1 mrg /* Intrinsics vcvtw2ph. */
/* vcvtw2ph: convert 32 signed 16-bit integers to 32 packed _Float16
   values using the current rounding direction; merge and zero masking
   forms follow the usual AVX-512 pattern.  */
4201 1.1 mrg extern __inline __m512h
4202 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4203 1.1 mrg _mm512_cvtepi16_ph (__m512i __A)
4204 1.1 mrg {
4205 1.1 mrg return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A,
4206 1.1 mrg _mm512_setzero_ph (),
4207 1.1 mrg (__mmask32) -1,
4208 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4209 1.1 mrg }
4210 1.1 mrg
4211 1.1 mrg extern __inline __m512h
4212 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4213 1.1 mrg _mm512_mask_cvtepi16_ph (__m512h __A, __mmask32 __B, __m512i __C)
4214 1.1 mrg {
4215 1.1 mrg return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C,
4216 1.1 mrg __A,
4217 1.1 mrg __B,
4218 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4219 1.1 mrg }
4220 1.1 mrg
4221 1.1 mrg extern __inline __m512h
4222 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4223 1.1 mrg _mm512_maskz_cvtepi16_ph (__mmask32 __A, __m512i __B)
4224 1.1 mrg {
4225 1.1 mrg return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B,
4226 1.1 mrg _mm512_setzero_ph (),
4227 1.1 mrg __A,
4228 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4229 1.1 mrg }
4230 1.1 mrg
/* Explicit-rounding variants of vcvtw2ph; macro fallbacks when not
   optimizing so the rounding operand stays a constant expression.  */
4231 1.1 mrg #ifdef __OPTIMIZE__
4232 1.1 mrg extern __inline __m512h
4233 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4234 1.1 mrg _mm512_cvt_roundepi16_ph (__m512i __A, int __B)
4235 1.1 mrg {
4236 1.1 mrg return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A,
4237 1.1 mrg _mm512_setzero_ph (),
4238 1.1 mrg (__mmask32) -1,
4239 1.1 mrg __B);
4240 1.1 mrg }
4241 1.1 mrg
4242 1.1 mrg extern __inline __m512h
4243 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4244 1.1 mrg _mm512_mask_cvt_roundepi16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
4245 1.1 mrg {
4246 1.1 mrg return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C,
4247 1.1 mrg __A,
4248 1.1 mrg __B,
4249 1.1 mrg __D);
4250 1.1 mrg }
4251 1.1 mrg
4252 1.1 mrg extern __inline __m512h
4253 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4254 1.1 mrg _mm512_maskz_cvt_roundepi16_ph (__mmask32 __A, __m512i __B, int __C)
4255 1.1 mrg {
4256 1.1 mrg return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B,
4257 1.1 mrg _mm512_setzero_ph (),
4258 1.1 mrg __A,
4259 1.1 mrg __C);
4260 1.1 mrg }
4261 1.1 mrg
4262 1.1 mrg #else
4263 1.1 mrg #define _mm512_cvt_roundepi16_ph(A, B) \
4264 1.1 mrg (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(A), \
4265 1.1 mrg _mm512_setzero_ph (), \
4266 1.1 mrg (__mmask32)-1, \
4267 1.1 mrg (B)))
4268 1.1 mrg
4269 1.1 mrg #define _mm512_mask_cvt_roundepi16_ph(A, B, C, D) \
4270 1.1 mrg (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(C), \
4271 1.1 mrg (A), \
4272 1.1 mrg (B), \
4273 1.1 mrg (D)))
4274 1.1 mrg
4275 1.1 mrg #define _mm512_maskz_cvt_roundepi16_ph(A, B, C) \
4276 1.1 mrg (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(B), \
4277 1.1 mrg _mm512_setzero_ph (), \
4278 1.1 mrg (A), \
4279 1.1 mrg (C)))
4280 1.1 mrg
4281 1.1 mrg #endif /* __OPTIMIZE__ */
4282 1.1 mrg
4283 1.1 mrg /* Intrinsics vcvtuw2ph. */
/* vcvtuw2ph: convert 32 unsigned 16-bit integers to 32 packed
   _Float16 values using the current rounding direction; merge and
   zero masking forms as usual.  */
4284 1.1 mrg extern __inline __m512h
4285 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4286 1.1 mrg _mm512_cvtepu16_ph (__m512i __A)
4287 1.1 mrg {
4288 1.1 mrg return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A,
4289 1.1 mrg _mm512_setzero_ph (),
4290 1.1 mrg (__mmask32) -1,
4291 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4292 1.1 mrg }
4293 1.1 mrg
4294 1.1 mrg extern __inline __m512h
4295 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4296 1.1 mrg _mm512_mask_cvtepu16_ph (__m512h __A, __mmask32 __B, __m512i __C)
4297 1.1 mrg {
4298 1.1 mrg return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C,
4299 1.1 mrg __A,
4300 1.1 mrg __B,
4301 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4302 1.1 mrg }
4303 1.1 mrg
4304 1.1 mrg extern __inline __m512h
4305 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4306 1.1 mrg _mm512_maskz_cvtepu16_ph (__mmask32 __A, __m512i __B)
4307 1.1 mrg {
4308 1.1 mrg return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B,
4309 1.1 mrg _mm512_setzero_ph (),
4310 1.1 mrg __A,
4311 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4312 1.1 mrg }
4313 1.1 mrg
/* Explicit-rounding variants of vcvtuw2ph; macro fallbacks when not
   optimizing.  */
4314 1.1 mrg #ifdef __OPTIMIZE__
4315 1.1 mrg extern __inline __m512h
4316 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4317 1.1 mrg _mm512_cvt_roundepu16_ph (__m512i __A, int __B)
4318 1.1 mrg {
4319 1.1 mrg return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A,
4320 1.1 mrg _mm512_setzero_ph (),
4321 1.1 mrg (__mmask32) -1,
4322 1.1 mrg __B);
4323 1.1 mrg }
4324 1.1 mrg
4325 1.1 mrg extern __inline __m512h
4326 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4327 1.1 mrg _mm512_mask_cvt_roundepu16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
4328 1.1 mrg {
4329 1.1 mrg return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C,
4330 1.1 mrg __A,
4331 1.1 mrg __B,
4332 1.1 mrg __D);
4333 1.1 mrg }
4334 1.1 mrg
4335 1.1 mrg extern __inline __m512h
4336 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4337 1.1 mrg _mm512_maskz_cvt_roundepu16_ph (__mmask32 __A, __m512i __B, int __C)
4338 1.1 mrg {
4339 1.1 mrg return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B,
4340 1.1 mrg _mm512_setzero_ph (),
4341 1.1 mrg __A,
4342 1.1 mrg __C);
4343 1.1 mrg }
4344 1.1 mrg
4345 1.1 mrg #else
4346 1.1 mrg #define _mm512_cvt_roundepu16_ph(A, B) \
4347 1.1 mrg (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(A), \
4348 1.1 mrg _mm512_setzero_ph (), \
4349 1.1 mrg (__mmask32)-1, \
4350 1.1 mrg (B)))
4351 1.1 mrg
4352 1.1 mrg #define _mm512_mask_cvt_roundepu16_ph(A, B, C, D) \
4353 1.1 mrg (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(C), \
4354 1.1 mrg (A), \
4355 1.1 mrg (B), \
4356 1.1 mrg (D)))
4357 1.1 mrg
4358 1.1 mrg #define _mm512_maskz_cvt_roundepu16_ph(A, B, C) \
4359 1.1 mrg (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(B), \
4360 1.1 mrg _mm512_setzero_ph (), \
4361 1.1 mrg (A), \
4362 1.1 mrg (C)))
4363 1.1 mrg
4364 1.1 mrg #endif /* __OPTIMIZE__ */
4365 1.1 mrg
4366 1.1 mrg /* Intrinsics vcvtsh2si, vcvtsh2us. */
/* vcvtsh2si: convert the low _Float16 element of __A to a signed
   32-bit integer using the current rounding direction.  */
4367 1.1 mrg extern __inline int
4368 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4369 1.1 mrg _mm_cvtsh_i32 (__m128h __A)
4370 1.1 mrg {
4371 1.1 mrg return (int) __builtin_ia32_vcvtsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
4372 1.1 mrg }
4373 1.1 mrg
/* vcvtsh2usi: convert the low _Float16 element of __A to an unsigned
   32-bit integer using the current rounding direction.  The builtin's
   result is cast with (unsigned) to match the return type; the
   previous (int) cast made results above INT_MAX go through an
   implementation-defined unsigned->signed conversion (C11 6.3.1.3).  */
4374 1.1 mrg extern __inline unsigned
4375 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4376 1.1 mrg _mm_cvtsh_u32 (__m128h __A)
4377 1.1 mrg {
4378 1.1 mrg return (unsigned) __builtin_ia32_vcvtsh2usi32_round (__A,
4379 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4380 1.1 mrg }
4381 1.1 mrg
4382 1.1 mrg #ifdef __OPTIMIZE__
/* Explicit-rounding form of _mm_cvtsh_i32; __R must be a constant
   rounding-control value.  */
4383 1.1 mrg extern __inline int
4384 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4385 1.1 mrg _mm_cvt_roundsh_i32 (__m128h __A, const int __R)
4386 1.1 mrg {
4387 1.1 mrg return (int) __builtin_ia32_vcvtsh2si32_round (__A, __R);
4388 1.1 mrg }
4389 1.1 mrg
/* Explicit-rounding form of _mm_cvtsh_u32.  Cast the unsigned builtin
   result with (unsigned), not (int): the old cast subjected values
   above INT_MAX to an implementation-defined conversion.  */
4390 1.1 mrg extern __inline unsigned
4391 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4392 1.1 mrg _mm_cvt_roundsh_u32 (__m128h __A, const int __R)
4393 1.1 mrg {
4394 1.1 mrg return (unsigned) __builtin_ia32_vcvtsh2usi32_round (__A, __R);
4395 1.1 mrg }
4396 1.1 mrg
4397 1.1 mrg #else
/* Non-optimizing macro fallbacks.  The u32 macro now casts with
   (unsigned) so it agrees with the intrinsic's unsigned return type
   instead of routing the value through int.  */
4398 1.1 mrg #define _mm_cvt_roundsh_i32(A, B) \
4399 1.1 mrg ((int)__builtin_ia32_vcvtsh2si32_round ((A), (B)))
4400 1.1 mrg #define _mm_cvt_roundsh_u32(A, B) \
4401 1.1 mrg ((unsigned)__builtin_ia32_vcvtsh2usi32_round ((A), (B)))
4402 1.1 mrg
4403 1.1 mrg #endif /* __OPTIMIZE__ */
4404 1.1 mrg
4405 1.1 mrg #ifdef __x86_64__
/* vcvtsh2si (64-bit): convert the low _Float16 element of __A to a
   signed 64-bit integer; only available on x86-64.  */
4406 1.1 mrg extern __inline long long
4407 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4408 1.1 mrg _mm_cvtsh_i64 (__m128h __A)
4409 1.1 mrg {
4410 1.1 mrg return (long long)
4411 1.1 mrg __builtin_ia32_vcvtsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
4412 1.1 mrg }
4413 1.1 mrg
/* vcvtsh2usi (64-bit): convert the low _Float16 element of __A to an
   unsigned 64-bit integer.  Cast with (unsigned long long) to match
   the return type; the old (long long) cast made results above
   LLONG_MAX implementation-defined (C11 6.3.1.3).  */
4414 1.1 mrg extern __inline unsigned long long
4415 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4416 1.1 mrg _mm_cvtsh_u64 (__m128h __A)
4417 1.1 mrg {
4418 1.1 mrg return (unsigned long long)
4419 1.1 mrg __builtin_ia32_vcvtsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
4420 1.1 mrg }
4421 1.1 mrg
4422 1.1 mrg #ifdef __OPTIMIZE__
/* Explicit-rounding form of _mm_cvtsh_i64.  */
4423 1.1 mrg extern __inline long long
4424 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4425 1.1 mrg _mm_cvt_roundsh_i64 (__m128h __A, const int __R)
4426 1.1 mrg {
4427 1.1 mrg return (long long) __builtin_ia32_vcvtsh2si64_round (__A, __R);
4428 1.1 mrg }
4429 1.1 mrg
/* Explicit-rounding form of _mm_cvtsh_u64; use an unsigned cast to
   avoid the implementation-defined signed conversion of large
   results.  */
4430 1.1 mrg extern __inline unsigned long long
4431 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4432 1.1 mrg _mm_cvt_roundsh_u64 (__m128h __A, const int __R)
4433 1.1 mrg {
4434 1.1 mrg return (unsigned long long) __builtin_ia32_vcvtsh2usi64_round (__A, __R);
4435 1.1 mrg }
4436 1.1 mrg
4437 1.1 mrg #else
/* Non-optimizing macro fallbacks; the u64 macro casts with
   (unsigned long long) to match the intrinsic's return type.  */
4438 1.1 mrg #define _mm_cvt_roundsh_i64(A, B) \
4439 1.1 mrg ((long long)__builtin_ia32_vcvtsh2si64_round ((A), (B)))
4440 1.1 mrg #define _mm_cvt_roundsh_u64(A, B) \
4441 1.1 mrg ((unsigned long long)__builtin_ia32_vcvtsh2usi64_round ((A), (B)))
4442 1.1 mrg
4443 1.1 mrg #endif /* __OPTIMIZE__ */
4444 1.1 mrg #endif /* __x86_64__ */
4445 1.1 mrg
4446 1.1 mrg /* Intrinsics vcvttsh2si, vcvttsh2us. */
/* vcvttsh2si: truncating conversion of the low _Float16 element of
   __A to a signed 32-bit integer.  */
4447 1.1 mrg extern __inline int
4448 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4449 1.1 mrg _mm_cvttsh_i32 (__m128h __A)
4450 1.1 mrg {
4451 1.1 mrg return (int)
4452 1.1 mrg __builtin_ia32_vcvttsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
4453 1.1 mrg }
4454 1.1 mrg
/* vcvttsh2usi: truncating conversion of the low _Float16 element of
   __A to an unsigned 32-bit integer.  Cast with (unsigned) rather
   than (int) so large results avoid the implementation-defined
   unsigned->signed conversion.  */
4455 1.1 mrg extern __inline unsigned
4456 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4457 1.1 mrg _mm_cvttsh_u32 (__m128h __A)
4458 1.1 mrg {
4459 1.1 mrg return (unsigned)
4460 1.1 mrg __builtin_ia32_vcvttsh2usi32_round (__A, _MM_FROUND_CUR_DIRECTION);
4461 1.1 mrg }
4462 1.1 mrg
4463 1.1 mrg #ifdef __OPTIMIZE__
/* Explicit-rounding (SAE) form of _mm_cvttsh_i32.  */
4464 1.1 mrg extern __inline int
4465 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4466 1.1 mrg _mm_cvtt_roundsh_i32 (__m128h __A, const int __R)
4467 1.1 mrg {
4468 1.1 mrg return (int) __builtin_ia32_vcvttsh2si32_round (__A, __R);
4469 1.1 mrg }
4470 1.1 mrg
/* Explicit-rounding (SAE) form of _mm_cvttsh_u32; unsigned cast
   matches the return type instead of routing through int.  */
4471 1.1 mrg extern __inline unsigned
4472 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4473 1.1 mrg _mm_cvtt_roundsh_u32 (__m128h __A, const int __R)
4474 1.1 mrg {
4475 1.1 mrg return (unsigned) __builtin_ia32_vcvttsh2usi32_round (__A, __R);
4476 1.1 mrg }
4477 1.1 mrg
4478 1.1 mrg #else
/* Non-optimizing macro fallbacks; u32 macro casts with (unsigned) to
   match the intrinsic's return type.  */
4479 1.1 mrg #define _mm_cvtt_roundsh_i32(A, B) \
4480 1.1 mrg ((int)__builtin_ia32_vcvttsh2si32_round ((A), (B)))
4481 1.1 mrg #define _mm_cvtt_roundsh_u32(A, B) \
4482 1.1 mrg ((unsigned)__builtin_ia32_vcvttsh2usi32_round ((A), (B)))
4483 1.1 mrg
4484 1.1 mrg #endif /* __OPTIMIZE__ */
4485 1.1 mrg
4486 1.1 mrg #ifdef __x86_64__
/* vcvttsh2si (64-bit): truncating conversion of the low _Float16 of
   __A to a signed 64-bit integer; x86-64 only.  */
4487 1.1 mrg extern __inline long long
4488 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4489 1.1 mrg _mm_cvttsh_i64 (__m128h __A)
4490 1.1 mrg {
4491 1.1 mrg return (long long)
4492 1.1 mrg __builtin_ia32_vcvttsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
4493 1.1 mrg }
4494 1.1 mrg
/* vcvttsh2usi (64-bit): truncating conversion of the low _Float16 of
   __A to an unsigned 64-bit integer.  Cast with (unsigned long long)
   so results above LLONG_MAX avoid the implementation-defined signed
   conversion.  */
4495 1.1 mrg extern __inline unsigned long long
4496 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4497 1.1 mrg _mm_cvttsh_u64 (__m128h __A)
4498 1.1 mrg {
4499 1.1 mrg return (unsigned long long)
4500 1.1 mrg __builtin_ia32_vcvttsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
4501 1.1 mrg }
4502 1.1 mrg
4503 1.1 mrg #ifdef __OPTIMIZE__
/* Explicit-rounding (SAE) form of _mm_cvttsh_i64.  */
4504 1.1 mrg extern __inline long long
4505 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4506 1.1 mrg _mm_cvtt_roundsh_i64 (__m128h __A, const int __R)
4507 1.1 mrg {
4508 1.1 mrg return (long long) __builtin_ia32_vcvttsh2si64_round (__A, __R);
4509 1.1 mrg }
4510 1.1 mrg
/* Explicit-rounding (SAE) form of _mm_cvttsh_u64; unsigned cast
   matches the return type.  */
4511 1.1 mrg extern __inline unsigned long long
4512 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4513 1.1 mrg _mm_cvtt_roundsh_u64 (__m128h __A, const int __R)
4514 1.1 mrg {
4515 1.1 mrg return (unsigned long long) __builtin_ia32_vcvttsh2usi64_round (__A, __R);
4516 1.1 mrg }
4517 1.1 mrg
4518 1.1 mrg #else
/* Non-optimizing macro fallbacks; u64 macro casts with
   (unsigned long long) to match the intrinsic's return type.  */
4519 1.1 mrg #define _mm_cvtt_roundsh_i64(A, B) \
4520 1.1 mrg ((long long)__builtin_ia32_vcvttsh2si64_round ((A), (B)))
4521 1.1 mrg #define _mm_cvtt_roundsh_u64(A, B) \
4522 1.1 mrg ((unsigned long long)__builtin_ia32_vcvttsh2usi64_round ((A), (B)))
4523 1.1 mrg
4524 1.1 mrg #endif /* __OPTIMIZE__ */
4525 1.1 mrg #endif /* __x86_64__ */
4526 1.1 mrg
4527 1.1 mrg /* Intrinsics vcvtsi2sh, vcvtusi2sh. */
/* vcvtsi2sh / vcvtusi2sh: convert a 32-bit (signed / unsigned)
   integer __B to _Float16 and insert it as the low element of __A,
   using the current rounding direction.  */
4528 1.1 mrg extern __inline __m128h
4529 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4530 1.1 mrg _mm_cvti32_sh (__m128h __A, int __B)
4531 1.1 mrg {
4532 1.1 mrg return __builtin_ia32_vcvtsi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
4533 1.1 mrg }
4534 1.1 mrg
4535 1.1 mrg extern __inline __m128h
4536 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4537 1.1 mrg _mm_cvtu32_sh (__m128h __A, unsigned int __B)
4538 1.1 mrg {
4539 1.1 mrg return __builtin_ia32_vcvtusi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
4540 1.1 mrg }
4541 1.1 mrg
/* Explicit-rounding forms of the 32-bit integer-to-sh conversions;
   macro fallbacks when not optimizing.  */
4542 1.1 mrg #ifdef __OPTIMIZE__
4543 1.1 mrg extern __inline __m128h
4544 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4545 1.1 mrg _mm_cvt_roundi32_sh (__m128h __A, int __B, const int __R)
4546 1.1 mrg {
4547 1.1 mrg return __builtin_ia32_vcvtsi2sh32_round (__A, __B, __R);
4548 1.1 mrg }
4549 1.1 mrg
4550 1.1 mrg extern __inline __m128h
4551 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4552 1.1 mrg _mm_cvt_roundu32_sh (__m128h __A, unsigned int __B, const int __R)
4553 1.1 mrg {
4554 1.1 mrg return __builtin_ia32_vcvtusi2sh32_round (__A, __B, __R);
4555 1.1 mrg }
4556 1.1 mrg
4557 1.1 mrg #else
4558 1.1 mrg #define _mm_cvt_roundi32_sh(A, B, C) \
4559 1.1 mrg (__builtin_ia32_vcvtsi2sh32_round ((A), (B), (C)))
4560 1.1 mrg #define _mm_cvt_roundu32_sh(A, B, C) \
4561 1.1 mrg (__builtin_ia32_vcvtusi2sh32_round ((A), (B), (C)))
4562 1.1 mrg
4563 1.1 mrg #endif /* __OPTIMIZE__ */
4564 1.1 mrg
4565 1.1 mrg #ifdef __x86_64__
/* 64-bit integer-to-sh conversions (x86-64 only): convert __B to
   _Float16 and insert as the low element of __A.  */
4566 1.1 mrg extern __inline __m128h
4567 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4568 1.1 mrg _mm_cvti64_sh (__m128h __A, long long __B)
4569 1.1 mrg {
4570 1.1 mrg return __builtin_ia32_vcvtsi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
4571 1.1 mrg }
4572 1.1 mrg
4573 1.1 mrg extern __inline __m128h
4574 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4575 1.1 mrg _mm_cvtu64_sh (__m128h __A, unsigned long long __B)
4576 1.1 mrg {
4577 1.1 mrg return __builtin_ia32_vcvtusi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
4578 1.1 mrg }
4579 1.1 mrg
/* Explicit-rounding forms of the 64-bit integer-to-sh conversions;
   macro fallbacks when not optimizing.  */
4580 1.1 mrg #ifdef __OPTIMIZE__
4581 1.1 mrg extern __inline __m128h
4582 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4583 1.1 mrg _mm_cvt_roundi64_sh (__m128h __A, long long __B, const int __R)
4584 1.1 mrg {
4585 1.1 mrg return __builtin_ia32_vcvtsi2sh64_round (__A, __B, __R);
4586 1.1 mrg }
4587 1.1 mrg
4588 1.1 mrg extern __inline __m128h
4589 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4590 1.1 mrg _mm_cvt_roundu64_sh (__m128h __A, unsigned long long __B, const int __R)
4591 1.1 mrg {
4592 1.1 mrg return __builtin_ia32_vcvtusi2sh64_round (__A, __B, __R);
4593 1.1 mrg }
4594 1.1 mrg
4595 1.1 mrg #else
4596 1.1 mrg #define _mm_cvt_roundi64_sh(A, B, C) \
4597 1.1 mrg (__builtin_ia32_vcvtsi2sh64_round ((A), (B), (C)))
4598 1.1 mrg #define _mm_cvt_roundu64_sh(A, B, C) \
4599 1.1 mrg (__builtin_ia32_vcvtusi2sh64_round ((A), (B), (C)))
4600 1.1 mrg
4601 1.1 mrg #endif /* __OPTIMIZE__ */
4602 1.1 mrg #endif /* __x86_64__ */
4603 1.1 mrg
4604 1.1 mrg /* Intrinsics vcvtph2pd. */
/* vcvtph2pd: widen the 8 _Float16 elements of a 128-bit vector to 8
   doubles; merge and zero masking forms as usual.  */
4605 1.1 mrg extern __inline __m512d
4606 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4607 1.1 mrg _mm512_cvtph_pd (__m128h __A)
4608 1.1 mrg {
4609 1.1 mrg return __builtin_ia32_vcvtph2pd512_mask_round (__A,
4610 1.1 mrg _mm512_setzero_pd (),
4611 1.1 mrg (__mmask8) -1,
4612 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4613 1.1 mrg }
4614 1.1 mrg
4615 1.1 mrg extern __inline __m512d
4616 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4617 1.1 mrg _mm512_mask_cvtph_pd (__m512d __A, __mmask8 __B, __m128h __C)
4618 1.1 mrg {
4619 1.1 mrg return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B,
4620 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4621 1.1 mrg }
4622 1.1 mrg
4623 1.1 mrg extern __inline __m512d
4624 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4625 1.1 mrg _mm512_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
4626 1.1 mrg {
4627 1.1 mrg return __builtin_ia32_vcvtph2pd512_mask_round (__B,
4628 1.1 mrg _mm512_setzero_pd (),
4629 1.1 mrg __A,
4630 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4631 1.1 mrg }
4632 1.1 mrg
/* Explicit-rounding (SAE) forms of vcvtph2pd; macro fallbacks when
   not optimizing.  */
4633 1.1 mrg #ifdef __OPTIMIZE__
4634 1.1 mrg extern __inline __m512d
4635 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4636 1.1 mrg _mm512_cvt_roundph_pd (__m128h __A, int __B)
4637 1.1 mrg {
4638 1.1 mrg return __builtin_ia32_vcvtph2pd512_mask_round (__A,
4639 1.1 mrg _mm512_setzero_pd (),
4640 1.1 mrg (__mmask8) -1,
4641 1.1 mrg __B);
4642 1.1 mrg }
4643 1.1 mrg
4644 1.1 mrg extern __inline __m512d
4645 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4646 1.1 mrg _mm512_mask_cvt_roundph_pd (__m512d __A, __mmask8 __B, __m128h __C, int __D)
4647 1.1 mrg {
4648 1.1 mrg return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B, __D);
4649 1.1 mrg }
4650 1.1 mrg
4651 1.1 mrg extern __inline __m512d
4652 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4653 1.1 mrg _mm512_maskz_cvt_roundph_pd (__mmask8 __A, __m128h __B, int __C)
4654 1.1 mrg {
4655 1.1 mrg return __builtin_ia32_vcvtph2pd512_mask_round (__B,
4656 1.1 mrg _mm512_setzero_pd (),
4657 1.1 mrg __A,
4658 1.1 mrg __C);
4659 1.1 mrg }
4660 1.1 mrg
4661 1.1 mrg #else
4662 1.1 mrg #define _mm512_cvt_roundph_pd(A, B) \
4663 1.1 mrg (__builtin_ia32_vcvtph2pd512_mask_round ((A), \
4664 1.1 mrg _mm512_setzero_pd (), \
4665 1.1 mrg (__mmask8)-1, \
4666 1.1 mrg (B)))
4667 1.1 mrg
4668 1.1 mrg #define _mm512_mask_cvt_roundph_pd(A, B, C, D) \
4669 1.1 mrg (__builtin_ia32_vcvtph2pd512_mask_round ((C), (A), (B), (D)))
4670 1.1 mrg
4671 1.1 mrg #define _mm512_maskz_cvt_roundph_pd(A, B, C) \
4672 1.1 mrg (__builtin_ia32_vcvtph2pd512_mask_round ((B), \
4673 1.1 mrg _mm512_setzero_pd (), \
4674 1.1 mrg (A), \
4675 1.1 mrg (C)))
4676 1.1 mrg
4677 1.1 mrg #endif /* __OPTIMIZE__ */
4678 1.1 mrg
4679 1.1 mrg /* Intrinsics vcvtph2psx. */
/* vcvtph2psx: widen the 16 _Float16 elements of a 256-bit vector to
   16 floats; merge and zero masking forms as usual.  */
4680 1.1 mrg extern __inline __m512
4681 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4682 1.1 mrg _mm512_cvtxph_ps (__m256h __A)
4683 1.1 mrg {
4684 1.1 mrg return __builtin_ia32_vcvtph2psx512_mask_round (__A,
4685 1.1 mrg _mm512_setzero_ps (),
4686 1.1 mrg (__mmask16) -1,
4687 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4688 1.1 mrg }
4689 1.1 mrg
4690 1.1 mrg extern __inline __m512
4691 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4692 1.1 mrg _mm512_mask_cvtxph_ps (__m512 __A, __mmask16 __B, __m256h __C)
4693 1.1 mrg {
4694 1.1 mrg return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B,
4695 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4696 1.1 mrg }
4697 1.1 mrg
4698 1.1 mrg extern __inline __m512
4699 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4700 1.1 mrg _mm512_maskz_cvtxph_ps (__mmask16 __A, __m256h __B)
4701 1.1 mrg {
4702 1.1 mrg return __builtin_ia32_vcvtph2psx512_mask_round (__B,
4703 1.1 mrg _mm512_setzero_ps (),
4704 1.1 mrg __A,
4705 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4706 1.1 mrg }
4707 1.1 mrg
/* Explicit-rounding (SAE) forms of vcvtph2psx; macro fallbacks when
   not optimizing.  */
4708 1.1 mrg #ifdef __OPTIMIZE__
4709 1.1 mrg extern __inline __m512
4710 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4711 1.1 mrg _mm512_cvtx_roundph_ps (__m256h __A, int __B)
4712 1.1 mrg {
4713 1.1 mrg return __builtin_ia32_vcvtph2psx512_mask_round (__A,
4714 1.1 mrg _mm512_setzero_ps (),
4715 1.1 mrg (__mmask16) -1,
4716 1.1 mrg __B);
4717 1.1 mrg }
4718 1.1 mrg
4719 1.1 mrg extern __inline __m512
4720 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4721 1.1 mrg _mm512_mask_cvtx_roundph_ps (__m512 __A, __mmask16 __B, __m256h __C, int __D)
4722 1.1 mrg {
4723 1.1 mrg return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B, __D);
4724 1.1 mrg }
4725 1.1 mrg
4726 1.1 mrg extern __inline __m512
4727 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4728 1.1 mrg _mm512_maskz_cvtx_roundph_ps (__mmask16 __A, __m256h __B, int __C)
4729 1.1 mrg {
4730 1.1 mrg return __builtin_ia32_vcvtph2psx512_mask_round (__B,
4731 1.1 mrg _mm512_setzero_ps (),
4732 1.1 mrg __A,
4733 1.1 mrg __C);
4734 1.1 mrg }
4735 1.1 mrg
4736 1.1 mrg #else
4737 1.1 mrg #define _mm512_cvtx_roundph_ps(A, B) \
4738 1.1 mrg (__builtin_ia32_vcvtph2psx512_mask_round ((A), \
4739 1.1 mrg _mm512_setzero_ps (), \
4740 1.1 mrg (__mmask16)-1, \
4741 1.1 mrg (B)))
4742 1.1 mrg
4743 1.1 mrg #define _mm512_mask_cvtx_roundph_ps(A, B, C, D) \
4744 1.1 mrg (__builtin_ia32_vcvtph2psx512_mask_round ((C), (A), (B), (D)))
4745 1.1 mrg
4746 1.1 mrg #define _mm512_maskz_cvtx_roundph_ps(A, B, C) \
4747 1.1 mrg (__builtin_ia32_vcvtph2psx512_mask_round ((B), \
4748 1.1 mrg _mm512_setzero_ps (), \
4749 1.1 mrg (A), \
4750 1.1 mrg (C)))
4751 1.1 mrg #endif /* __OPTIMIZE__ */
4752 1.1 mrg
4753 1.1 mrg /* Intrinsics vcvtps2ph. */
/* vcvtps2phx: narrow 16 floats to 16 _Float16 values in a 256-bit
   vector, using the current rounding direction; merge and zero
   masking forms as usual.  */
4754 1.1 mrg extern __inline __m256h
4755 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4756 1.1 mrg _mm512_cvtxps_ph (__m512 __A)
4757 1.1 mrg {
4758 1.1 mrg return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A,
4759 1.1 mrg _mm256_setzero_ph (),
4760 1.1 mrg (__mmask16) -1,
4761 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4762 1.1 mrg }
4763 1.1 mrg
4764 1.1 mrg extern __inline __m256h
4765 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4766 1.1 mrg _mm512_mask_cvtxps_ph (__m256h __A, __mmask16 __B, __m512 __C)
4767 1.1 mrg {
4768 1.1 mrg return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C,
4769 1.1 mrg __A, __B,
4770 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4771 1.1 mrg }
4772 1.1 mrg
4773 1.1 mrg extern __inline __m256h
4774 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4775 1.1 mrg _mm512_maskz_cvtxps_ph (__mmask16 __A, __m512 __B)
4776 1.1 mrg {
4777 1.1 mrg return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B,
4778 1.1 mrg _mm256_setzero_ph (),
4779 1.1 mrg __A,
4780 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4781 1.1 mrg }
4782 1.1 mrg
/* Explicit-rounding forms of vcvtps2phx; macro fallbacks when not
   optimizing.  */
4783 1.1 mrg #ifdef __OPTIMIZE__
4784 1.1 mrg extern __inline __m256h
4785 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4786 1.1 mrg _mm512_cvtx_roundps_ph (__m512 __A, int __B)
4787 1.1 mrg {
4788 1.1 mrg return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A,
4789 1.1 mrg _mm256_setzero_ph (),
4790 1.1 mrg (__mmask16) -1,
4791 1.1 mrg __B);
4792 1.1 mrg }
4793 1.1 mrg
4794 1.1 mrg extern __inline __m256h
4795 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4796 1.1 mrg _mm512_mask_cvtx_roundps_ph (__m256h __A, __mmask16 __B, __m512 __C, int __D)
4797 1.1 mrg {
4798 1.1 mrg return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C,
4799 1.1 mrg __A, __B, __D);
4800 1.1 mrg }
4801 1.1 mrg
4802 1.1 mrg extern __inline __m256h
4803 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4804 1.1 mrg _mm512_maskz_cvtx_roundps_ph (__mmask16 __A, __m512 __B, int __C)
4805 1.1 mrg {
4806 1.1 mrg return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B,
4807 1.1 mrg _mm256_setzero_ph (),
4808 1.1 mrg __A, __C);
4809 1.1 mrg }
4810 1.1 mrg
4811 1.1 mrg #else
4812 1.1 mrg #define _mm512_cvtx_roundps_ph(A, B) \
4813 1.1 mrg (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(A), \
4814 1.1 mrg _mm256_setzero_ph (),\
4815 1.1 mrg (__mmask16)-1, (B)))
4816 1.1 mrg
4817 1.1 mrg #define _mm512_mask_cvtx_roundps_ph(A, B, C, D) \
4818 1.1 mrg (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(C), \
4819 1.1 mrg (A), (B), (D)))
4820 1.1 mrg
4821 1.1 mrg #define _mm512_maskz_cvtx_roundps_ph(A, B, C) \
4822 1.1 mrg (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(B), \
4823 1.1 mrg _mm256_setzero_ph (),\
4824 1.1 mrg (A), (C)))
4825 1.1 mrg #endif /* __OPTIMIZE__ */
4826 1.1 mrg
4827 1.1 mrg /* Intrinsics vcvtpd2ph. */
/* vcvtpd2ph: narrow 8 doubles to 8 _Float16 values in a 128-bit
   vector, using the current rounding direction; merge and zero
   masking forms as usual.  */
4828 1.1 mrg extern __inline __m128h
4829 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4830 1.1 mrg _mm512_cvtpd_ph (__m512d __A)
4831 1.1 mrg {
4832 1.1 mrg return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A,
4833 1.1 mrg _mm_setzero_ph (),
4834 1.1 mrg (__mmask8) -1,
4835 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4836 1.1 mrg }
4837 1.1 mrg
4838 1.1 mrg extern __inline __m128h
4839 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4840 1.1 mrg _mm512_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m512d __C)
4841 1.1 mrg {
4842 1.1 mrg return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C,
4843 1.1 mrg __A, __B,
4844 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4845 1.1 mrg }
4846 1.1 mrg
4847 1.1 mrg extern __inline __m128h
4848 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4849 1.1 mrg _mm512_maskz_cvtpd_ph (__mmask8 __A, __m512d __B)
4850 1.1 mrg {
4851 1.1 mrg return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B,
4852 1.1 mrg _mm_setzero_ph (),
4853 1.1 mrg __A,
4854 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4855 1.1 mrg }
4856 1.1 mrg
/* Explicit-rounding forms of vcvtpd2ph; macro fallbacks when not
   optimizing.  */
4857 1.1 mrg #ifdef __OPTIMIZE__
4858 1.1 mrg extern __inline __m128h
4859 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4860 1.1 mrg _mm512_cvt_roundpd_ph (__m512d __A, int __B)
4861 1.1 mrg {
4862 1.1 mrg return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A,
4863 1.1 mrg _mm_setzero_ph (),
4864 1.1 mrg (__mmask8) -1,
4865 1.1 mrg __B);
4866 1.1 mrg }
4867 1.1 mrg
4868 1.1 mrg extern __inline __m128h
4869 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4870 1.1 mrg _mm512_mask_cvt_roundpd_ph (__m128h __A, __mmask8 __B, __m512d __C, int __D)
4871 1.1 mrg {
4872 1.1 mrg return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C,
4873 1.1 mrg __A, __B, __D);
4874 1.1 mrg }
4875 1.1 mrg
4876 1.1 mrg extern __inline __m128h
4877 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4878 1.1 mrg _mm512_maskz_cvt_roundpd_ph (__mmask8 __A, __m512d __B, int __C)
4879 1.1 mrg {
4880 1.1 mrg return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B,
4881 1.1 mrg _mm_setzero_ph (),
4882 1.1 mrg __A, __C);
4883 1.1 mrg }
4884 1.1 mrg
4885 1.1 mrg #else
4886 1.1 mrg #define _mm512_cvt_roundpd_ph(A, B) \
4887 1.1 mrg (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(A), \
4888 1.1 mrg _mm_setzero_ph (), \
4889 1.1 mrg (__mmask8)-1, (B)))
4890 1.1 mrg
4891 1.1 mrg #define _mm512_mask_cvt_roundpd_ph(A, B, C, D) \
4892 1.1 mrg (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(C), \
4893 1.1 mrg (A), (B), (D)))
4894 1.1 mrg
4895 1.1 mrg #define _mm512_maskz_cvt_roundpd_ph(A, B, C) \
4896 1.1 mrg (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(B), \
4897 1.1 mrg _mm_setzero_ph (), \
4898 1.1 mrg (A), (C)))
4899 1.1 mrg
4900 1.1 mrg #endif /* __OPTIMIZE__ */
4901 1.1 mrg
4902 1.1 mrg /* Intrinsics vcvtsh2ss, vcvtsh2sd. */
/* Scalar FP16 -> float conversion: element 0 of the result is __B[0]
   widened to float; upper elements are copied from __A (pass-through
   operand).  Mask/maskz variants select/zero element 0 as usual.  */
4903 1.1 mrg extern __inline __m128
4904 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4905 1.1 mrg _mm_cvtsh_ss (__m128 __A, __m128h __B)
4906 1.1 mrg {
4907 1.1 mrg return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
4908 1.1 mrg _mm_setzero_ps (),
4909 1.1 mrg (__mmask8) -1,
4910 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4911 1.1 mrg }
4912 1.1 mrg
4913 1.1 mrg extern __inline __m128
4914 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4915 1.1 mrg _mm_mask_cvtsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
4916 1.1 mrg __m128h __D)
4917 1.1 mrg {
4918 1.1 mrg return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B,
4919 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4920 1.1 mrg }
4921 1.1 mrg
4922 1.1 mrg extern __inline __m128
4923 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4924 1.1 mrg _mm_maskz_cvtsh_ss (__mmask8 __A, __m128 __B,
4925 1.1 mrg __m128h __C)
4926 1.1 mrg {
4927 1.1 mrg return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
4928 1.1 mrg _mm_setzero_ps (),
4929 1.1 mrg __A, _MM_FROUND_CUR_DIRECTION);
4930 1.1 mrg }
4931 1.1 mrg
/* Scalar FP16 -> double conversion, same operand convention as above.  */
4932 1.1 mrg extern __inline __m128d
4933 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4934 1.1 mrg _mm_cvtsh_sd (__m128d __A, __m128h __B)
4935 1.1 mrg {
4936 1.1 mrg return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
4937 1.1 mrg _mm_setzero_pd (),
4938 1.1 mrg (__mmask8) -1,
4939 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4940 1.1 mrg }
4941 1.1 mrg
4942 1.1 mrg extern __inline __m128d
4943 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4944 1.1 mrg _mm_mask_cvtsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
4945 1.1 mrg __m128h __D)
4946 1.1 mrg {
4947 1.1 mrg return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B,
4948 1.1 mrg _MM_FROUND_CUR_DIRECTION);
4949 1.1 mrg }
4950 1.1 mrg
4951 1.1 mrg extern __inline __m128d
4952 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4953 1.1 mrg _mm_maskz_cvtsh_sd (__mmask8 __A, __m128d __B, __m128h __C)
4954 1.1 mrg {
4955 1.1 mrg return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
4956 1.1 mrg _mm_setzero_pd (),
4957 1.1 mrg __A, _MM_FROUND_CUR_DIRECTION);
4958 1.1 mrg }
4959 1.1 mrg
4960 1.1 mrg #ifdef __OPTIMIZE__
/* Rounding-control variants of the scalar sh->ss / sh->sd conversions;
   __R is the rounding immediate and must be a compile-time constant
   (hence the __OPTIMIZE__ guard surrounding these definitions).  */
4961 1.1 mrg extern __inline __m128
4962 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4963 1.1 mrg _mm_cvt_roundsh_ss (__m128 __A, __m128h __B, const int __R)
4964 1.1 mrg {
4965 1.1 mrg return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
4966 1.1 mrg _mm_setzero_ps (),
4967 1.1 mrg (__mmask8) -1, __R);
4968 1.1 mrg }
4969 1.1 mrg
4970 1.1 mrg extern __inline __m128
4971 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4972 1.1 mrg _mm_mask_cvt_roundsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
4973 1.1 mrg __m128h __D, const int __R)
4974 1.1 mrg {
4975 1.1 mrg return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B, __R);
4976 1.1 mrg }
4977 1.1 mrg
4978 1.1 mrg extern __inline __m128
4979 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4980 1.1 mrg _mm_maskz_cvt_roundsh_ss (__mmask8 __A, __m128 __B,
4981 1.1 mrg __m128h __C, const int __R)
4982 1.1 mrg {
4983 1.1 mrg return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
4984 1.1 mrg _mm_setzero_ps (),
4985 1.1 mrg __A, __R);
4986 1.1 mrg }
4987 1.1 mrg
4988 1.1 mrg extern __inline __m128d
4989 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4990 1.1 mrg _mm_cvt_roundsh_sd (__m128d __A, __m128h __B, const int __R)
4991 1.1 mrg {
4992 1.1 mrg return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
4993 1.1 mrg _mm_setzero_pd (),
4994 1.1 mrg (__mmask8) -1, __R);
4995 1.1 mrg }
4996 1.1 mrg
4997 1.1 mrg extern __inline __m128d
4998 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4999 1.1 mrg _mm_mask_cvt_roundsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
5000 1.1 mrg __m128h __D, const int __R)
5001 1.1 mrg {
5002 1.1 mrg return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B, __R);
5003 1.1 mrg }
5004 1.1 mrg
5005 1.1 mrg extern __inline __m128d
5006 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5007 1.1 mrg _mm_maskz_cvt_roundsh_sd (__mmask8 __A, __m128d __B, __m128h __C, const int __R)
5008 1.1 mrg {
5009 1.1 mrg return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
5010 1.1 mrg _mm_setzero_pd (),
5011 1.1 mrg __A, __R);
5012 1.1 mrg }
5013 1.1 mrg
5014 1.1 mrg #else
/* Macro fallbacks used when __OPTIMIZE__ is off: argument order mirrors
   the inline functions above, with all macro parameters parenthesized.  */
5015 1.1 mrg #define _mm_cvt_roundsh_ss(A, B, R) \
5016 1.1 mrg (__builtin_ia32_vcvtsh2ss_mask_round ((B), (A), \
5017 1.1 mrg _mm_setzero_ps (), \
5018 1.1 mrg (__mmask8) -1, (R)))
5019 1.1 mrg
5020 1.1 mrg #define _mm_mask_cvt_roundsh_ss(A, B, C, D, R) \
5021 1.1 mrg (__builtin_ia32_vcvtsh2ss_mask_round ((D), (C), (A), (B), (R)))
5022 1.1 mrg
5023 1.1 mrg #define _mm_maskz_cvt_roundsh_ss(A, B, C, R) \
5024 1.1 mrg (__builtin_ia32_vcvtsh2ss_mask_round ((C), (B), \
5025 1.1 mrg _mm_setzero_ps (), \
5026 1.1 mrg (A), (R)))
5027 1.1 mrg
5028 1.1 mrg #define _mm_cvt_roundsh_sd(A, B, R) \
5029 1.1 mrg (__builtin_ia32_vcvtsh2sd_mask_round ((B), (A), \
5030 1.1 mrg _mm_setzero_pd (), \
5031 1.1 mrg (__mmask8) -1, (R)))
5032 1.1 mrg
5033 1.1 mrg #define _mm_mask_cvt_roundsh_sd(A, B, C, D, R) \
5034 1.1 mrg (__builtin_ia32_vcvtsh2sd_mask_round ((D), (C), (A), (B), (R)))
5035 1.1 mrg
5036 1.1 mrg #define _mm_maskz_cvt_roundsh_sd(A, B, C, R) \
5037 1.1 mrg (__builtin_ia32_vcvtsh2sd_mask_round ((C), (B), \
5038 1.1 mrg _mm_setzero_pd (), \
5039 1.1 mrg (A), (R)))
5040 1.1 mrg
5041 1.1 mrg #endif /* __OPTIMIZE__ */
5042 1.1 mrg
5043 1.1 mrg /* Intrinsics vcvtss2sh, vcvtsd2sh. */
/* Scalar float/double -> FP16 conversion: element 0 of the result is the
   narrowed value of __B[0]/__D[0]; remaining FP16 lanes come from the
   pass-through operand.  Current rounding direction is used.  */
5044 1.1 mrg extern __inline __m128h
5045 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5046 1.1 mrg _mm_cvtss_sh (__m128h __A, __m128 __B)
5047 1.1 mrg {
5048 1.1 mrg return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
5049 1.1 mrg _mm_setzero_ph (),
5050 1.1 mrg (__mmask8) -1,
5051 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5052 1.1 mrg }
5053 1.1 mrg
5054 1.1 mrg extern __inline __m128h
5055 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5056 1.1 mrg _mm_mask_cvtss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D)
5057 1.1 mrg {
5058 1.1 mrg return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B,
5059 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5060 1.1 mrg }
5061 1.1 mrg
5062 1.1 mrg extern __inline __m128h
5063 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5064 1.1 mrg _mm_maskz_cvtss_sh (__mmask8 __A, __m128h __B, __m128 __C)
5065 1.1 mrg {
5066 1.1 mrg return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
5067 1.1 mrg _mm_setzero_ph (),
5068 1.1 mrg __A, _MM_FROUND_CUR_DIRECTION);
5069 1.1 mrg }
5070 1.1 mrg
5071 1.1 mrg extern __inline __m128h
5072 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5073 1.1 mrg _mm_cvtsd_sh (__m128h __A, __m128d __B)
5074 1.1 mrg {
5075 1.1 mrg return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
5076 1.1 mrg _mm_setzero_ph (),
5077 1.1 mrg (__mmask8) -1,
5078 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5079 1.1 mrg }
5080 1.1 mrg
5081 1.1 mrg extern __inline __m128h
5082 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5083 1.1 mrg _mm_mask_cvtsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D)
5084 1.1 mrg {
5085 1.1 mrg return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B,
5086 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5087 1.1 mrg }
5088 1.1 mrg
5089 1.1 mrg extern __inline __m128h
5090 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5091 1.1 mrg _mm_maskz_cvtsd_sh (__mmask8 __A, __m128h __B, __m128d __C)
5092 1.1 mrg {
5093 1.1 mrg return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
5094 1.1 mrg _mm_setzero_ph (),
5095 1.1 mrg __A, _MM_FROUND_CUR_DIRECTION);
5096 1.1 mrg }
5097 1.1 mrg
5098 1.1 mrg #ifdef __OPTIMIZE__
/* Rounding-control variants of the scalar ss->sh / sd->sh conversions;
   __R must be a compile-time rounding immediate (__OPTIMIZE__ guard).  */
5099 1.1 mrg extern __inline __m128h
5100 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5101 1.1 mrg _mm_cvt_roundss_sh (__m128h __A, __m128 __B, const int __R)
5102 1.1 mrg {
5103 1.1 mrg return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
5104 1.1 mrg _mm_setzero_ph (),
5105 1.1 mrg (__mmask8) -1, __R);
5106 1.1 mrg }
5107 1.1 mrg
5108 1.1 mrg extern __inline __m128h
5109 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5110 1.1 mrg _mm_mask_cvt_roundss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D,
5111 1.1 mrg const int __R)
5112 1.1 mrg {
5113 1.1 mrg return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B, __R);
5114 1.1 mrg }
5115 1.1 mrg
5116 1.1 mrg extern __inline __m128h
5117 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5118 1.1 mrg _mm_maskz_cvt_roundss_sh (__mmask8 __A, __m128h __B, __m128 __C,
5119 1.1 mrg const int __R)
5120 1.1 mrg {
5121 1.1 mrg return __builtin_ia32_vcvtss2sh_mask_round ((__C), (__B),
5122 1.1 mrg _mm_setzero_ph (),
5123 1.1 mrg __A, __R);
5124 1.1 mrg }
5125 1.1 mrg
5126 1.1 mrg extern __inline __m128h
5127 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5128 1.1 mrg _mm_cvt_roundsd_sh (__m128h __A, __m128d __B, const int __R)
5129 1.1 mrg {
5130 1.1 mrg return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
5131 1.1 mrg _mm_setzero_ph (),
5132 1.1 mrg (__mmask8) -1, __R);
5133 1.1 mrg }
5134 1.1 mrg
5135 1.1 mrg extern __inline __m128h
5136 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5137 1.1 mrg _mm_mask_cvt_roundsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D,
5138 1.1 mrg const int __R)
5139 1.1 mrg {
5140 1.1 mrg return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B, __R);
5141 1.1 mrg }
5142 1.1 mrg
5143 1.1 mrg extern __inline __m128h
5144 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5145 1.1 mrg _mm_maskz_cvt_roundsd_sh (__mmask8 __A, __m128h __B, __m128d __C,
5146 1.1 mrg const int __R)
5147 1.1 mrg {
5148 1.1 mrg return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
5149 1.1 mrg _mm_setzero_ph (),
5150 1.1 mrg __A, __R)
5151 1.1 mrg }
5152 1.1 mrg
5153 1.1 mrg #else
/* _mm_cvt_roundss_sh fallback macro (non-__OPTIMIZE__ build).
   Fix: parenthesize the macro argument R in the expansion; the sibling
   macros (e.g. _mm_cvt_roundsh_ss, _mm_maskz_cvt_roundsd_sh) all wrap
   their arguments, and an unparenthesized R is a precedence hazard when
   the caller passes an expression such as `a | b'.  */
5154 1.1 mrg #define _mm_cvt_roundss_sh(A, B, R) \
5155 1.1 mrg (__builtin_ia32_vcvtss2sh_mask_round ((B), (A), \
5156 1.1 mrg _mm_setzero_ph (), \
5157 1.1 mrg (__mmask8) -1, (R)))
5158 1.1 mrg
/* Merge-masked ss->sh fallback macro; all arguments parenthesized.  */
5159 1.1 mrg #define _mm_mask_cvt_roundss_sh(A, B, C, D, R) \
5160 1.1 mrg (__builtin_ia32_vcvtss2sh_mask_round ((D), (C), (A), (B), (R)))
5161 1.1 mrg
/* _mm_maskz_cvt_roundss_sh fallback macro (non-__OPTIMIZE__ build).
   Fix: parenthesize A and R in the expansion, matching the sibling
   _mm_maskz_cvt_roundsd_sh macro; unparenthesized macro arguments are a
   precedence hazard when callers pass expressions.  */
5162 1.1 mrg #define _mm_maskz_cvt_roundss_sh(A, B, C, R) \
5163 1.1 mrg (__builtin_ia32_vcvtss2sh_mask_round ((C), (B), \
5164 1.1 mrg _mm_setzero_ph (), \
5165 1.1 mrg (A), (R)))
5166 1.1 mrg
/* _mm_cvt_roundsd_sh fallback macro (non-__OPTIMIZE__ build).
   Fix: parenthesize the macro argument R, consistent with the other
   *_round macros in this header (precedence safety).  */
5167 1.1 mrg #define _mm_cvt_roundsd_sh(A, B, R) \
5168 1.1 mrg (__builtin_ia32_vcvtsd2sh_mask_round ((B), (A), \
5169 1.1 mrg _mm_setzero_ph (), \
5170 1.1 mrg (__mmask8) -1, (R)))
5171 1.1 mrg
/* Merge- and zero-masked sd->sh fallback macros; arguments are fully
   parenthesized here.  */
5172 1.1 mrg #define _mm_mask_cvt_roundsd_sh(A, B, C, D, R) \
5173 1.1 mrg (__builtin_ia32_vcvtsd2sh_mask_round ((D), (C), (A), (B), (R)))
5174 1.1 mrg
5175 1.1 mrg #define _mm_maskz_cvt_roundsd_sh(A, B, C, R) \
5176 1.1 mrg (__builtin_ia32_vcvtsd2sh_mask_round ((C), (B), \
5177 1.1 mrg _mm_setzero_ph (), \
5178 1.1 mrg (A), (R)))
5179 1.1 mrg
5180 1.1 mrg #endif /* __OPTIMIZE__ */
5181 1.1 mrg
5182 1.1 mrg /* Intrinsics vfmaddsub[132,213,231]ph. */
/* Fused multiply-alternating-add/subtract on 32 FP16 lanes.
   mask: merge with __A (first multiplicand); mask3: merge with __C
   (addend); maskz: zero masked-off lanes.  Current rounding direction.  */
5183 1.1 mrg extern __inline __m512h
5184 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5185 1.1 mrg _mm512_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C)
5186 1.1 mrg {
5187 1.1 mrg return (__m512h)
5188 1.1 mrg __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
5189 1.1 mrg (__v32hf) __B,
5190 1.1 mrg (__v32hf) __C,
5191 1.1 mrg (__mmask32) -1,
5192 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5193 1.1 mrg }
5194 1.1 mrg
5195 1.1 mrg extern __inline __m512h
5196 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5197 1.1 mrg _mm512_mask_fmaddsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
5198 1.1 mrg {
5199 1.1 mrg return (__m512h)
5200 1.1 mrg __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
5201 1.1 mrg (__v32hf) __B,
5202 1.1 mrg (__v32hf) __C,
5203 1.1 mrg (__mmask32) __U,
5204 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5205 1.1 mrg }
5206 1.1 mrg
5207 1.1 mrg extern __inline __m512h
5208 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5209 1.1 mrg _mm512_mask3_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
5210 1.1 mrg {
5211 1.1 mrg return (__m512h)
5212 1.1 mrg __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A,
5213 1.1 mrg (__v32hf) __B,
5214 1.1 mrg (__v32hf) __C,
5215 1.1 mrg (__mmask32) __U,
5216 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5217 1.1 mrg }
5218 1.1 mrg
5219 1.1 mrg extern __inline __m512h
5220 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5221 1.1 mrg _mm512_maskz_fmaddsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
5222 1.1 mrg {
5223 1.1 mrg return (__m512h)
5224 1.1 mrg __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A,
5225 1.1 mrg (__v32hf) __B,
5226 1.1 mrg (__v32hf) __C,
5227 1.1 mrg (__mmask32) __U,
5228 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5229 1.1 mrg }
5230 1.1 mrg
5231 1.1 mrg #ifdef __OPTIMIZE__
/* Rounding-control variants of fmaddsub; __R is the rounding immediate
   (compile-time constant, hence the __OPTIMIZE__ guard).  */
5232 1.1 mrg extern __inline __m512h
5233 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5234 1.1 mrg _mm512_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
5235 1.1 mrg {
5236 1.1 mrg return (__m512h)
5237 1.1 mrg __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
5238 1.1 mrg (__v32hf) __B,
5239 1.1 mrg (__v32hf) __C,
5240 1.1 mrg (__mmask32) -1, __R);
5241 1.1 mrg }
5242 1.1 mrg
5243 1.1 mrg extern __inline __m512h
5244 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5245 1.1 mrg _mm512_mask_fmaddsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
5246 1.1 mrg __m512h __C, const int __R)
5247 1.1 mrg {
5248 1.1 mrg return (__m512h)
5249 1.1 mrg __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
5250 1.1 mrg (__v32hf) __B,
5251 1.1 mrg (__v32hf) __C,
5252 1.1 mrg (__mmask32) __U, __R);
5253 1.1 mrg }
5254 1.1 mrg
5255 1.1 mrg extern __inline __m512h
5256 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5257 1.1 mrg _mm512_mask3_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
5258 1.1 mrg __mmask32 __U, const int __R)
5259 1.1 mrg {
5260 1.1 mrg return (__m512h)
5261 1.1 mrg __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A,
5262 1.1 mrg (__v32hf) __B,
5263 1.1 mrg (__v32hf) __C,
5264 1.1 mrg (__mmask32) __U, __R);
5265 1.1 mrg }
5266 1.1 mrg
5267 1.1 mrg extern __inline __m512h
5268 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5269 1.1 mrg _mm512_maskz_fmaddsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
5270 1.1 mrg __m512h __C, const int __R)
5271 1.1 mrg {
5272 1.1 mrg return (__m512h)
5273 1.1 mrg __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A,
5274 1.1 mrg (__v32hf) __B,
5275 1.1 mrg (__v32hf) __C,
5276 1.1 mrg (__mmask32) __U, __R);
5277 1.1 mrg }
5278 1.1 mrg
5279 1.1 mrg #else
/* Macro fallbacks for the fmaddsub *_round intrinsics (non-__OPTIMIZE__).  */
5280 1.1 mrg #define _mm512_fmaddsub_round_ph(A, B, C, R) \
5281 1.1 mrg ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), -1, (R)))
5282 1.1 mrg
5283 1.1 mrg #define _mm512_mask_fmaddsub_round_ph(A, U, B, C, R) \
5284 1.1 mrg ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), (U), (R)))
5285 1.1 mrg
5286 1.1 mrg #define _mm512_mask3_fmaddsub_round_ph(A, B, C, U, R) \
5287 1.1 mrg ((__m512h)__builtin_ia32_vfmaddsubph512_mask3 ((A), (B), (C), (U), (R)))
5288 1.1 mrg
5289 1.1 mrg #define _mm512_maskz_fmaddsub_round_ph(U, A, B, C, R) \
5290 1.1 mrg ((__m512h)__builtin_ia32_vfmaddsubph512_maskz ((A), (B), (C), (U), (R)))
5291 1.1 mrg
5292 1.1 mrg #endif /* __OPTIMIZE__ */
5293 1.1 mrg
5294 1.1 mrg /* Intrinsics vfmsubadd[132,213,231]ph. */
/* Fused multiply-alternating-subtract/add on 32 FP16 lanes (mirror of
   fmaddsub); mask/mask3/maskz follow the usual merge/merge-into-__C/zero
   conventions.  Current rounding direction.  */
5295 1.1 mrg extern __inline __m512h
5296 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5297 1.1 mrg _mm512_fmsubadd_ph (__m512h __A, __m512h __B, __m512h __C)
5298 1.1 mrg {
5299 1.1 mrg return (__m512h)
5300 1.1 mrg __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
5301 1.1 mrg (__v32hf) __B,
5302 1.1 mrg (__v32hf) __C,
5303 1.1 mrg (__mmask32) -1,
5304 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5305 1.1 mrg }
5306 1.1 mrg
5307 1.1 mrg extern __inline __m512h
5308 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5309 1.1 mrg _mm512_mask_fmsubadd_ph (__m512h __A, __mmask32 __U,
5310 1.1 mrg __m512h __B, __m512h __C)
5311 1.1 mrg {
5312 1.1 mrg return (__m512h)
5313 1.1 mrg __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
5314 1.1 mrg (__v32hf) __B,
5315 1.1 mrg (__v32hf) __C,
5316 1.1 mrg (__mmask32) __U,
5317 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5318 1.1 mrg }
5319 1.1 mrg
5320 1.1 mrg extern __inline __m512h
5321 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5322 1.1 mrg _mm512_mask3_fmsubadd_ph (__m512h __A, __m512h __B,
5323 1.1 mrg __m512h __C, __mmask32 __U)
5324 1.1 mrg {
5325 1.1 mrg return (__m512h)
5326 1.1 mrg __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A,
5327 1.1 mrg (__v32hf) __B,
5328 1.1 mrg (__v32hf) __C,
5329 1.1 mrg (__mmask32) __U,
5330 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5331 1.1 mrg }
5332 1.1 mrg
5333 1.1 mrg extern __inline __m512h
5334 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5335 1.1 mrg _mm512_maskz_fmsubadd_ph (__mmask32 __U, __m512h __A,
5336 1.1 mrg __m512h __B, __m512h __C)
5337 1.1 mrg {
5338 1.1 mrg return (__m512h)
5339 1.1 mrg __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A,
5340 1.1 mrg (__v32hf) __B,
5341 1.1 mrg (__v32hf) __C,
5342 1.1 mrg (__mmask32) __U,
5343 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5344 1.1 mrg }
5345 1.1 mrg
5346 1.1 mrg #ifdef __OPTIMIZE__
/* Rounding-control variants of fmsubadd (__R: rounding immediate).  */
5347 1.1 mrg extern __inline __m512h
5348 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5349 1.1 mrg _mm512_fmsubadd_round_ph (__m512h __A, __m512h __B,
5350 1.1 mrg __m512h __C, const int __R)
5351 1.1 mrg {
5352 1.1 mrg return (__m512h)
5353 1.1 mrg __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
5354 1.1 mrg (__v32hf) __B,
5355 1.1 mrg (__v32hf) __C,
5356 1.1 mrg (__mmask32) -1, __R);
5357 1.1 mrg }
5358 1.1 mrg
5359 1.1 mrg extern __inline __m512h
5360 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5361 1.1 mrg _mm512_mask_fmsubadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
5362 1.1 mrg __m512h __C, const int __R)
5363 1.1 mrg {
5364 1.1 mrg return (__m512h)
5365 1.1 mrg __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
5366 1.1 mrg (__v32hf) __B,
5367 1.1 mrg (__v32hf) __C,
5368 1.1 mrg (__mmask32) __U, __R);
5369 1.1 mrg }
5370 1.1 mrg
5371 1.1 mrg extern __inline __m512h
5372 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5373 1.1 mrg _mm512_mask3_fmsubadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
5374 1.1 mrg __mmask32 __U, const int __R)
5375 1.1 mrg {
5376 1.1 mrg return (__m512h)
5377 1.1 mrg __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A,
5378 1.1 mrg (__v32hf) __B,
5379 1.1 mrg (__v32hf) __C,
5380 1.1 mrg (__mmask32) __U, __R);
5381 1.1 mrg }
5382 1.1 mrg
5383 1.1 mrg extern __inline __m512h
5384 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5385 1.1 mrg _mm512_maskz_fmsubadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
5386 1.1 mrg __m512h __C, const int __R)
5387 1.1 mrg {
5388 1.1 mrg return (__m512h)
5389 1.1 mrg __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A,
5390 1.1 mrg (__v32hf) __B,
5391 1.1 mrg (__v32hf) __C,
5392 1.1 mrg (__mmask32) __U, __R);
5393 1.1 mrg }
5394 1.1 mrg
5395 1.1 mrg #else
/* Macro fallbacks for the fmsubadd *_round intrinsics (non-__OPTIMIZE__).  */
5396 1.1 mrg #define _mm512_fmsubadd_round_ph(A, B, C, R) \
5397 1.1 mrg ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), -1, (R)))
5398 1.1 mrg
5399 1.1 mrg #define _mm512_mask_fmsubadd_round_ph(A, U, B, C, R) \
5400 1.1 mrg ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), (U), (R)))
5401 1.1 mrg
5402 1.1 mrg #define _mm512_mask3_fmsubadd_round_ph(A, B, C, U, R) \
5403 1.1 mrg ((__m512h)__builtin_ia32_vfmsubaddph512_mask3 ((A), (B), (C), (U), (R)))
5404 1.1 mrg
5405 1.1 mrg #define _mm512_maskz_fmsubadd_round_ph(U, A, B, C, R) \
5406 1.1 mrg ((__m512h)__builtin_ia32_vfmsubaddph512_maskz ((A), (B), (C), (U), (R)))
5407 1.1 mrg
5408 1.1 mrg #endif /* __OPTIMIZE__ */
5409 1.1 mrg
5410 1.1 mrg /* Intrinsics vfmadd[132,213,231]ph. */
/* Fused multiply-add on 32 FP16 lanes: __A * __B + __C.
   mask/mask3/maskz: merge with __A / merge with __C / zero.  */
5411 1.1 mrg extern __inline __m512h
5412 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5413 1.1 mrg _mm512_fmadd_ph (__m512h __A, __m512h __B, __m512h __C)
5414 1.1 mrg {
5415 1.1 mrg return (__m512h)
5416 1.1 mrg __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
5417 1.1 mrg (__v32hf) __B,
5418 1.1 mrg (__v32hf) __C,
5419 1.1 mrg (__mmask32) -1,
5420 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5421 1.1 mrg }
5422 1.1 mrg
5423 1.1 mrg extern __inline __m512h
5424 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5425 1.1 mrg _mm512_mask_fmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
5426 1.1 mrg {
5427 1.1 mrg return (__m512h)
5428 1.1 mrg __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
5429 1.1 mrg (__v32hf) __B,
5430 1.1 mrg (__v32hf) __C,
5431 1.1 mrg (__mmask32) __U,
5432 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5433 1.1 mrg }
5434 1.1 mrg
5435 1.1 mrg extern __inline __m512h
5436 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5437 1.1 mrg _mm512_mask3_fmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
5438 1.1 mrg {
5439 1.1 mrg return (__m512h)
5440 1.1 mrg __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A,
5441 1.1 mrg (__v32hf) __B,
5442 1.1 mrg (__v32hf) __C,
5443 1.1 mrg (__mmask32) __U,
5444 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5445 1.1 mrg }
5446 1.1 mrg
5447 1.1 mrg extern __inline __m512h
5448 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5449 1.1 mrg _mm512_maskz_fmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
5450 1.1 mrg {
5451 1.1 mrg return (__m512h)
5452 1.1 mrg __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A,
5453 1.1 mrg (__v32hf) __B,
5454 1.1 mrg (__v32hf) __C,
5455 1.1 mrg (__mmask32) __U,
5456 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5457 1.1 mrg }
5458 1.1 mrg
5459 1.1 mrg #ifdef __OPTIMIZE__
/* Rounding-control variants of fmadd (__R: rounding immediate).  */
5460 1.1 mrg extern __inline __m512h
5461 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5462 1.1 mrg _mm512_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
5463 1.1 mrg {
5464 1.1 mrg return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
5465 1.1 mrg (__v32hf) __B,
5466 1.1 mrg (__v32hf) __C,
5467 1.1 mrg (__mmask32) -1, __R);
5468 1.1 mrg }
5469 1.1 mrg
5470 1.1 mrg extern __inline __m512h
5471 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5472 1.1 mrg _mm512_mask_fmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
5473 1.1 mrg __m512h __C, const int __R)
5474 1.1 mrg {
5475 1.1 mrg return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
5476 1.1 mrg (__v32hf) __B,
5477 1.1 mrg (__v32hf) __C,
5478 1.1 mrg (__mmask32) __U, __R);
5479 1.1 mrg }
5480 1.1 mrg
5481 1.1 mrg extern __inline __m512h
5482 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5483 1.1 mrg _mm512_mask3_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
5484 1.1 mrg __mmask32 __U, const int __R)
5485 1.1 mrg {
5486 1.1 mrg return (__m512h) __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A,
5487 1.1 mrg (__v32hf) __B,
5488 1.1 mrg (__v32hf) __C,
5489 1.1 mrg (__mmask32) __U, __R);
5490 1.1 mrg }
5491 1.1 mrg
5492 1.1 mrg extern __inline __m512h
5493 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5494 1.1 mrg _mm512_maskz_fmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
5495 1.1 mrg __m512h __C, const int __R)
5496 1.1 mrg {
5497 1.1 mrg return (__m512h) __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A,
5498 1.1 mrg (__v32hf) __B,
5499 1.1 mrg (__v32hf) __C,
5500 1.1 mrg (__mmask32) __U, __R);
5501 1.1 mrg }
5502 1.1 mrg
5503 1.1 mrg #else
/* Macro fallbacks for the fmadd *_round intrinsics (non-__OPTIMIZE__).  */
5504 1.1 mrg #define _mm512_fmadd_round_ph(A, B, C, R) \
5505 1.1 mrg ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), -1, (R)))
5506 1.1 mrg
5507 1.1 mrg #define _mm512_mask_fmadd_round_ph(A, U, B, C, R) \
5508 1.1 mrg ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), (U), (R)))
5509 1.1 mrg
5510 1.1 mrg #define _mm512_mask3_fmadd_round_ph(A, B, C, U, R) \
5511 1.1 mrg ((__m512h)__builtin_ia32_vfmaddph512_mask3 ((A), (B), (C), (U), (R)))
5512 1.1 mrg
5513 1.1 mrg #define _mm512_maskz_fmadd_round_ph(U, A, B, C, R) \
5514 1.1 mrg ((__m512h)__builtin_ia32_vfmaddph512_maskz ((A), (B), (C), (U), (R)))
5515 1.1 mrg
5516 1.1 mrg #endif /* __OPTIMIZE__ */
5517 1.1 mrg
5518 1.1 mrg /* Intrinsics vfnmadd[132,213,231]ph. */
/* Fused negated multiply-add on 32 FP16 lanes: -(__A * __B) + __C.  */
5519 1.1 mrg extern __inline __m512h
5520 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5521 1.1 mrg _mm512_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C)
5522 1.1 mrg {
5523 1.1 mrg return (__m512h)
5524 1.1 mrg __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
5525 1.1 mrg (__v32hf) __B,
5526 1.1 mrg (__v32hf) __C,
5527 1.1 mrg (__mmask32) -1,
5528 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5529 1.1 mrg }
5530 1.1 mrg
5531 1.1 mrg extern __inline __m512h
5532 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5533 1.1 mrg _mm512_mask_fnmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
5534 1.1 mrg {
5535 1.1 mrg return (__m512h)
5536 1.1 mrg __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
5537 1.1 mrg (__v32hf) __B,
5538 1.1 mrg (__v32hf) __C,
5539 1.1 mrg (__mmask32) __U,
5540 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5541 1.1 mrg }
5542 1.1 mrg
5543 1.1 mrg extern __inline __m512h
5544 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5545 1.1 mrg _mm512_mask3_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
5546 1.1 mrg {
5547 1.1 mrg return (__m512h)
5548 1.1 mrg __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A,
5549 1.1 mrg (__v32hf) __B,
5550 1.1 mrg (__v32hf) __C,
5551 1.1 mrg (__mmask32) __U,
5552 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5553 1.1 mrg }
5554 1.1 mrg
5555 1.1 mrg extern __inline __m512h
5556 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5557 1.1 mrg _mm512_maskz_fnmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
5558 1.1 mrg {
5559 1.1 mrg return (__m512h)
5560 1.1 mrg __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A,
5561 1.1 mrg (__v32hf) __B,
5562 1.1 mrg (__v32hf) __C,
5563 1.1 mrg (__mmask32) __U,
5564 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5565 1.1 mrg }
5566 1.1 mrg
5567 1.1 mrg #ifdef __OPTIMIZE__
/* Rounding-control variants of fnmadd (__R: rounding immediate).  */
5568 1.1 mrg extern __inline __m512h
5569 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5570 1.1 mrg _mm512_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
5571 1.1 mrg {
5572 1.1 mrg return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
5573 1.1 mrg (__v32hf) __B,
5574 1.1 mrg (__v32hf) __C,
5575 1.1 mrg (__mmask32) -1, __R);
5576 1.1 mrg }
5577 1.1 mrg
5578 1.1 mrg extern __inline __m512h
5579 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5580 1.1 mrg _mm512_mask_fnmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
5581 1.1 mrg __m512h __C, const int __R)
5582 1.1 mrg {
5583 1.1 mrg return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
5584 1.1 mrg (__v32hf) __B,
5585 1.1 mrg (__v32hf) __C,
5586 1.1 mrg (__mmask32) __U, __R);
5587 1.1 mrg }
5588 1.1 mrg
5589 1.1 mrg extern __inline __m512h
5590 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5591 1.1 mrg _mm512_mask3_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
5592 1.1 mrg __mmask32 __U, const int __R)
5593 1.1 mrg {
5594 1.1 mrg return (__m512h) __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A,
5595 1.1 mrg (__v32hf) __B,
5596 1.1 mrg (__v32hf) __C,
5597 1.1 mrg (__mmask32) __U, __R);
5598 1.1 mrg }
5599 1.1 mrg
5600 1.1 mrg extern __inline __m512h
5601 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5602 1.1 mrg _mm512_maskz_fnmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
5603 1.1 mrg __m512h __C, const int __R)
5604 1.1 mrg {
5605 1.1 mrg return (__m512h) __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A,
5606 1.1 mrg (__v32hf) __B,
5607 1.1 mrg (__v32hf) __C,
5608 1.1 mrg (__mmask32) __U, __R);
5609 1.1 mrg }
5610 1.1 mrg
5611 1.1 mrg #else
/* Macro fallbacks for the fnmadd *_round intrinsics (non-__OPTIMIZE__).  */
5612 1.1 mrg #define _mm512_fnmadd_round_ph(A, B, C, R) \
5613 1.1 mrg ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), -1, (R)))
5614 1.1 mrg
5615 1.1 mrg #define _mm512_mask_fnmadd_round_ph(A, U, B, C, R) \
5616 1.1 mrg ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), (U), (R)))
5617 1.1 mrg
5618 1.1 mrg #define _mm512_mask3_fnmadd_round_ph(A, B, C, U, R) \
5619 1.1 mrg ((__m512h)__builtin_ia32_vfnmaddph512_mask3 ((A), (B), (C), (U), (R)))
5620 1.1 mrg
5621 1.1 mrg #define _mm512_maskz_fnmadd_round_ph(U, A, B, C, R) \
5622 1.1 mrg ((__m512h)__builtin_ia32_vfnmaddph512_maskz ((A), (B), (C), (U), (R)))
5623 1.1 mrg
5624 1.1 mrg #endif /* __OPTIMIZE__ */
5625 1.1 mrg
5626 1.1 mrg /* Intrinsics vfmsub[132,213,231]ph. */
5627 1.1 mrg extern __inline __m512h
5628 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5629 1.1 mrg _mm512_fmsub_ph (__m512h __A, __m512h __B, __m512h __C)
5630 1.1 mrg {
5631 1.1 mrg return (__m512h)
5632 1.1 mrg __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
5633 1.1 mrg (__v32hf) __B,
5634 1.1 mrg (__v32hf) __C,
5635 1.1 mrg (__mmask32) -1,
5636 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5637 1.1 mrg }
5638 1.1 mrg
5639 1.1 mrg extern __inline __m512h
5640 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5641 1.1 mrg _mm512_mask_fmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
5642 1.1 mrg {
5643 1.1 mrg return (__m512h)
5644 1.1 mrg __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
5645 1.1 mrg (__v32hf) __B,
5646 1.1 mrg (__v32hf) __C,
5647 1.1 mrg (__mmask32) __U,
5648 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5649 1.1 mrg }
5650 1.1 mrg
5651 1.1 mrg extern __inline __m512h
5652 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5653 1.1 mrg _mm512_mask3_fmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
5654 1.1 mrg {
5655 1.1 mrg return (__m512h)
5656 1.1 mrg __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A,
5657 1.1 mrg (__v32hf) __B,
5658 1.1 mrg (__v32hf) __C,
5659 1.1 mrg (__mmask32) __U,
5660 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5661 1.1 mrg }
5662 1.1 mrg
5663 1.1 mrg extern __inline __m512h
5664 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5665 1.1 mrg _mm512_maskz_fmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
5666 1.1 mrg {
5667 1.1 mrg return (__m512h)
5668 1.1 mrg __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A,
5669 1.1 mrg (__v32hf) __B,
5670 1.1 mrg (__v32hf) __C,
5671 1.1 mrg (__mmask32) __U,
5672 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5673 1.1 mrg }
5674 1.1 mrg
/* Explicit-rounding (_round) forms of the vfmsubph intrinsics.  The
   rounding operand must reach the builtin as a compile-time constant;
   without __OPTIMIZE__ inlining is not guaranteed, so macro forms are
   provided instead of the inline functions.  */
5675 1.1 mrg #ifdef __OPTIMIZE__
5676 1.1 mrg extern __inline __m512h
5677 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5678 1.1 mrg _mm512_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
5679 1.1 mrg {
5680 1.1 mrg return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
5681 1.1 mrg (__v32hf) __B,
5682 1.1 mrg (__v32hf) __C,
5683 1.1 mrg (__mmask32) -1, __R);
5684 1.1 mrg }
5685 1.1 mrg
/* Merge-masked: clear __U bits keep __A.  */
5686 1.1 mrg extern __inline __m512h
5687 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5688 1.1 mrg _mm512_mask_fmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
5689 1.1 mrg __m512h __C, const int __R)
5690 1.1 mrg {
5691 1.1 mrg return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
5692 1.1 mrg (__v32hf) __B,
5693 1.1 mrg (__v32hf) __C,
5694 1.1 mrg (__mmask32) __U, __R);
5695 1.1 mrg }
5696 1.1 mrg
/* Merge-masked into the third operand: clear __U bits keep __C.  */
5697 1.1 mrg extern __inline __m512h
5698 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5699 1.1 mrg _mm512_mask3_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
5700 1.1 mrg __mmask32 __U, const int __R)
5701 1.1 mrg {
5702 1.1 mrg return (__m512h) __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A,
5703 1.1 mrg (__v32hf) __B,
5704 1.1 mrg (__v32hf) __C,
5705 1.1 mrg (__mmask32) __U, __R);
5706 1.1 mrg }
5707 1.1 mrg
/* Zero-masked: clear __U bits yield zero.  */
5708 1.1 mrg extern __inline __m512h
5709 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5710 1.1 mrg _mm512_maskz_fmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
5711 1.1 mrg __m512h __C, const int __R)
5712 1.1 mrg {
5713 1.1 mrg return (__m512h) __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A,
5714 1.1 mrg (__v32hf) __B,
5715 1.1 mrg (__v32hf) __C,
5716 1.1 mrg (__mmask32) __U, __R);
5717 1.1 mrg }
5718 1.1 mrg
5719 1.1 mrg #else
/* Macro equivalents of the four functions above; arguments map 1:1.  */
5720 1.1 mrg #define _mm512_fmsub_round_ph(A, B, C, R) \
5721 1.1 mrg ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), -1, (R)))
5722 1.1 mrg
5723 1.1 mrg #define _mm512_mask_fmsub_round_ph(A, U, B, C, R) \
5724 1.1 mrg ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), (U), (R)))
5725 1.1 mrg
5726 1.1 mrg #define _mm512_mask3_fmsub_round_ph(A, B, C, U, R) \
5727 1.1 mrg ((__m512h)__builtin_ia32_vfmsubph512_mask3 ((A), (B), (C), (U), (R)))
5728 1.1 mrg
5729 1.1 mrg #define _mm512_maskz_fmsub_round_ph(U, A, B, C, R) \
5730 1.1 mrg ((__m512h)__builtin_ia32_vfmsubph512_maskz ((A), (B), (C), (U), (R)))
5731 1.1 mrg
5732 1.1 mrg #endif /* __OPTIMIZE__ */
5733 1.1 mrg
5734 1.1 mrg /* Intrinsics vfnmsub[132,213,231]ph. */
/* -(A * B) - C on all 32 packed _Float16 lanes; no masking, current
   rounding direction.  */
5735 1.1 mrg extern __inline __m512h
5736 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5737 1.1 mrg _mm512_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C)
5738 1.1 mrg {
5739 1.1 mrg return (__m512h)
5740 1.1 mrg __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
5741 1.1 mrg (__v32hf) __B,
5742 1.1 mrg (__v32hf) __C,
5743 1.1 mrg (__mmask32) -1,
5744 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5745 1.1 mrg }
5746 1.1 mrg
/* Merge-masked: clear __U bits keep __A.  */
5747 1.1 mrg extern __inline __m512h
5748 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5749 1.1 mrg _mm512_mask_fnmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
5750 1.1 mrg {
5751 1.1 mrg return (__m512h)
5752 1.1 mrg __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
5753 1.1 mrg (__v32hf) __B,
5754 1.1 mrg (__v32hf) __C,
5755 1.1 mrg (__mmask32) __U,
5756 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5757 1.1 mrg }
5758 1.1 mrg
/* Merge-masked into the third operand: clear __U bits keep __C.  */
5759 1.1 mrg extern __inline __m512h
5760 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5761 1.1 mrg _mm512_mask3_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
5762 1.1 mrg {
5763 1.1 mrg return (__m512h)
5764 1.1 mrg __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A,
5765 1.1 mrg (__v32hf) __B,
5766 1.1 mrg (__v32hf) __C,
5767 1.1 mrg (__mmask32) __U,
5768 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5769 1.1 mrg }
5770 1.1 mrg
/* Zero-masked: clear __U bits yield zero.  */
5771 1.1 mrg extern __inline __m512h
5772 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5773 1.1 mrg _mm512_maskz_fnmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
5774 1.1 mrg {
5775 1.1 mrg return (__m512h)
5776 1.1 mrg __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A,
5777 1.1 mrg (__v32hf) __B,
5778 1.1 mrg (__v32hf) __C,
5779 1.1 mrg (__mmask32) __U,
5780 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5781 1.1 mrg }
5782 1.1 mrg
/* Explicit-rounding forms of vfnmsubph; inline functions when
   optimizing, macros otherwise (the rounding immediate must be a
   compile-time constant at the builtin).  */
5783 1.1 mrg #ifdef __OPTIMIZE__
5784 1.1 mrg extern __inline __m512h
5785 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5786 1.1 mrg _mm512_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
5787 1.1 mrg {
5788 1.1 mrg return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
5789 1.1 mrg (__v32hf) __B,
5790 1.1 mrg (__v32hf) __C,
5791 1.1 mrg (__mmask32) -1, __R);
5792 1.1 mrg }
5793 1.1 mrg
/* Merge-masked: clear __U bits keep __A.  */
5794 1.1 mrg extern __inline __m512h
5795 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5796 1.1 mrg _mm512_mask_fnmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
5797 1.1 mrg __m512h __C, const int __R)
5798 1.1 mrg {
5799 1.1 mrg return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
5800 1.1 mrg (__v32hf) __B,
5801 1.1 mrg (__v32hf) __C,
5802 1.1 mrg (__mmask32) __U, __R);
5803 1.1 mrg }
5804 1.1 mrg
/* Merge-masked into the third operand: clear __U bits keep __C.  */
5805 1.1 mrg extern __inline __m512h
5806 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5807 1.1 mrg _mm512_mask3_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
5808 1.1 mrg __mmask32 __U, const int __R)
5809 1.1 mrg {
5810 1.1 mrg return (__m512h) __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A,
5811 1.1 mrg (__v32hf) __B,
5812 1.1 mrg (__v32hf) __C,
5813 1.1 mrg (__mmask32) __U, __R);
5814 1.1 mrg }
5815 1.1 mrg
/* Zero-masked: clear __U bits yield zero.  */
5816 1.1 mrg extern __inline __m512h
5817 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5818 1.1 mrg _mm512_maskz_fnmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
5819 1.1 mrg __m512h __C, const int __R)
5820 1.1 mrg {
5821 1.1 mrg return (__m512h) __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A,
5822 1.1 mrg (__v32hf) __B,
5823 1.1 mrg (__v32hf) __C,
5824 1.1 mrg (__mmask32) __U, __R);
5825 1.1 mrg }
5826 1.1 mrg
5827 1.1 mrg #else
/* Macro equivalents of the four functions above; arguments map 1:1.  */
5828 1.1 mrg #define _mm512_fnmsub_round_ph(A, B, C, R) \
5829 1.1 mrg ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), -1, (R)))
5830 1.1 mrg
5831 1.1 mrg #define _mm512_mask_fnmsub_round_ph(A, U, B, C, R) \
5832 1.1 mrg ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), (U), (R)))
5833 1.1 mrg
5834 1.1 mrg #define _mm512_mask3_fnmsub_round_ph(A, B, C, U, R) \
5835 1.1 mrg ((__m512h)__builtin_ia32_vfnmsubph512_mask3 ((A), (B), (C), (U), (R)))
5836 1.1 mrg
5837 1.1 mrg #define _mm512_maskz_fnmsub_round_ph(U, A, B, C, R) \
5838 1.1 mrg ((__m512h)__builtin_ia32_vfnmsubph512_maskz ((A), (B), (C), (U), (R)))
5839 1.1 mrg
5840 1.1 mrg #endif /* __OPTIMIZE__ */
5841 1.1 mrg
5842 1.1 mrg /* Intrinsics vfmadd[132,213,231]sh. */
/* Scalar FMA on the low _Float16 element: (W * A) + B; upper elements
   pass through from the first operand (see Intel intrinsics guide).
   No masking, current rounding direction.  */
5843 1.1 mrg extern __inline __m128h
5844 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5845 1.1 mrg _mm_fmadd_sh (__m128h __W, __m128h __A, __m128h __B)
5846 1.1 mrg {
5847 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
5848 1.1 mrg (__v8hf) __A,
5849 1.1 mrg (__v8hf) __B,
5850 1.1 mrg (__mmask8) -1,
5851 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5852 1.1 mrg }
5853 1.1 mrg
/* Merge-masked: if bit 0 of __U is clear, the low element keeps __W.  */
5854 1.1 mrg extern __inline __m128h
5855 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5856 1.1 mrg _mm_mask_fmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
5857 1.1 mrg {
5858 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
5859 1.1 mrg (__v8hf) __A,
5860 1.1 mrg (__v8hf) __B,
5861 1.1 mrg (__mmask8) __U,
5862 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5863 1.1 mrg }
5864 1.1 mrg
/* Merge-masked into the third operand: clear bit 0 keeps __B.  */
5865 1.1 mrg extern __inline __m128h
5866 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5867 1.1 mrg _mm_mask3_fmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
5868 1.1 mrg {
5869 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
5870 1.1 mrg (__v8hf) __A,
5871 1.1 mrg (__v8hf) __B,
5872 1.1 mrg (__mmask8) __U,
5873 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5874 1.1 mrg }
5875 1.1 mrg
/* Zero-masked: clear bit 0 zeroes the low element.  */
5876 1.1 mrg extern __inline __m128h
5877 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5878 1.1 mrg _mm_maskz_fmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
5879 1.1 mrg {
5880 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
5881 1.1 mrg (__v8hf) __A,
5882 1.1 mrg (__v8hf) __B,
5883 1.1 mrg (__mmask8) __U,
5884 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5885 1.1 mrg }
5886 1.1 mrg
5887 1.1 mrg
/* Explicit-rounding forms of the scalar vfmaddsh intrinsics; inline
   functions when optimizing, macros otherwise.  */
5888 1.1 mrg #ifdef __OPTIMIZE__
5889 1.1 mrg extern __inline __m128h
5890 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5891 1.1 mrg _mm_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
5892 1.1 mrg {
5893 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
5894 1.1 mrg (__v8hf) __A,
5895 1.1 mrg (__v8hf) __B,
5896 1.1 mrg (__mmask8) -1,
5897 1.1 mrg __R);
5898 1.1 mrg }
5899 1.1 mrg
/* Merge-masked: clear bit 0 of __U keeps the low element of __W.  */
5900 1.1 mrg extern __inline __m128h
5901 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5902 1.1 mrg _mm_mask_fmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
5903 1.1 mrg const int __R)
5904 1.1 mrg {
5905 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
5906 1.1 mrg (__v8hf) __A,
5907 1.1 mrg (__v8hf) __B,
5908 1.1 mrg (__mmask8) __U, __R);
5909 1.1 mrg }
5910 1.1 mrg
/* Merge-masked into the third operand: clear bit 0 keeps __B.  */
5911 1.1 mrg extern __inline __m128h
5912 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5913 1.1 mrg _mm_mask3_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
5914 1.1 mrg const int __R)
5915 1.1 mrg {
5916 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
5917 1.1 mrg (__v8hf) __A,
5918 1.1 mrg (__v8hf) __B,
5919 1.1 mrg (__mmask8) __U, __R);
5920 1.1 mrg }
5921 1.1 mrg
/* Zero-masked: clear bit 0 zeroes the low element.  */
5922 1.1 mrg extern __inline __m128h
5923 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5924 1.1 mrg _mm_maskz_fmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
5925 1.1 mrg __m128h __B, const int __R)
5926 1.1 mrg {
5927 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
5928 1.1 mrg (__v8hf) __A,
5929 1.1 mrg (__v8hf) __B,
5930 1.1 mrg (__mmask8) __U, __R);
5931 1.1 mrg }
5932 1.1 mrg
5933 1.1 mrg #else
/* Macro equivalents of the four functions above.  */
5934 1.1 mrg #define _mm_fmadd_round_sh(A, B, C, R) \
5935 1.1 mrg ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (-1), (R)))
5936 1.1 mrg #define _mm_mask_fmadd_round_sh(A, U, B, C, R) \
5937 1.1 mrg ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (U), (R)))
5938 1.1 mrg #define _mm_mask3_fmadd_round_sh(A, B, C, U, R) \
5939 1.1 mrg ((__m128h) __builtin_ia32_vfmaddsh3_mask3 ((A), (B), (C), (U), (R)))
5940 1.1 mrg #define _mm_maskz_fmadd_round_sh(U, A, B, C, R) \
5941 1.1 mrg ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), (C), (U), (R)))
5942 1.1 mrg
5943 1.1 mrg #endif /* __OPTIMIZE__ */
5944 1.1 mrg
5945 1.1 mrg /* Intrinsics vfnmadd[132,213,231]sh. */
/* Scalar -(W * A) + B on the low _Float16 element; upper elements
   pass through from the first operand.  No masking, current rounding.  */
5946 1.1 mrg extern __inline __m128h
5947 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5948 1.1 mrg _mm_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B)
5949 1.1 mrg {
5950 1.1 mrg return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
5951 1.1 mrg (__v8hf) __A,
5952 1.1 mrg (__v8hf) __B,
5953 1.1 mrg (__mmask8) -1,
5954 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5955 1.1 mrg }
5956 1.1 mrg
/* Merge-masked: clear bit 0 of __U keeps the low element of __W.  */
5957 1.1 mrg extern __inline __m128h
5958 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5959 1.1 mrg _mm_mask_fnmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
5960 1.1 mrg {
5961 1.1 mrg return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
5962 1.1 mrg (__v8hf) __A,
5963 1.1 mrg (__v8hf) __B,
5964 1.1 mrg (__mmask8) __U,
5965 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5966 1.1 mrg }
5967 1.1 mrg
/* Merge-masked into the third operand: clear bit 0 keeps __B.  */
5968 1.1 mrg extern __inline __m128h
5969 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5970 1.1 mrg _mm_mask3_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
5971 1.1 mrg {
5972 1.1 mrg return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
5973 1.1 mrg (__v8hf) __A,
5974 1.1 mrg (__v8hf) __B,
5975 1.1 mrg (__mmask8) __U,
5976 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5977 1.1 mrg }
5978 1.1 mrg
/* Zero-masked: clear bit 0 zeroes the low element.  */
5979 1.1 mrg extern __inline __m128h
5980 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5981 1.1 mrg _mm_maskz_fnmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
5982 1.1 mrg {
5983 1.1 mrg return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
5984 1.1 mrg (__v8hf) __A,
5985 1.1 mrg (__v8hf) __B,
5986 1.1 mrg (__mmask8) __U,
5987 1.1 mrg _MM_FROUND_CUR_DIRECTION);
5988 1.1 mrg }
5989 1.1 mrg
5990 1.1 mrg
/* Explicit-rounding forms of the scalar vfnmaddsh intrinsics; inline
   functions when optimizing, macros otherwise.  */
5991 1.1 mrg #ifdef __OPTIMIZE__
5992 1.1 mrg extern __inline __m128h
5993 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5994 1.1 mrg _mm_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
5995 1.1 mrg {
5996 1.1 mrg return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
5997 1.1 mrg (__v8hf) __A,
5998 1.1 mrg (__v8hf) __B,
5999 1.1 mrg (__mmask8) -1,
6000 1.1 mrg __R);
6001 1.1 mrg }
6002 1.1 mrg
/* Merge-masked: clear bit 0 of __U keeps the low element of __W.  */
6003 1.1 mrg extern __inline __m128h
6004 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6005 1.1 mrg _mm_mask_fnmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
6006 1.1 mrg const int __R)
6007 1.1 mrg {
6008 1.1 mrg return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
6009 1.1 mrg (__v8hf) __A,
6010 1.1 mrg (__v8hf) __B,
6011 1.1 mrg (__mmask8) __U, __R);
6012 1.1 mrg }
6013 1.1 mrg
/* Merge-masked into the third operand: clear bit 0 keeps __B.  */
6014 1.1 mrg extern __inline __m128h
6015 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6016 1.1 mrg _mm_mask3_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
6017 1.1 mrg const int __R)
6018 1.1 mrg {
6019 1.1 mrg return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
6020 1.1 mrg (__v8hf) __A,
6021 1.1 mrg (__v8hf) __B,
6022 1.1 mrg (__mmask8) __U, __R);
6023 1.1 mrg }
6024 1.1 mrg
/* Zero-masked: clear bit 0 zeroes the low element.  */
6025 1.1 mrg extern __inline __m128h
6026 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6027 1.1 mrg _mm_maskz_fnmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
6028 1.1 mrg __m128h __B, const int __R)
6029 1.1 mrg {
6030 1.1 mrg return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
6031 1.1 mrg (__v8hf) __A,
6032 1.1 mrg (__v8hf) __B,
6033 1.1 mrg (__mmask8) __U, __R);
6034 1.1 mrg }
6035 1.1 mrg
6036 1.1 mrg #else
/* Macro equivalents of the four functions above.  */
6037 1.1 mrg #define _mm_fnmadd_round_sh(A, B, C, R) \
6038 1.1 mrg ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (-1), (R)))
6039 1.1 mrg #define _mm_mask_fnmadd_round_sh(A, U, B, C, R) \
6040 1.1 mrg ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (U), (R)))
6041 1.1 mrg #define _mm_mask3_fnmadd_round_sh(A, B, C, U, R) \
6042 1.1 mrg ((__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((A), (B), (C), (U), (R)))
6043 1.1 mrg #define _mm_maskz_fnmadd_round_sh(U, A, B, C, R) \
6044 1.1 mrg ((__m128h) __builtin_ia32_vfnmaddsh3_maskz ((A), (B), (C), (U), (R)))
6045 1.1 mrg
6046 1.1 mrg #endif /* __OPTIMIZE__ */
6047 1.1 mrg
6048 1.1 mrg /* Intrinsics vfmsub[132,213,231]sh. */
/* Scalar (W * A) - B on the low _Float16 element, implemented as
   fmadd with the third operand negated.  No masking, current
   rounding.  */
6049 1.1 mrg extern __inline __m128h
6050 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6051 1.1 mrg _mm_fmsub_sh (__m128h __W, __m128h __A, __m128h __B)
6052 1.1 mrg {
6053 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6054 1.1 mrg (__v8hf) __A,
6055 1.1 mrg -(__v8hf) __B,
6056 1.1 mrg (__mmask8) -1,
6057 1.1 mrg _MM_FROUND_CUR_DIRECTION);
6058 1.1 mrg }
6059 1.1 mrg
/* Merge-masked: clear bit 0 of __U keeps the low element of __W, so
   negating __B here is safe (the merge source is __W, not __B).  */
6060 1.1 mrg extern __inline __m128h
6061 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6062 1.1 mrg _mm_mask_fmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
6063 1.1 mrg {
6064 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6065 1.1 mrg (__v8hf) __A,
6066 1.1 mrg -(__v8hf) __B,
6067 1.1 mrg (__mmask8) __U,
6068 1.1 mrg _MM_FROUND_CUR_DIRECTION);
6069 1.1 mrg }
6070 1.1 mrg
/* mask3 merges from the third operand, so the "negate __B and reuse
   fmadd" trick would corrupt masked-off lanes; a dedicated vfmsub
   builtin is used instead.  */
6071 1.1 mrg extern __inline __m128h
6072 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6073 1.1 mrg _mm_mask3_fmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
6074 1.1 mrg {
6075 1.1 mrg return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
6076 1.1 mrg (__v8hf) __A,
6077 1.1 mrg (__v8hf) __B,
6078 1.1 mrg (__mmask8) __U,
6079 1.1 mrg _MM_FROUND_CUR_DIRECTION);
6080 1.1 mrg }
6081 1.1 mrg
/* Zero-masked: clear bit 0 zeroes the low element; negation of __B is
   again safe because the merge source is zero.  */
6082 1.1 mrg extern __inline __m128h
6083 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6084 1.1 mrg _mm_maskz_fmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
6085 1.1 mrg {
6086 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
6087 1.1 mrg (__v8hf) __A,
6088 1.1 mrg -(__v8hf) __B,
6089 1.1 mrg (__mmask8) __U,
6090 1.1 mrg _MM_FROUND_CUR_DIRECTION);
6091 1.1 mrg }
6092 1.1 mrg
6093 1.1 mrg
/* Explicit-rounding forms of the scalar fmsub intrinsics; same
   negate-and-reuse-fmadd scheme as the non-round versions, with a
   dedicated vfmsub builtin for the mask3 form.  */
6094 1.1 mrg #ifdef __OPTIMIZE__
6095 1.1 mrg extern __inline __m128h
6096 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6097 1.1 mrg _mm_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
6098 1.1 mrg {
6099 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6100 1.1 mrg (__v8hf) __A,
6101 1.1 mrg -(__v8hf) __B,
6102 1.1 mrg (__mmask8) -1,
6103 1.1 mrg __R);
6104 1.1 mrg }
6105 1.1 mrg
/* Merge-masked: merge source is __W, so negating __B is safe.  */
6106 1.1 mrg extern __inline __m128h
6107 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6108 1.1 mrg _mm_mask_fmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
6109 1.1 mrg const int __R)
6110 1.1 mrg {
6111 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6112 1.1 mrg (__v8hf) __A,
6113 1.1 mrg -(__v8hf) __B,
6114 1.1 mrg (__mmask8) __U, __R);
6115 1.1 mrg }
6116 1.1 mrg
/* mask3 merges from __B, so the unnegated vfmsub builtin is used.  */
6117 1.1 mrg extern __inline __m128h
6118 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6119 1.1 mrg _mm_mask3_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
6120 1.1 mrg const int __R)
6121 1.1 mrg {
6122 1.1 mrg return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
6123 1.1 mrg (__v8hf) __A,
6124 1.1 mrg (__v8hf) __B,
6125 1.1 mrg (__mmask8) __U, __R);
6126 1.1 mrg }
6127 1.1 mrg
/* Zero-masked: merge source is zero, so negating __B is safe.  */
6128 1.1 mrg extern __inline __m128h
6129 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6130 1.1 mrg _mm_maskz_fmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
6131 1.1 mrg __m128h __B, const int __R)
6132 1.1 mrg {
6133 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
6134 1.1 mrg (__v8hf) __A,
6135 1.1 mrg -(__v8hf) __B,
6136 1.1 mrg (__mmask8) __U, __R);
6137 1.1 mrg }
6138 1.1 mrg
6139 1.1 mrg #else
/* Macro equivalents; note the same operand-negation pattern.  */
6140 1.1 mrg #define _mm_fmsub_round_sh(A, B, C, R) \
6141 1.1 mrg ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (-1), (R)))
6142 1.1 mrg #define _mm_mask_fmsub_round_sh(A, U, B, C, R) \
6143 1.1 mrg ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (U), (R)))
6144 1.1 mrg #define _mm_mask3_fmsub_round_sh(A, B, C, U, R) \
6145 1.1 mrg ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), (B), (C), (U), (R)))
6146 1.1 mrg #define _mm_maskz_fmsub_round_sh(U, A, B, C, R) \
6147 1.1 mrg ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), -(C), (U), (R)))
6148 1.1 mrg
6149 1.1 mrg #endif /* __OPTIMIZE__ */
6150 1.1 mrg
6151 1.1 mrg /* Intrinsics vfnmsub[132,213,231]sh. */
/* Scalar -(W * A) - B on the low _Float16 element, implemented as
   fmadd with the second and third operands negated.  No masking,
   current rounding.  */
6152 1.1 mrg extern __inline __m128h
6153 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6154 1.1 mrg _mm_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B)
6155 1.1 mrg {
6156 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6157 1.1 mrg -(__v8hf) __A,
6158 1.1 mrg -(__v8hf) __B,
6159 1.1 mrg (__mmask8) -1,
6160 1.1 mrg _MM_FROUND_CUR_DIRECTION);
6161 1.1 mrg }
6162 1.1 mrg
/* Merge-masked: merge source is __W, so negating __A/__B is safe.  */
6163 1.1 mrg extern __inline __m128h
6164 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6165 1.1 mrg _mm_mask_fnmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
6166 1.1 mrg {
6167 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6168 1.1 mrg -(__v8hf) __A,
6169 1.1 mrg -(__v8hf) __B,
6170 1.1 mrg (__mmask8) __U,
6171 1.1 mrg _MM_FROUND_CUR_DIRECTION);
6172 1.1 mrg }
6173 1.1 mrg
/* mask3 merges from __B, so only __A is negated and the vfmsub
   builtin supplies the subtraction of __B.  */
6174 1.1 mrg extern __inline __m128h
6175 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6176 1.1 mrg _mm_mask3_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
6177 1.1 mrg {
6178 1.1 mrg return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
6179 1.1 mrg -(__v8hf) __A,
6180 1.1 mrg (__v8hf) __B,
6181 1.1 mrg (__mmask8) __U,
6182 1.1 mrg _MM_FROUND_CUR_DIRECTION);
6183 1.1 mrg }
6184 1.1 mrg
/* Zero-masked: merge source is zero, so negation is safe.  */
6185 1.1 mrg extern __inline __m128h
6186 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6187 1.1 mrg _mm_maskz_fnmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
6188 1.1 mrg {
6189 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
6190 1.1 mrg -(__v8hf) __A,
6191 1.1 mrg -(__v8hf) __B,
6192 1.1 mrg (__mmask8) __U,
6193 1.1 mrg _MM_FROUND_CUR_DIRECTION);
6194 1.1 mrg }
6195 1.1 mrg
6196 1.1 mrg
/* Explicit-rounding forms of the scalar fnmsub intrinsics; same
   operand-negation scheme as the non-round versions.  */
6197 1.1 mrg #ifdef __OPTIMIZE__
6198 1.1 mrg extern __inline __m128h
6199 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6200 1.1 mrg _mm_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
6201 1.1 mrg {
6202 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6203 1.1 mrg -(__v8hf) __A,
6204 1.1 mrg -(__v8hf) __B,
6205 1.1 mrg (__mmask8) -1,
6206 1.1 mrg __R);
6207 1.1 mrg }
6208 1.1 mrg
/* Merge-masked: merge source is __W, so negation is safe.  */
6209 1.1 mrg extern __inline __m128h
6210 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6211 1.1 mrg _mm_mask_fnmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
6212 1.1 mrg const int __R)
6213 1.1 mrg {
6214 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6215 1.1 mrg -(__v8hf) __A,
6216 1.1 mrg -(__v8hf) __B,
6217 1.1 mrg (__mmask8) __U, __R);
6218 1.1 mrg }
6219 1.1 mrg
/* mask3 merges from __B: only __A is negated, vfmsub subtracts __B.  */
6220 1.1 mrg extern __inline __m128h
6221 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6222 1.1 mrg _mm_mask3_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
6223 1.1 mrg const int __R)
6224 1.1 mrg {
6225 1.1 mrg return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
6226 1.1 mrg -(__v8hf) __A,
6227 1.1 mrg (__v8hf) __B,
6228 1.1 mrg (__mmask8) __U, __R);
6229 1.1 mrg }
6230 1.1 mrg
/* Zero-masked: merge source is zero, so negation is safe.  */
6231 1.1 mrg extern __inline __m128h
6232 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6233 1.1 mrg _mm_maskz_fnmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
6234 1.1 mrg __m128h __B, const int __R)
6235 1.1 mrg {
6236 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
6237 1.1 mrg -(__v8hf) __A,
6238 1.1 mrg -(__v8hf) __B,
6239 1.1 mrg (__mmask8) __U, __R);
6240 1.1 mrg }
6241 1.1 mrg
6242 1.1 mrg #else
/* Macro equivalents; arguments map (A=W, B=A, C=B) to the functions.  */
6243 1.1 mrg #define _mm_fnmsub_round_sh(A, B, C, R) \
6244 1.1 mrg ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (-1), (R)))
6245 1.1 mrg #define _mm_mask_fnmsub_round_sh(A, U, B, C, R) \
6246 1.1 mrg ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (U), (R)))
6247 1.1 mrg #define _mm_mask3_fnmsub_round_sh(A, B, C, U, R) \
6248 1.1 mrg ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), -(B), (C), (U), (R)))
6249 1.1 mrg #define _mm_maskz_fnmsub_round_sh(U, A, B, C, R) \
6250 1.1 mrg ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), -(B), -(C), (U), (R)))
6251 1.1 mrg
6252 1.1 mrg #endif /* __OPTIMIZE__ */
6253 1.1 mrg
6254 1.1 mrg /* Intrinsics vf[,c]maddcph. */
/* Complex FMA with conjugation: each adjacent pair of _Float16
   elements forms one complex number (16 complex lanes per __m512h),
   hence the __mmask16 masks below — one bit per complex pair.  */
6255 1.1 mrg extern __inline __m512h
6256 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6257 1.1 mrg _mm512_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C)
6258 1.1 mrg {
6259 1.1 mrg return (__m512h)
6260 1.1 mrg __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A,
6261 1.1 mrg (__v32hf) __B,
6262 1.1 mrg (__v32hf) __C,
6263 1.1 mrg _MM_FROUND_CUR_DIRECTION);
6264 1.1 mrg }
6265 1.1 mrg
/* Merge-masked; the mask __B is the builtin's fourth argument, after
   the three vector operands (__A, __C, __D).  */
6266 1.1 mrg extern __inline __m512h
6267 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6268 1.1 mrg _mm512_mask_fcmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
6269 1.1 mrg {
6270 1.1 mrg return (__m512h)
6271 1.1 mrg __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A,
6272 1.1 mrg (__v32hf) __C,
6273 1.1 mrg (__v32hf) __D, __B,
6274 1.1 mrg _MM_FROUND_CUR_DIRECTION);
6275 1.1 mrg }
6276 1.1 mrg
/* Merge-masked into the third operand (mask __D is last).  */
6277 1.1 mrg extern __inline __m512h
6278 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6279 1.1 mrg _mm512_mask3_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D)
6280 1.1 mrg {
6281 1.1 mrg return (__m512h)
6282 1.1 mrg __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A,
6283 1.1 mrg (__v32hf) __B,
6284 1.1 mrg (__v32hf) __C,
6285 1.1 mrg __D, _MM_FROUND_CUR_DIRECTION);
6286 1.1 mrg }
6287 1.1 mrg
/* Zero-masked; the mask __A comes first in the intrinsic but is the
   builtin's fourth argument.  */
6288 1.1 mrg extern __inline __m512h
6289 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6290 1.1 mrg _mm512_maskz_fcmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D)
6291 1.1 mrg {
6292 1.1 mrg return (__m512h)
6293 1.1 mrg __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B,
6294 1.1 mrg (__v32hf) __C,
6295 1.1 mrg (__v32hf) __D,
6296 1.1 mrg __A, _MM_FROUND_CUR_DIRECTION);
6297 1.1 mrg }
6298 1.1 mrg
/* Complex FMA without conjugation; same masking layout as the
   fcmadd_pch variants above (one __mmask16 bit per complex pair).  */
6299 1.1 mrg extern __inline __m512h
6300 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6301 1.1 mrg _mm512_fmadd_pch (__m512h __A, __m512h __B, __m512h __C)
6302 1.1 mrg {
6303 1.1 mrg return (__m512h)
6304 1.1 mrg __builtin_ia32_vfmaddcph512_round ((__v32hf) __A,
6305 1.1 mrg (__v32hf) __B,
6306 1.1 mrg (__v32hf) __C,
6307 1.1 mrg _MM_FROUND_CUR_DIRECTION);
6308 1.1 mrg }
6309 1.1 mrg
/* Merge-masked; mask __B is the builtin's fourth argument.  */
6310 1.1 mrg extern __inline __m512h
6311 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6312 1.1 mrg _mm512_mask_fmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
6313 1.1 mrg {
6314 1.1 mrg return (__m512h)
6315 1.1 mrg __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A,
6316 1.1 mrg (__v32hf) __C,
6317 1.1 mrg (__v32hf) __D, __B,
6318 1.1 mrg _MM_FROUND_CUR_DIRECTION);
6319 1.1 mrg }
6320 1.1 mrg
/* Merge-masked into the third operand.  */
6321 1.1 mrg extern __inline __m512h
6322 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6323 1.1 mrg _mm512_mask3_fmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D)
6324 1.1 mrg {
6325 1.1 mrg return (__m512h)
6326 1.1 mrg __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A,
6327 1.1 mrg (__v32hf) __B,
6328 1.1 mrg (__v32hf) __C,
6329 1.1 mrg __D, _MM_FROUND_CUR_DIRECTION);
6330 1.1 mrg }
6331 1.1 mrg
/* Zero-masked; mask __A is the builtin's fourth argument.  */
6332 1.1 mrg extern __inline __m512h
6333 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6334 1.1 mrg _mm512_maskz_fmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D)
6335 1.1 mrg {
6336 1.1 mrg return (__m512h)
6337 1.1 mrg __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B,
6338 1.1 mrg (__v32hf) __C,
6339 1.1 mrg (__v32hf) __D,
6340 1.1 mrg __A, _MM_FROUND_CUR_DIRECTION);
6341 1.1 mrg }
6342 1.1 mrg
/* Explicit-rounding forms of the complex FMA intrinsics; inline
   functions are only usable when optimizing (the rounding operand
   must reach the builtin as a constant).  */
6343 1.1 mrg #ifdef __OPTIMIZE__
6344 1.1 mrg extern __inline __m512h
6345 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6346 1.1 mrg _mm512_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D)
6347 1.1 mrg {
6348 1.1 mrg return (__m512h)
6349 1.1 mrg __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A,
6350 1.1 mrg (__v32hf) __B,
6351 1.1 mrg (__v32hf) __C,
6352 1.1 mrg __D);
6353 1.1 mrg }
6354 1.1 mrg
/* Merge-masked; mask __B is the builtin's fourth argument.  */
6355 1.1 mrg extern __inline __m512h
6356 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6357 1.1 mrg _mm512_mask_fcmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
6358 1.1 mrg __m512h __D, const int __E)
6359 1.1 mrg {
6360 1.1 mrg return (__m512h)
6361 1.1 mrg __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A,
6362 1.1 mrg (__v32hf) __C,
6363 1.1 mrg (__v32hf) __D, __B,
6364 1.1 mrg __E);
6365 1.1 mrg }
6366 1.1 mrg
/* Merge-masked into the third operand.  */
6367 1.1 mrg extern __inline __m512h
6368 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6369 1.1 mrg _mm512_mask3_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C,
6370 1.1 mrg __mmask16 __D, const int __E)
6371 1.1 mrg {
6372 1.1 mrg return (__m512h)
6373 1.1 mrg __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A,
6374 1.1 mrg (__v32hf) __B,
6375 1.1 mrg (__v32hf) __C,
6376 1.1 mrg __D, __E);
6377 1.1 mrg }
6378 1.1 mrg
/* Zero-masked; mask __A is the builtin's fourth argument.  */
6379 1.1 mrg extern __inline __m512h
6380 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6381 1.1 mrg _mm512_maskz_fcmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C,
6382 1.1 mrg __m512h __D, const int __E)
6383 1.1 mrg {
6384 1.1 mrg return (__m512h)
6385 1.1 mrg __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B,
6386 1.1 mrg (__v32hf) __C,
6387 1.1 mrg (__v32hf) __D,
6388 1.1 mrg __A, __E);
6389 1.1 mrg }
6390 1.1 mrg
/* Non-conjugating complex FMA with explicit rounding.  */
6391 1.1 mrg extern __inline __m512h
6392 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6393 1.1 mrg _mm512_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D)
6394 1.1 mrg {
6395 1.1 mrg return (__m512h)
6396 1.1 mrg __builtin_ia32_vfmaddcph512_round ((__v32hf) __A,
6397 1.1 mrg (__v32hf) __B,
6398 1.1 mrg (__v32hf) __C,
6399 1.1 mrg __D);
6400 1.1 mrg }
6401 1.1 mrg
/* Merge-masked; mask __B is the builtin's fourth argument.  */
6402 1.1 mrg extern __inline __m512h
6403 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6404 1.1 mrg _mm512_mask_fmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
6405 1.1 mrg __m512h __D, const int __E)
6406 1.1 mrg {
6407 1.1 mrg return (__m512h)
6408 1.1 mrg __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A,
6409 1.1 mrg (__v32hf) __C,
6410 1.1 mrg (__v32hf) __D, __B,
6411 1.1 mrg __E);
6412 1.1 mrg }
6413 1.1 mrg
/* Merge-masked into the third operand.  */
6414 1.1 mrg extern __inline __m512h
6415 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6416 1.1 mrg _mm512_mask3_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C,
6417 1.1 mrg __mmask16 __D, const int __E)
6418 1.1 mrg {
6419 1.1 mrg return (__m512h)
6420 1.1 mrg __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A,
6421 1.1 mrg (__v32hf) __B,
6422 1.1 mrg (__v32hf) __C,
6423 1.1 mrg __D, __E);
6424 1.1 mrg }
6425 1.1 mrg
/* Zero-masked; mask __A is the builtin's fourth argument.  */
6426 1.1 mrg extern __inline __m512h
6427 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6428 1.1 mrg _mm512_maskz_fmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C,
6429 1.1 mrg __m512h __D, const int __E)
6430 1.1 mrg {
6431 1.1 mrg return (__m512h)
6432 1.1 mrg __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B,
6433 1.1 mrg (__v32hf) __C,
6434 1.1 mrg (__v32hf) __D,
6435 1.1 mrg __A, __E);
6436 1.1 mrg }
6437 1.1 mrg
6438 1.1 mrg #else
/* Macro forms used when not optimizing (the rounding argument cannot
   be proven constant inside an inline function).  Unlike the other
   round macros in this file, several of these expanded to a bare
   unparenthesized cast expression; all eight are now fully wrapped in
   parentheses so each expansion is a single primary expression,
   consistent with the _ph/_sh macros above.  Argument order matches
   the inline functions: the mask is A (maskz) or B (mask) in the
   intrinsic but the builtin's fourth argument.  */
6439 1.1 mrg #define _mm512_fcmadd_round_pch(A, B, C, D) \
6440 1.1 mrg ((__m512h) __builtin_ia32_vfcmaddcph512_round ((A), (B), (C), (D)))
6441 1.1 mrg
6442 1.1 mrg #define _mm512_mask_fcmadd_round_pch(A, B, C, D, E) \
6443 1.1 mrg ((__m512h) \
6444 1.1 mrg __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) (A), \
6445 1.1 mrg (__v32hf) (C), \
6446 1.1 mrg (__v32hf) (D), \
6447 1.1 mrg (B), (E)))
6448 1.1 mrg
6449 1.1 mrg
6450 1.1 mrg #define _mm512_mask3_fcmadd_round_pch(A, B, C, D, E) \
6451 1.1 mrg ((__m512h) \
6452 1.1 mrg __builtin_ia32_vfcmaddcph512_mask3_round ((A), (B), (C), (D), (E)))
6453 1.1 mrg
6454 1.1 mrg #define _mm512_maskz_fcmadd_round_pch(A, B, C, D, E) \
6455 1.1 mrg ((__m512h) \
6456 1.1 mrg __builtin_ia32_vfcmaddcph512_maskz_round ((B), (C), (D), (A), (E)))
6457 1.1 mrg
6458 1.1 mrg #define _mm512_fmadd_round_pch(A, B, C, D) \
6459 1.1 mrg ((__m512h) __builtin_ia32_vfmaddcph512_round ((A), (B), (C), (D)))
6460 1.1 mrg
6461 1.1 mrg #define _mm512_mask_fmadd_round_pch(A, B, C, D, E) \
6462 1.1 mrg ((__m512h) \
6463 1.1 mrg __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) (A), \
6464 1.1 mrg (__v32hf) (C), \
6465 1.1 mrg (__v32hf) (D), \
6466 1.1 mrg (B), (E)))
6467 1.1 mrg
6468 1.1 mrg #define _mm512_mask3_fmadd_round_pch(A, B, C, D, E) \
6469 1.1 mrg ((__m512h) \
6470 1.1 mrg __builtin_ia32_vfmaddcph512_mask3_round ((A), (B), (C), (D), (E)))
6471 1.1 mrg
6472 1.1 mrg #define _mm512_maskz_fmadd_round_pch(A, B, C, D, E) \
6473 1.1 mrg ((__m512h) \
6474 1.1 mrg __builtin_ia32_vfmaddcph512_maskz_round ((B), (C), (D), (A), (E)))
6475 1.1 mrg
6476 1.1 mrg #endif /* __OPTIMIZE__ */
6477 1.1 mrg
6478 1.1 mrg /* Intrinsics vf[,c]mulcph. */
/* Conjugated complex-FP16 multiply over 16 complex pairs, default
   (current) rounding direction.  */
6479 1.1 mrg extern __inline __m512h
6480 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6481 1.1 mrg _mm512_fcmul_pch (__m512h __A, __m512h __B)
6482 1.1 mrg {
6483 1.1 mrg   return (__m512h)
6484 1.1 mrg     __builtin_ia32_vfcmulcph512_round ((__v32hf) __A,
6485 1.1 mrg 				       (__v32hf) __B,
6486 1.1 mrg 				       _MM_FROUND_CUR_DIRECTION);
6487 1.1 mrg }
6488 1.1 mrg
/* Merge-masked: multiplies __C * conj-form(__D); __A supplies the
   passthrough lanes (passed third to the builtin), __B is the mask.  */
6489 1.1 mrg extern __inline __m512h
6490 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6491 1.1 mrg _mm512_mask_fcmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
6492 1.1 mrg {
6493 1.1 mrg   return (__m512h)
6494 1.1 mrg     __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C,
6495 1.1 mrg 					    (__v32hf) __D,
6496 1.1 mrg 					    (__v32hf) __A,
6497 1.1 mrg 					    __B, _MM_FROUND_CUR_DIRECTION);
6498 1.1 mrg }
6499 1.1 mrg
/* Zero-masking is expressed through the same mask builtin by supplying
   an all-zero passthrough vector.  */
6500 1.1 mrg extern __inline __m512h
6501 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6502 1.1 mrg _mm512_maskz_fcmul_pch (__mmask16 __A, __m512h __B, __m512h __C)
6503 1.1 mrg {
6504 1.1 mrg   return (__m512h)
6505 1.1 mrg     __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B,
6506 1.1 mrg 					    (__v32hf) __C,
6507 1.1 mrg 					    _mm512_setzero_ph (),
6508 1.1 mrg 					    __A, _MM_FROUND_CUR_DIRECTION);
6509 1.1 mrg }
6510 1.1 mrg
/* Non-conjugated complex-FP16 multiply; same shapes as the fcmul trio.  */
6511 1.1 mrg extern __inline __m512h
6512 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6513 1.1 mrg _mm512_fmul_pch (__m512h __A, __m512h __B)
6514 1.1 mrg {
6515 1.1 mrg   return (__m512h)
6516 1.1 mrg     __builtin_ia32_vfmulcph512_round ((__v32hf) __A,
6517 1.1 mrg 				      (__v32hf) __B,
6518 1.1 mrg 				      _MM_FROUND_CUR_DIRECTION);
6519 1.1 mrg }
6520 1.1 mrg
6521 1.1 mrg extern __inline __m512h
6522 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6523 1.1 mrg _mm512_mask_fmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
6524 1.1 mrg {
6525 1.1 mrg   return (__m512h)
6526 1.1 mrg     __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C,
6527 1.1 mrg 					   (__v32hf) __D,
6528 1.1 mrg 					   (__v32hf) __A,
6529 1.1 mrg 					   __B, _MM_FROUND_CUR_DIRECTION);
6530 1.1 mrg }
6531 1.1 mrg
6532 1.1 mrg extern __inline __m512h
6533 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6534 1.1 mrg _mm512_maskz_fmul_pch (__mmask16 __A, __m512h __B, __m512h __C)
6535 1.1 mrg {
6536 1.1 mrg   return (__m512h)
6537 1.1 mrg     __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B,
6538 1.1 mrg 					   (__v32hf) __C,
6539 1.1 mrg 					   _mm512_setzero_ph (),
6540 1.1 mrg 					   __A, _MM_FROUND_CUR_DIRECTION);
6541 1.1 mrg }
6542 1.1 mrg
6543 1.1 mrg #ifdef __OPTIMIZE__
/* Round-control variants of the complex-FP16 multiplies.  They mirror
   the _MM_FROUND_CUR_DIRECTION functions above, exposing the rounding
   mode as the trailing const-int argument (hence __OPTIMIZE__-only:
   the builtin requires a compile-time constant).  */
6544 1.1 mrg extern __inline __m512h
6545 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6546 1.1 mrg _mm512_fcmul_round_pch (__m512h __A, __m512h __B, const int __D)
6547 1.1 mrg {
6548 1.1 mrg   return (__m512h)
6549 1.1 mrg     __builtin_ia32_vfcmulcph512_round ((__v32hf) __A,
6550 1.1 mrg 				       (__v32hf) __B, __D)
6551 1.1 mrg }
6552 1.1 mrg
/* Merge-masked: __A passthrough, __B mask, __C * conj-form(__D).  */
6553 1.1 mrg extern __inline __m512h
6554 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6555 1.1 mrg _mm512_mask_fcmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
6556 1.1 mrg 			     __m512h __D, const int __E)
6557 1.1 mrg {
6558 1.1 mrg   return (__m512h)
6559 1.1 mrg     __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C,
6560 1.1 mrg 					    (__v32hf) __D,
6561 1.1 mrg 					    (__v32hf) __A,
6562 1.1 mrg 					    __B, __E);
6563 1.1 mrg }
6564 1.1 mrg
/* Zero-masked via all-zero passthrough, as in the non-round version.  */
6565 1.1 mrg extern __inline __m512h
6566 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6567 1.1 mrg _mm512_maskz_fcmul_round_pch (__mmask16 __A, __m512h __B,
6568 1.1 mrg 			      __m512h __C, const int __E)
6569 1.1 mrg {
6570 1.1 mrg   return (__m512h)
6571 1.1 mrg     __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B,
6572 1.1 mrg 					    (__v32hf) __C,
6573 1.1 mrg 					    _mm512_setzero_ph (),
6574 1.1 mrg 					    __A, __E);
6575 1.1 mrg }
6576 1.1 mrg
6577 1.1 mrg extern __inline __m512h
6578 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6579 1.1 mrg _mm512_fmul_round_pch (__m512h __A, __m512h __B, const int __D)
6580 1.1 mrg {
6581 1.1 mrg   return (__m512h)
6582 1.1 mrg     __builtin_ia32_vfmulcph512_round ((__v32hf) __A,
6583 1.1 mrg 				      (__v32hf) __B,
6584 1.1 mrg 				      __D);
6585 1.1 mrg }
6586 1.1 mrg
6587 1.1 mrg extern __inline __m512h
6588 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6589 1.1 mrg _mm512_mask_fmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
6590 1.1 mrg 			    __m512h __D, const int __E)
6591 1.1 mrg {
6592 1.1 mrg   return (__m512h)
6593 1.1 mrg     __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C,
6594 1.1 mrg 					   (__v32hf) __D,
6595 1.1 mrg 					   (__v32hf) __A,
6596 1.1 mrg 					   __B, __E);
6597 1.1 mrg }
6598 1.1 mrg
6599 1.1 mrg extern __inline __m512h
6600 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6601 1.1 mrg _mm512_maskz_fmul_round_pch (__mmask16 __A, __m512h __B,
6602 1.1 mrg 			     __m512h __C, const int __E)
6603 1.1 mrg {
6604 1.1 mrg   return (__m512h)
6605 1.1 mrg     __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B,
6606 1.1 mrg 					   (__v32hf) __C,
6607 1.1 mrg 					   _mm512_setzero_ph (),
6608 1.1 mrg 					   __A, __E);
6609 1.1 mrg }
6610 1.1 mrg
6611 1.1 mrg #else
/* Non-__OPTIMIZE__ macro forms.  Each expansion is wrapped in
   parentheses so the macros are safe in any expression context
   (the originals exposed a bare cast-expression).  */
6612 1.1 mrg #define _mm512_fcmul_round_pch(A, B, D) \
6613 1.1 mrg   ((__m512h) __builtin_ia32_vfcmulcph512_round ((A), (B), (D)))
6614 1.1 mrg
6615 1.1 mrg #define _mm512_mask_fcmul_round_pch(A, B, C, D, E) \
6616 1.1 mrg   ((__m512h) __builtin_ia32_vfcmulcph512_mask_round ((C), (D), (A), (B), (E)))
6617 1.1 mrg
6618 1.1 mrg #define _mm512_maskz_fcmul_round_pch(A, B, C, E) \
6619 1.1 mrg   ((__m512h) __builtin_ia32_vfcmulcph512_mask_round ((B), (C), \
6620 1.1 mrg 						     (__v32hf) \
6621 1.1 mrg 						     _mm512_setzero_ph (), \
6622 1.1 mrg 						     (A), (E)))
6623 1.1 mrg
6624 1.1 mrg #define _mm512_fmul_round_pch(A, B, D) \
6625 1.1 mrg   ((__m512h) __builtin_ia32_vfmulcph512_round ((A), (B), (D)))
6626 1.1 mrg
6627 1.1 mrg #define _mm512_mask_fmul_round_pch(A, B, C, D, E) \
6628 1.1 mrg   ((__m512h) __builtin_ia32_vfmulcph512_mask_round ((C), (D), (A), (B), (E)))
6629 1.1 mrg
6630 1.1 mrg #define _mm512_maskz_fmul_round_pch(A, B, C, E) \
6631 1.1 mrg   ((__m512h) __builtin_ia32_vfmulcph512_mask_round ((B), (C), \
6632 1.1 mrg 						    (__v32hf) \
6633 1.1 mrg 						    _mm512_setzero_ph (), \
6634 1.1 mrg 						    (A), (E)))
6635 1.1 mrg
6636 1.1 mrg #endif /* __OPTIMIZE__ */
6637 1.1 mrg
6638 1.1 mrg /* Intrinsics vf[,c]maddcsh. */
/* Scalar (one complex-FP16 value in the low 32 bits of an __m128h)
   [conjugated] complex multiply-add, current rounding direction.
   Merge-masked form: __A is accumulator/passthrough, __B the mask,
   __C/__D the multiplicands; the builtin takes the mask fourth.  */
6639 1.1 mrg extern __inline __m128h
6640 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6641 1.1 mrg _mm_mask_fcmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
6642 1.1 mrg {
6643 1.1 mrg   return (__m128h)
6644 1.1 mrg     __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
6645 1.1 mrg 					  (__v8hf) __C,
6646 1.1 mrg 					  (__v8hf) __D, __B,
6647 1.1 mrg 					  _MM_FROUND_CUR_DIRECTION);
6648 1.1 mrg }
6649 1.1 mrg
/* mask3 form: passthrough comes from the third operand __C.  */
6650 1.1 mrg extern __inline __m128h
6651 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6652 1.1 mrg _mm_mask3_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
6653 1.1 mrg {
6654 1.1 mrg   return (__m128h)
6655 1.1 mrg     __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
6656 1.1 mrg 					   (__v8hf) __B,
6657 1.1 mrg 					   (__v8hf) __C, __D,
6658 1.1 mrg 					   _MM_FROUND_CUR_DIRECTION);
6659 1.1 mrg }
6660 1.1 mrg
/* Zero-masked form via the dedicated maskz builtin.  */
6661 1.1 mrg extern __inline __m128h
6662 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6663 1.1 mrg _mm_maskz_fcmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
6664 1.1 mrg {
6665 1.1 mrg   return (__m128h)
6666 1.1 mrg     __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
6667 1.1 mrg 					   (__v8hf) __C,
6668 1.1 mrg 					   (__v8hf) __D,
6669 1.1 mrg 					   __A, _MM_FROUND_CUR_DIRECTION);
6670 1.1 mrg }
6671 1.1 mrg
/* Unmasked conjugated scalar complex FMA.  */
6672 1.1 mrg extern __inline __m128h
6673 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6674 1.1 mrg _mm_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C)
6675 1.1 mrg {
6676 1.1 mrg   return (__m128h)
6677 1.1 mrg     __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
6678 1.1 mrg 				     (__v8hf) __B,
6679 1.1 mrg 				     (__v8hf) __C,
6680 1.1 mrg 				     _MM_FROUND_CUR_DIRECTION);
6681 1.1 mrg }
6682 1.1 mrg
/* Non-conjugated counterparts of the four functions above.  */
6683 1.1 mrg extern __inline __m128h
6684 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6685 1.1 mrg _mm_mask_fmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
6686 1.1 mrg {
6687 1.1 mrg   return (__m128h)
6688 1.1 mrg     __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
6689 1.1 mrg 					 (__v8hf) __C,
6690 1.1 mrg 					 (__v8hf) __D, __B,
6691 1.1 mrg 					 _MM_FROUND_CUR_DIRECTION);
6692 1.1 mrg }
6693 1.1 mrg
6694 1.1 mrg extern __inline __m128h
6695 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6696 1.1 mrg _mm_mask3_fmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
6697 1.1 mrg {
6698 1.1 mrg   return (__m128h)
6699 1.1 mrg     __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
6700 1.1 mrg 					  (__v8hf) __B,
6701 1.1 mrg 					  (__v8hf) __C, __D,
6702 1.1 mrg 					  _MM_FROUND_CUR_DIRECTION);
6703 1.1 mrg }
6704 1.1 mrg
6705 1.1 mrg extern __inline __m128h
6706 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6707 1.1 mrg _mm_maskz_fmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
6708 1.1 mrg {
6709 1.1 mrg   return (__m128h)
6710 1.1 mrg     __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
6711 1.1 mrg 					  (__v8hf) __C,
6712 1.1 mrg 					  (__v8hf) __D,
6713 1.1 mrg 					  __A, _MM_FROUND_CUR_DIRECTION);
6714 1.1 mrg }
6715 1.1 mrg
6716 1.1 mrg extern __inline __m128h
6717 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6718 1.1 mrg _mm_fmadd_sch (__m128h __A, __m128h __B, __m128h __C)
6719 1.1 mrg {
6720 1.1 mrg   return (__m128h)
6721 1.1 mrg     __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
6722 1.1 mrg 				    (__v8hf) __B,
6723 1.1 mrg 				    (__v8hf) __C,
6724 1.1 mrg 				    _MM_FROUND_CUR_DIRECTION);
6725 1.1 mrg }
6726 1.1 mrg
6727 1.1 mrg #ifdef __OPTIMIZE__
/* Round-control variants of the scalar complex FMA intrinsics above;
   the rounding mode is the trailing const-int (__OPTIMIZE__-only since
   the builtin requires a compile-time constant).  */
6728 1.1 mrg extern __inline __m128h
6729 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6730 1.1 mrg _mm_mask_fcmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
6731 1.1 mrg 			   __m128h __D, const int __E)
6732 1.1 mrg {
6733 1.1 mrg   return (__m128h)
6734 1.1 mrg     __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
6735 1.1 mrg 					  (__v8hf) __C,
6736 1.1 mrg 					  (__v8hf) __D,
6737 1.1 mrg 					  __B, __E);
6738 1.1 mrg }
6739 1.1 mrg
6740 1.1 mrg extern __inline __m128h
6741 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6742 1.1 mrg _mm_mask3_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
6743 1.1 mrg 			    __mmask8 __D, const int __E)
6744 1.1 mrg {
6745 1.1 mrg   return (__m128h)
6746 1.1 mrg     __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
6747 1.1 mrg 					   (__v8hf) __B,
6748 1.1 mrg 					   (__v8hf) __C,
6749 1.1 mrg 					   __D, __E);
6750 1.1 mrg }
6751 1.1 mrg
6752 1.1 mrg extern __inline __m128h
6753 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6754 1.1 mrg _mm_maskz_fcmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
6755 1.1 mrg 			    __m128h __D, const int __E)
6756 1.1 mrg {
6757 1.1 mrg   return (__m128h)
6758 1.1 mrg     __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
6759 1.1 mrg 					   (__v8hf) __C,
6760 1.1 mrg 					   (__v8hf) __D,
6761 1.1 mrg 					   __A, __E);
6762 1.1 mrg }
6763 1.1 mrg
6764 1.1 mrg extern __inline __m128h
6765 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6766 1.1 mrg _mm_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
6767 1.1 mrg {
6768 1.1 mrg   return (__m128h)
6769 1.1 mrg     __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
6770 1.1 mrg 				     (__v8hf) __B,
6771 1.1 mrg 				     (__v8hf) __C,
6772 1.1 mrg 				     __D);
6773 1.1 mrg }
6774 1.1 mrg
/* Non-conjugated counterparts follow.  */
6775 1.1 mrg extern __inline __m128h
6776 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6777 1.1 mrg _mm_mask_fmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
6778 1.1 mrg 			  __m128h __D, const int __E)
6779 1.1 mrg {
6780 1.1 mrg   return (__m128h)
6781 1.1 mrg     __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
6782 1.1 mrg 					 (__v8hf) __C,
6783 1.1 mrg 					 (__v8hf) __D,
6784 1.1 mrg 					 __B, __E);
6785 1.1 mrg }
6786 1.1 mrg
6787 1.1 mrg extern __inline __m128h
6788 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6789 1.1 mrg _mm_mask3_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
6790 1.1 mrg 			   __mmask8 __D, const int __E)
6791 1.1 mrg {
6792 1.1 mrg   return (__m128h)
6793 1.1 mrg     __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
6794 1.1 mrg 					  (__v8hf) __B,
6795 1.1 mrg 					  (__v8hf) __C,
6796 1.1 mrg 					  __D, __E);
6797 1.1 mrg }
6798 1.1 mrg
6799 1.1 mrg extern __inline __m128h
6800 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6801 1.1 mrg _mm_maskz_fmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
6802 1.1 mrg 			   __m128h __D, const int __E)
6803 1.1 mrg {
6804 1.1 mrg   return (__m128h)
6805 1.1 mrg     __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
6806 1.1 mrg 					  (__v8hf) __C,
6807 1.1 mrg 					  (__v8hf) __D,
6808 1.1 mrg 					  __A, __E);
6809 1.1 mrg }
6810 1.1 mrg
6811 1.1 mrg extern __inline __m128h
6812 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6813 1.1 mrg _mm_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
6814 1.1 mrg {
6815 1.1 mrg   return (__m128h)
6816 1.1 mrg     __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
6817 1.1 mrg 				    (__v8hf) __B,
6818 1.1 mrg 				    (__v8hf) __C,
6819 1.1 mrg 				    __D);
6820 1.1 mrg }
6821 1.1 mrg #else
/* Non-__OPTIMIZE__ macro forms of the scalar complex-FMA round
   intrinsics.  The maskz and unmasked forms now carry the (__m128h)
   result cast and outer parentheses like their inline-function
   counterparts and the _mask_/_mask3_ macros; without the cast the
   macro result type differed from the documented __m128h.  */
6822 1.1 mrg #define _mm_mask_fcmadd_round_sch(A, B, C, D, E) \
6823 1.1 mrg   ((__m128h) \
6824 1.1 mrg    __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) (A), \
6825 1.1 mrg 					 (__v8hf) (C), \
6826 1.1 mrg 					 (__v8hf) (D), \
6827 1.1 mrg 					 (B), (E)))
6828 1.1 mrg
6829 1.1 mrg
6830 1.1 mrg #define _mm_mask3_fcmadd_round_sch(A, B, C, D, E) \
6831 1.1 mrg   ((__m128h) \
6832 1.1 mrg    __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) (A), \
6833 1.1 mrg 					  (__v8hf) (B), \
6834 1.1 mrg 					  (__v8hf) (C), \
6835 1.1 mrg 					  (D), (E)))
6836 1.1 mrg
6837 1.1 mrg #define _mm_maskz_fcmadd_round_sch(A, B, C, D, E) \
6838 1.1 mrg   ((__m128h) __builtin_ia32_vfcmaddcsh_maskz_round ((B), (C), (D), (A), (E)))
6839 1.1 mrg
6840 1.1 mrg #define _mm_fcmadd_round_sch(A, B, C, D) \
6841 1.1 mrg   ((__m128h) __builtin_ia32_vfcmaddcsh_round ((A), (B), (C), (D)))
6842 1.1 mrg
6843 1.1 mrg #define _mm_mask_fmadd_round_sch(A, B, C, D, E) \
6844 1.1 mrg   ((__m128h) \
6845 1.1 mrg    __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) (A), \
6846 1.1 mrg 					(__v8hf) (C), \
6847 1.1 mrg 					(__v8hf) (D), \
6848 1.1 mrg 					(B), (E)))
6849 1.1 mrg
6850 1.1 mrg #define _mm_mask3_fmadd_round_sch(A, B, C, D, E) \
6851 1.1 mrg   ((__m128h) \
6852 1.1 mrg    __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) (A), \
6853 1.1 mrg 					 (__v8hf) (B), \
6854 1.1 mrg 					 (__v8hf) (C), \
6855 1.1 mrg 					 (D), (E)))
6856 1.1 mrg
6857 1.1 mrg #define _mm_maskz_fmadd_round_sch(A, B, C, D, E) \
6858 1.1 mrg   ((__m128h) __builtin_ia32_vfmaddcsh_maskz_round ((B), (C), (D), (A), (E)))
6859 1.1 mrg
6860 1.1 mrg #define _mm_fmadd_round_sch(A, B, C, D) \
6861 1.1 mrg   ((__m128h) __builtin_ia32_vfmaddcsh_round ((A), (B), (C), (D)))
6862 1.1 mrg
6863 1.1 mrg #endif /* __OPTIMIZE__ */
6864 1.1 mrg
6865 1.1 mrg /* Intrinsics vf[,c]mulcsh. */
/* Scalar [conjugated] complex-FP16 multiply, current rounding
   direction.  Same operand conventions as the packed vf[,c]mulcph
   wrappers above: mask variants pass the passthrough vector third,
   maskz substitutes an all-zero passthrough.  */
6866 1.1 mrg extern __inline __m128h
6867 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6868 1.1 mrg _mm_fcmul_sch (__m128h __A, __m128h __B)
6869 1.1 mrg {
6870 1.1 mrg   return (__m128h)
6871 1.1 mrg     __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
6872 1.1 mrg 				    (__v8hf) __B,
6873 1.1 mrg 				    _MM_FROUND_CUR_DIRECTION);
6874 1.1 mrg }
6875 1.1 mrg
6876 1.1 mrg extern __inline __m128h
6877 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6878 1.1 mrg _mm_mask_fcmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
6879 1.1 mrg {
6880 1.1 mrg   return (__m128h)
6881 1.1 mrg     __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
6882 1.1 mrg 					 (__v8hf) __D,
6883 1.1 mrg 					 (__v8hf) __A,
6884 1.1 mrg 					 __B, _MM_FROUND_CUR_DIRECTION);
6885 1.1 mrg }
6886 1.1 mrg
6887 1.1 mrg extern __inline __m128h
6888 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6889 1.1 mrg _mm_maskz_fcmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
6890 1.1 mrg {
6891 1.1 mrg   return (__m128h)
6892 1.1 mrg     __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
6893 1.1 mrg 					 (__v8hf) __C,
6894 1.1 mrg 					 _mm_setzero_ph (),
6895 1.1 mrg 					 __A, _MM_FROUND_CUR_DIRECTION);
6896 1.1 mrg }
6897 1.1 mrg
/* Non-conjugated counterparts.  */
6898 1.1 mrg extern __inline __m128h
6899 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6900 1.1 mrg _mm_fmul_sch (__m128h __A, __m128h __B)
6901 1.1 mrg {
6902 1.1 mrg   return (__m128h)
6903 1.1 mrg     __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
6904 1.1 mrg 				   (__v8hf) __B,
6905 1.1 mrg 				   _MM_FROUND_CUR_DIRECTION);
6906 1.1 mrg }
6907 1.1 mrg
6908 1.1 mrg extern __inline __m128h
6909 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6910 1.1 mrg _mm_mask_fmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
6911 1.1 mrg {
6912 1.1 mrg   return (__m128h)
6913 1.1 mrg     __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
6914 1.1 mrg 					(__v8hf) __D,
6915 1.1 mrg 					(__v8hf) __A,
6916 1.1 mrg 					__B, _MM_FROUND_CUR_DIRECTION);
6917 1.1 mrg }
6918 1.1 mrg
6919 1.1 mrg extern __inline __m128h
6920 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6921 1.1 mrg _mm_maskz_fmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
6922 1.1 mrg {
6923 1.1 mrg   return (__m128h)
6924 1.1 mrg     __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
6925 1.1 mrg 					(__v8hf) __C,
6926 1.1 mrg 					_mm_setzero_ph (),
6927 1.1 mrg 					__A, _MM_FROUND_CUR_DIRECTION);
6928 1.1 mrg }
6929 1.1 mrg
6930 1.1 mrg #ifdef __OPTIMIZE__
/* Round-control variants of the scalar complex multiplies; rounding
   mode is the trailing const-int (__OPTIMIZE__-only).  */
6931 1.1 mrg extern __inline __m128h
6932 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6933 1.1 mrg _mm_fcmul_round_sch (__m128h __A, __m128h __B, const int __D)
6934 1.1 mrg {
6935 1.1 mrg   return (__m128h)
6936 1.1 mrg     __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
6937 1.1 mrg 				    (__v8hf) __B,
6938 1.1 mrg 				    __D);
6939 1.1 mrg }
6940 1.1 mrg
6941 1.1 mrg extern __inline __m128h
6942 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6943 1.1 mrg _mm_mask_fcmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
6944 1.1 mrg 			  __m128h __D, const int __E)
6945 1.1 mrg {
6946 1.1 mrg   return (__m128h)
6947 1.1 mrg     __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
6948 1.1 mrg 					 (__v8hf) __D,
6949 1.1 mrg 					 (__v8hf) __A,
6950 1.1 mrg 					 __B, __E);
6951 1.1 mrg }
6952 1.1 mrg
6953 1.1 mrg extern __inline __m128h
6954 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6955 1.1 mrg _mm_maskz_fcmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
6956 1.1 mrg 			   const int __E)
6957 1.1 mrg {
6958 1.1 mrg   return (__m128h)
6959 1.1 mrg     __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
6960 1.1 mrg 					 (__v8hf) __C,
6961 1.1 mrg 					 _mm_setzero_ph (),
6962 1.1 mrg 					 __A, __E);
6963 1.1 mrg }
6964 1.1 mrg
6965 1.1 mrg extern __inline __m128h
6966 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6967 1.1 mrg _mm_fmul_round_sch (__m128h __A, __m128h __B, const int __D)
6968 1.1 mrg {
6969 1.1 mrg   return (__m128h)
6970 1.1 mrg     __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
6971 1.1 mrg 				   (__v8hf) __B, __D);
6972 1.1 mrg }
6973 1.1 mrg
6974 1.1 mrg extern __inline __m128h
6975 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6976 1.1 mrg _mm_mask_fmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
6977 1.1 mrg 			 __m128h __D, const int __E)
6978 1.1 mrg {
6979 1.1 mrg   return (__m128h)
6980 1.1 mrg     __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
6981 1.1 mrg 					(__v8hf) __D,
6982 1.1 mrg 					(__v8hf) __A,
6983 1.1 mrg 					__B, __E);
6984 1.1 mrg }
6985 1.1 mrg
6986 1.1 mrg extern __inline __m128h
6987 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6988 1.1 mrg _mm_maskz_fmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, const int __E)
6989 1.1 mrg {
6990 1.1 mrg   return (__m128h)
6991 1.1 mrg     __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
6992 1.1 mrg 					(__v8hf) __C,
6993 1.1 mrg 					_mm_setzero_ph (),
6994 1.1 mrg 					__A, __E);
6995 1.1 mrg }
6996 1.1 mrg
6997 1.1 mrg #else
/* Non-__OPTIMIZE__ macro forms.  Parameters renamed to A..E to match
   every other macro region in this header, each argument use is
   parenthesised, and each expansion is wrapped in parentheses so the
   macros are safe in any expression context.  */
6998 1.1 mrg #define _mm_fcmul_round_sch(A, B, D) \
6999 1.1 mrg   ((__m128h) __builtin_ia32_vfcmulcsh_round ((__v8hf) (A), \
7000 1.1 mrg 					     (__v8hf) (B), (D)))
7001 1.1 mrg
7002 1.1 mrg #define _mm_mask_fcmul_round_sch(A, B, C, D, E) \
7003 1.1 mrg   ((__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) (C), \
7004 1.1 mrg 						  (__v8hf) (D), \
7005 1.1 mrg 						  (__v8hf) (A), \
7006 1.1 mrg 						  (B), (E)))
7007 1.1 mrg
7008 1.1 mrg #define _mm_maskz_fcmul_round_sch(A, B, C, E) \
7009 1.1 mrg   ((__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) (B), \
7010 1.1 mrg 						  (__v8hf) (C), \
7011 1.1 mrg 						  _mm_setzero_ph (), \
7012 1.1 mrg 						  (A), (E)))
7013 1.1 mrg
7014 1.1 mrg #define _mm_fmul_round_sch(A, B, D) \
7015 1.1 mrg   ((__m128h) __builtin_ia32_vfmulcsh_round ((__v8hf) (A), \
7016 1.1 mrg 					    (__v8hf) (B), (D)))
7017 1.1 mrg
7018 1.1 mrg #define _mm_mask_fmul_round_sch(A, B, C, D, E) \
7019 1.1 mrg   ((__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) (C), \
7020 1.1 mrg 						 (__v8hf) (D), \
7021 1.1 mrg 						 (__v8hf) (A), \
7022 1.1 mrg 						 (B), (E)))
7023 1.1 mrg
7024 1.1 mrg #define _mm_maskz_fmul_round_sch(A, B, C, E) \
7025 1.1 mrg   ((__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) (B), \
7026 1.1 mrg 						 (__v8hf) (C), \
7027 1.1 mrg 						 _mm_setzero_ph (), \
7028 1.1 mrg 						 (A), (E)))
7029 1.1 mrg
7030 1.1 mrg #endif /* __OPTIMIZE__ */
7031 1.1 mrg
/* Horizontal reduction of 32 _Float16 lanes with a binary operator:
   halve 512 -> 256 -> 128 bits via extracts, then fold the 8 remaining
   lanes with two lane shuffles (swap 4-lane halves, then 2-lane pairs),
   finishing with a scalar op on the last two elements.  The float64/
   float32 extract intrinsics are used purely as bit-pattern movers.  */
7032 1.1 mrg #define _MM512_REDUCE_OP(op) \
7033 1.1 mrg   __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
7034 1.1 mrg   __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
7035 1.1 mrg   __m256h __T3 = (__T1 op __T2); \
7036 1.1 mrg   __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0); \
7037 1.1 mrg   __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1); \
7038 1.1 mrg   __m128h __T6 = (__T4 op __T5); \
7039 1.1 mrg   __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6, \
7040 1.1 mrg 		 (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 }); \
7041 1.1 mrg   __m128h __T8 = (__T6 op __T7); \
7042 1.1 mrg   __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8, \
7043 1.1 mrg 		 (__v8hi) { 2, 3, 0, 1, 4, 5, 6, 7 }); \
7044 1.1 mrg   __m128h __T10 = __T8 op __T9; \
7045 1.1 mrg   return __T10[0] op __T10[1]
7046 1.1 mrg
/* Horizontal add/mul over all 32 half-precision lanes of __A.  */
7048 1.1 mrg extern __inline _Float16
7049 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7050 1.1 mrg _mm512_reduce_add_ph (__m512h __A)
7051 1.1 mrg {
7052 1.1 mrg   _MM512_REDUCE_OP (+);
7053 1.1 mrg }
7054 1.1 mrg
7055 1.1 mrg extern __inline _Float16
7056 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7057 1.1 mrg _mm512_reduce_mul_ph (__m512h __A)
7058 1.1 mrg {
7059 1.1 mrg   _MM512_REDUCE_OP (*);
7060 1.1 mrg }
7061 1.1 mrg
7062 1.1 mrg #undef _MM512_REDUCE_OP
7063 1.1 mrg
7064 1.1 mrg #ifdef __AVX512VL__
7065 1.1 mrg
/* min/max reduction when the 128/256-bit masked min/max builtins are
   available (AVX512VL): same halving-and-shuffling scheme as the
   arithmetic reduction above, but each fold step goes through the
   __builtin_ia32_{min,max}ph{256,128}_mask builtin with a full mask.  */
7066 1.1 mrg #define _MM512_REDUCE_OP(op) \
7067 1.1 mrg   __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
7068 1.1 mrg   __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
7069 1.1 mrg   __m256h __T3 = __builtin_ia32_##op##ph256_mask (__T1, __T2, \
7070 1.1 mrg 		 _mm256_setzero_ph (), (__mmask16) -1); \
7071 1.1 mrg   __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0); \
7072 1.1 mrg   __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1); \
7073 1.1 mrg   __m128h __T6 = __builtin_ia32_##op##ph128_mask \
7074 1.1 mrg 		 (__T4, __T5, _mm_setzero_ph (),(__mmask8) -1); \
7075 1.1 mrg   __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6, \
7076 1.1 mrg 		 (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 }); \
7077 1.1 mrg   __m128h __T8 = (__m128h) __builtin_ia32_##op##ph128_mask \
7078 1.1 mrg 		 (__T6, __T7, _mm_setzero_ph (),(__mmask8) -1); \
7079 1.1 mrg   __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8, \
7080 1.1 mrg 		 (__v8hi) { 4, 5 }); \
7081 1.1 mrg   __m128h __T10 = __builtin_ia32_##op##ph128_mask \
7082 1.1 mrg 		  (__T8, __T9, _mm_setzero_ph (),(__mmask8) -1); \
7083 1.1 mrg   __m128h __T11 = (__m128h) __builtin_shuffle (__T10, \
7084 1.1 mrg 		  (__v8hi) { 1, 0 }); \
7085 1.1 mrg   __m128h __T12 = __builtin_ia32_##op##ph128_mask \
7086 1.1 mrg 		  (__T10, __T11, _mm_setzero_ph (),(__mmask8) -1); \
7087 1.1 mrg   return __T12[0]
7088 1.1 mrg
7089 1.1 mrg #else
7090 1.1 mrg
/* Fallback without AVX512VL: stay in 512-bit registers the whole time,
   pairing lanes with ever-narrower shuffles (256-bit halves, then
   128-, 64-, 32-, 16-bit granules) and folding with the 512-bit
   _mm512_min/max_ph intrinsic; the result lands in lane 0.  */
7091 1.1 mrg #define _MM512_REDUCE_OP(op) \
7092 1.1 mrg   __m512h __T1 = (__m512h) __builtin_shuffle ((__m512d) __A, \
7093 1.1 mrg 		 (__v8di) { 4, 5, 6, 7, 0, 0, 0, 0 }); \
7094 1.1 mrg   __m512h __T2 = _mm512_##op##_ph (__A, __T1); \
7095 1.1 mrg   __m512h __T3 = (__m512h) __builtin_shuffle ((__m512d) __T2, \
7096 1.1 mrg 		 (__v8di) { 2, 3, 0, 0, 0, 0, 0, 0 }); \
7097 1.1 mrg   __m512h __T4 = _mm512_##op##_ph (__T2, __T3); \
7098 1.1 mrg   __m512h __T5 = (__m512h) __builtin_shuffle ((__m512d) __T4, \
7099 1.1 mrg 		 (__v8di) { 1, 0, 0, 0, 0, 0, 0, 0 }); \
7100 1.1 mrg   __m512h __T6 = _mm512_##op##_ph (__T4, __T5); \
7101 1.1 mrg   __m512h __T7 = (__m512h) __builtin_shuffle ((__m512) __T6, \
7102 1.1 mrg 		 (__v16si) { 1, 0, 0, 0, 0, 0, 0, 0, \
7103 1.1 mrg 			     0, 0, 0, 0, 0, 0, 0, 0 }); \
7104 1.1 mrg   __m512h __T8 = _mm512_##op##_ph (__T6, __T7); \
7105 1.1 mrg   __m512h __T9 = (__m512h) __builtin_shuffle (__T8, \
7106 1.1 mrg 		 (__v32hi) { 1, 0, 0, 0, 0, 0, 0, 0, \
7107 1.1 mrg 			     0, 0, 0, 0, 0, 0, 0, 0, \
7108 1.1 mrg 			     0, 0, 0, 0, 0, 0, 0, 0, \
7109 1.1 mrg 			     0, 0, 0, 0, 0, 0, 0, 0 }); \
7110 1.1 mrg   __m512h __T10 = _mm512_##op##_ph (__T8, __T9); \
7111 1.1 mrg   return __T10[0]
7112 1.1 mrg #endif
7113 1.1 mrg
/* Horizontal minimum/maximum of the 32 _Float16 lanes of __A, built on
   whichever _MM512_REDUCE_OP variant was selected above.  */
7114 1.1 mrg extern __inline _Float16
7115 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7116 1.1 mrg _mm512_reduce_min_ph (__m512h __A)
7117 1.1 mrg {
7118 1.1 mrg   _MM512_REDUCE_OP (min);
7119 1.1 mrg }
7120 1.1 mrg
7121 1.1 mrg extern __inline _Float16
7122 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7123 1.1 mrg _mm512_reduce_max_ph (__m512h __A)
7124 1.1 mrg {
7125 1.1 mrg   _MM512_REDUCE_OP (max);
7126 1.1 mrg }
7127 1.1 mrg
7128 1.1 mrg #undef _MM512_REDUCE_OP
7129 1.1 mrg
/* Per-element blend of __A and __W under mask __U, implemented with the
   masked 16-bit move builtin (operands passed as (__W, __A, __U) --
   the builtin's merge source is its second argument).  */
7130 1.1 mrg extern __inline __m512h
7131 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7132 1.1 mrg _mm512_mask_blend_ph (__mmask32 __U, __m512h __A, __m512h __W)
7133 1.1 mrg {
7134 1.1 mrg   return (__m512h) __builtin_ia32_movdquhi512_mask ((__v32hi) __W,
7135 1.1 mrg 						    (__v32hi) __A,
7136 1.1 mrg 						    (__mmask32) __U);
7137 1.1 mrg
7138 1.1 mrg }
7139 1.1 mrg
/* Two-source permute: elements chosen from __A/__B by 16-bit indices
   in __I, via the vpermi2w builtin with a full write mask.  */
7140 1.1 mrg extern __inline __m512h
7141 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7142 1.1 mrg _mm512_permutex2var_ph (__m512h __A, __m512i __I, __m512h __B)
7143 1.1 mrg {
7144 1.1 mrg   return (__m512h) __builtin_ia32_vpermi2varhi512_mask ((__v32hi) __A,
7145 1.1 mrg 							(__v32hi) __I,
7146 1.1 mrg 							(__v32hi) __B,
7147 1.1 mrg 							(__mmask32)-1);
7148 1.1 mrg }
7149 1.1 mrg
/* Single-source permute of __B by indices __A; note the wrapper swaps
   the operands -- the builtin takes (data, indices).  */
7150 1.1 mrg extern __inline __m512h
7151 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7152 1.1 mrg _mm512_permutexvar_ph (__m512i __A, __m512h __B)
7153 1.1 mrg {
7154 1.1 mrg   return (__m512h) __builtin_ia32_permvarhi512_mask ((__v32hi) __B,
7155 1.1 mrg 						     (__v32hi) __A,
7156 1.1 mrg 						     (__v32hi)
7157 1.1 mrg 						     (_mm512_setzero_ph ()),
7158 1.1 mrg 						     (__mmask32)-1);
7159 1.1 mrg }
7160 1.1 mrg
/* Broadcast one complex FP16 value to all 16 complex lanes.  A
   _Float16 _Complex occupies 32 bits (real, imag), so it is
   reinterpreted as a float through a union (well-defined type punning
   in C) and broadcast with set1_ps.  */
7161 1.1 mrg extern __inline __m512h
7162 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7163 1.1 mrg _mm512_set1_pch (_Float16 _Complex __A)
7164 1.1 mrg {
7165 1.1 mrg   union
7166 1.1 mrg   {
7167 1.1 mrg     _Float16 _Complex __a;
7168 1.1 mrg     float __b;
7169 1.1 mrg   } __u = { .__a = __A};
7170 1.1 mrg
7171 1.1 mrg   return (__m512h) _mm512_set1_ps (__u.__b);
7172 1.1 mrg }
7173 1.1 mrg
// The mul/cmul intrinsic names below are thin aliases for the
// corresponding fmul/fcmul complex-multiply intrinsics defined above.
7175 1.1 mrg #define _mm512_mul_pch(A, B) _mm512_fmul_pch ((A), (B))
7176 1.1 mrg #define _mm512_mask_mul_pch(W, U, A, B) \
7177 1.1 mrg   _mm512_mask_fmul_pch ((W), (U), (A), (B))
7178 1.1 mrg #define _mm512_maskz_mul_pch(U, A, B) _mm512_maskz_fmul_pch ((U), (A), (B))
7179 1.1 mrg #define _mm512_mul_round_pch(A, B, R) _mm512_fmul_round_pch ((A), (B), (R))
7180 1.1 mrg #define _mm512_mask_mul_round_pch(W, U, A, B, R) \
7181 1.1 mrg   _mm512_mask_fmul_round_pch ((W), (U), (A), (B), (R))
7182 1.1 mrg #define _mm512_maskz_mul_round_pch(U, A, B, R) \
7183 1.1 mrg   _mm512_maskz_fmul_round_pch ((U), (A), (B), (R))
7184 1.1 mrg
7185 1.1 mrg #define _mm512_cmul_pch(A, B) _mm512_fcmul_pch ((A), (B))
7186 1.1 mrg #define _mm512_mask_cmul_pch(W, U, A, B) \
7187 1.1 mrg   _mm512_mask_fcmul_pch ((W), (U), (A), (B))
7188 1.1 mrg #define _mm512_maskz_cmul_pch(U, A, B) _mm512_maskz_fcmul_pch ((U), (A), (B))
7189 1.1 mrg #define _mm512_cmul_round_pch(A, B, R) _mm512_fcmul_round_pch ((A), (B), (R))
7190 1.1 mrg #define _mm512_mask_cmul_round_pch(W, U, A, B, R) \
7191 1.1 mrg   _mm512_mask_fcmul_round_pch ((W), (U), (A), (B), (R))
7192 1.1 mrg #define _mm512_maskz_cmul_round_pch(U, A, B, R) \
7193 1.1 mrg   _mm512_maskz_fcmul_round_pch ((U), (A), (B), (R))
7194 1.1 mrg
7195 1.1 mrg #define _mm_mul_sch(A, B) _mm_fmul_sch ((A), (B))
7196 1.1 mrg #define _mm_mask_mul_sch(W, U, A, B) _mm_mask_fmul_sch ((W), (U), (A), (B))
7197 1.1 mrg #define _mm_maskz_mul_sch(U, A, B) _mm_maskz_fmul_sch ((U), (A), (B))
7198 1.1 mrg #define _mm_mul_round_sch(A, B, R) _mm_fmul_round_sch ((A), (B), (R))
7199 1.1 mrg #define _mm_mask_mul_round_sch(W, U, A, B, R) \
7200 1.1 mrg   _mm_mask_fmul_round_sch ((W), (U), (A), (B), (R))
7201 1.1 mrg #define _mm_maskz_mul_round_sch(U, A, B, R) \
7202 1.1 mrg   _mm_maskz_fmul_round_sch ((U), (A), (B), (R))
7203 1.1 mrg
7204 1.1 mrg #define _mm_cmul_sch(A, B) _mm_fcmul_sch ((A), (B))
7205 1.1 mrg #define _mm_mask_cmul_sch(W, U, A, B) _mm_mask_fcmul_sch ((W), (U), (A), (B))
7206 1.1 mrg #define _mm_maskz_cmul_sch(U, A, B) _mm_maskz_fcmul_sch ((U), (A), (B))
7207 1.1 mrg #define _mm_cmul_round_sch(A, B, R) _mm_fcmul_round_sch ((A), (B), (R))
7208 1.1 mrg #define _mm_mask_cmul_round_sch(W, U, A, B, R) \
7209 1.1 mrg   _mm_mask_fcmul_round_sch ((W), (U), (A), (B), (R))
7210 1.1 mrg #define _mm_maskz_cmul_round_sch(U, A, B, R) \
7211 1.1 mrg   _mm_maskz_fcmul_round_sch ((U), (A), (B), (R))
7212 1.1 mrg
7213 1.1 mrg #ifdef __DISABLE_AVX512FP16__
7214 1.1 mrg #undef __DISABLE_AVX512FP16__
7215 1.1 mrg #pragma GCC pop_options
7216 1.1 mrg #endif /* __DISABLE_AVX512FP16__ */
7217 1.1 mrg
7218 1.1 mrg #endif /* __AVX512FP16INTRIN_H_INCLUDED */
7219