1 1.1 mrg /* Copyright (C) 2019-2022 Free Software Foundation, Inc. 2 1.1 mrg 3 1.1 mrg This file is part of GCC. 4 1.1 mrg 5 1.1 mrg GCC is free software; you can redistribute it and/or modify 6 1.1 mrg it under the terms of the GNU General Public License as published by 7 1.1 mrg the Free Software Foundation; either version 3, or (at your option) 8 1.1 mrg any later version. 9 1.1 mrg 10 1.1 mrg GCC is distributed in the hope that it will be useful, 11 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of 12 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 1.1 mrg GNU General Public License for more details. 14 1.1 mrg 15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional 16 1.1 mrg permissions described in the GCC Runtime Library Exception, version 17 1.1 mrg 3.1, as published by the Free Software Foundation. 18 1.1 mrg 19 1.1 mrg You should have received a copy of the GNU General Public License and 20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program; 21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 1.1 mrg <http://www.gnu.org/licenses/>. */ 23 1.1 mrg 24 1.1 mrg #ifndef _IMMINTRIN_H_INCLUDED 25 1.1 mrg #error "Never use <avx512fp16intrin.h> directly; include <immintrin.h> instead." 26 1.1 mrg #endif 27 1.1 mrg 28 1.1 mrg #ifndef __AVX512FP16INTRIN_H_INCLUDED 29 1.1 mrg #define __AVX512FP16INTRIN_H_INCLUDED 30 1.1 mrg 31 1.1 mrg #ifndef __AVX512FP16__ 32 1.1 mrg #pragma GCC push_options 33 1.1 mrg #pragma GCC target("avx512fp16") 34 1.1 mrg #define __DISABLE_AVX512FP16__ 35 1.1 mrg #endif /* __AVX512FP16__ */ 36 1.1 mrg 37 1.1 mrg /* Internal data types for implementing the intrinsics. 
*/ 38 1.1 mrg typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16))); 39 1.1 mrg typedef _Float16 __v16hf __attribute__ ((__vector_size__ (32))); 40 1.1 mrg typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64))); 41 1.1 mrg 42 1.1 mrg /* The Intel API is flexible enough that we must allow aliasing with other 43 1.1 mrg vector types, and their scalar components. */ 44 1.1 mrg typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__)); 45 1.1 mrg typedef _Float16 __m256h __attribute__ ((__vector_size__ (32), __may_alias__)); 46 1.1 mrg typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__)); 47 1.1 mrg 48 1.1 mrg /* Unaligned version of the same type. */ 49 1.1 mrg typedef _Float16 __m128h_u __attribute__ ((__vector_size__ (16), \ 50 1.1 mrg __may_alias__, __aligned__ (1))); 51 1.1 mrg typedef _Float16 __m256h_u __attribute__ ((__vector_size__ (32), \ 52 1.1 mrg __may_alias__, __aligned__ (1))); 53 1.1 mrg typedef _Float16 __m512h_u __attribute__ ((__vector_size__ (64), \ 54 1.1 mrg __may_alias__, __aligned__ (1))); 55 1.1 mrg 56 1.1 mrg extern __inline __m128h 57 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 58 1.1 mrg _mm_set_ph (_Float16 __A7, _Float16 __A6, _Float16 __A5, 59 1.1 mrg _Float16 __A4, _Float16 __A3, _Float16 __A2, 60 1.1 mrg _Float16 __A1, _Float16 __A0) 61 1.1 mrg { 62 1.1 mrg return __extension__ (__m128h)(__v8hf){ __A0, __A1, __A2, __A3, 63 1.1 mrg __A4, __A5, __A6, __A7 }; 64 1.1 mrg } 65 1.1 mrg 66 1.1 mrg extern __inline __m256h 67 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 68 1.1 mrg _mm256_set_ph (_Float16 __A15, _Float16 __A14, _Float16 __A13, 69 1.1 mrg _Float16 __A12, _Float16 __A11, _Float16 __A10, 70 1.1 mrg _Float16 __A9, _Float16 __A8, _Float16 __A7, 71 1.1 mrg _Float16 __A6, _Float16 __A5, _Float16 __A4, 72 1.1 mrg _Float16 __A3, _Float16 __A2, _Float16 __A1, 73 1.1 mrg _Float16 __A0) 74 1.1 mrg { 75 1.1 mrg 
return __extension__ (__m256h)(__v16hf){ __A0, __A1, __A2, __A3, 76 1.1 mrg __A4, __A5, __A6, __A7, 77 1.1 mrg __A8, __A9, __A10, __A11, 78 1.1 mrg __A12, __A13, __A14, __A15 }; 79 1.1 mrg } 80 1.1 mrg 81 1.1 mrg extern __inline __m512h 82 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 83 1.1 mrg _mm512_set_ph (_Float16 __A31, _Float16 __A30, _Float16 __A29, 84 1.1 mrg _Float16 __A28, _Float16 __A27, _Float16 __A26, 85 1.1 mrg _Float16 __A25, _Float16 __A24, _Float16 __A23, 86 1.1 mrg _Float16 __A22, _Float16 __A21, _Float16 __A20, 87 1.1 mrg _Float16 __A19, _Float16 __A18, _Float16 __A17, 88 1.1 mrg _Float16 __A16, _Float16 __A15, _Float16 __A14, 89 1.1 mrg _Float16 __A13, _Float16 __A12, _Float16 __A11, 90 1.1 mrg _Float16 __A10, _Float16 __A9, _Float16 __A8, 91 1.1 mrg _Float16 __A7, _Float16 __A6, _Float16 __A5, 92 1.1 mrg _Float16 __A4, _Float16 __A3, _Float16 __A2, 93 1.1 mrg _Float16 __A1, _Float16 __A0) 94 1.1 mrg { 95 1.1 mrg return __extension__ (__m512h)(__v32hf){ __A0, __A1, __A2, __A3, 96 1.1 mrg __A4, __A5, __A6, __A7, 97 1.1 mrg __A8, __A9, __A10, __A11, 98 1.1 mrg __A12, __A13, __A14, __A15, 99 1.1 mrg __A16, __A17, __A18, __A19, 100 1.1 mrg __A20, __A21, __A22, __A23, 101 1.1 mrg __A24, __A25, __A26, __A27, 102 1.1 mrg __A28, __A29, __A30, __A31 }; 103 1.1 mrg } 104 1.1 mrg 105 1.1 mrg /* Create vectors of elements in the reversed order from _mm_set_ph, 106 1.1 mrg _mm256_set_ph and _mm512_set_ph functions. 
*/ 107 1.1 mrg 108 1.1 mrg extern __inline __m128h 109 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 110 1.1 mrg _mm_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2, 111 1.1 mrg _Float16 __A3, _Float16 __A4, _Float16 __A5, 112 1.1 mrg _Float16 __A6, _Float16 __A7) 113 1.1 mrg { 114 1.1 mrg return _mm_set_ph (__A7, __A6, __A5, __A4, __A3, __A2, __A1, __A0); 115 1.1 mrg } 116 1.1 mrg 117 1.1 mrg extern __inline __m256h 118 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 119 1.1 mrg _mm256_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2, 120 1.1 mrg _Float16 __A3, _Float16 __A4, _Float16 __A5, 121 1.1 mrg _Float16 __A6, _Float16 __A7, _Float16 __A8, 122 1.1 mrg _Float16 __A9, _Float16 __A10, _Float16 __A11, 123 1.1 mrg _Float16 __A12, _Float16 __A13, _Float16 __A14, 124 1.1 mrg _Float16 __A15) 125 1.1 mrg { 126 1.1 mrg return _mm256_set_ph (__A15, __A14, __A13, __A12, __A11, __A10, __A9, 127 1.1 mrg __A8, __A7, __A6, __A5, __A4, __A3, __A2, __A1, 128 1.1 mrg __A0); 129 1.1 mrg } 130 1.1 mrg 131 1.1 mrg extern __inline __m512h 132 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 133 1.1 mrg _mm512_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2, 134 1.1 mrg _Float16 __A3, _Float16 __A4, _Float16 __A5, 135 1.1 mrg _Float16 __A6, _Float16 __A7, _Float16 __A8, 136 1.1 mrg _Float16 __A9, _Float16 __A10, _Float16 __A11, 137 1.1 mrg _Float16 __A12, _Float16 __A13, _Float16 __A14, 138 1.1 mrg _Float16 __A15, _Float16 __A16, _Float16 __A17, 139 1.1 mrg _Float16 __A18, _Float16 __A19, _Float16 __A20, 140 1.1 mrg _Float16 __A21, _Float16 __A22, _Float16 __A23, 141 1.1 mrg _Float16 __A24, _Float16 __A25, _Float16 __A26, 142 1.1 mrg _Float16 __A27, _Float16 __A28, _Float16 __A29, 143 1.1 mrg _Float16 __A30, _Float16 __A31) 144 1.1 mrg 145 1.1 mrg { 146 1.1 mrg return _mm512_set_ph (__A31, __A30, __A29, __A28, __A27, __A26, __A25, 147 1.1 mrg __A24, __A23, __A22, __A21, __A20, __A19, 
__A18, 148 1.1 mrg __A17, __A16, __A15, __A14, __A13, __A12, __A11, 149 1.1 mrg __A10, __A9, __A8, __A7, __A6, __A5, __A4, __A3, 150 1.1 mrg __A2, __A1, __A0); 151 1.1 mrg } 152 1.1 mrg 153 1.1 mrg /* Broadcast _Float16 to vector. */ 154 1.1 mrg 155 1.1 mrg extern __inline __m128h 156 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 157 1.1 mrg _mm_set1_ph (_Float16 __A) 158 1.1 mrg { 159 1.1 mrg return _mm_set_ph (__A, __A, __A, __A, __A, __A, __A, __A); 160 1.1 mrg } 161 1.1 mrg 162 1.1 mrg extern __inline __m256h 163 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 164 1.1 mrg _mm256_set1_ph (_Float16 __A) 165 1.1 mrg { 166 1.1 mrg return _mm256_set_ph (__A, __A, __A, __A, __A, __A, __A, __A, 167 1.1 mrg __A, __A, __A, __A, __A, __A, __A, __A); 168 1.1 mrg } 169 1.1 mrg 170 1.1 mrg extern __inline __m512h 171 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 172 1.1 mrg _mm512_set1_ph (_Float16 __A) 173 1.1 mrg { 174 1.1 mrg return _mm512_set_ph (__A, __A, __A, __A, __A, __A, __A, __A, 175 1.1 mrg __A, __A, __A, __A, __A, __A, __A, __A, 176 1.1 mrg __A, __A, __A, __A, __A, __A, __A, __A, 177 1.1 mrg __A, __A, __A, __A, __A, __A, __A, __A); 178 1.1 mrg } 179 1.1 mrg 180 1.1 mrg /* Create a vector with all zeros. 
*/ 181 1.1 mrg 182 1.1 mrg extern __inline __m128h 183 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 184 1.1 mrg _mm_setzero_ph (void) 185 1.1 mrg { 186 1.1 mrg return _mm_set1_ph (0.0f); 187 1.1 mrg } 188 1.1 mrg 189 1.1 mrg extern __inline __m256h 190 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 191 1.1 mrg _mm256_setzero_ph (void) 192 1.1 mrg { 193 1.1 mrg return _mm256_set1_ph (0.0f); 194 1.1 mrg } 195 1.1 mrg 196 1.1 mrg extern __inline __m512h 197 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 198 1.1 mrg _mm512_setzero_ph (void) 199 1.1 mrg { 200 1.1 mrg return _mm512_set1_ph (0.0f); 201 1.1 mrg } 202 1.1 mrg 203 1.1 mrg extern __inline __m128h 204 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 205 1.1 mrg _mm_undefined_ph (void) 206 1.1 mrg { 207 1.1 mrg #pragma GCC diagnostic push 208 1.1 mrg #pragma GCC diagnostic ignored "-Winit-self" 209 1.1 mrg __m128h __Y = __Y; 210 1.1 mrg #pragma GCC diagnostic pop 211 1.1 mrg return __Y; 212 1.1 mrg } 213 1.1 mrg 214 1.1 mrg extern __inline __m256h 215 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 216 1.1 mrg _mm256_undefined_ph (void) 217 1.1 mrg { 218 1.1 mrg #pragma GCC diagnostic push 219 1.1 mrg #pragma GCC diagnostic ignored "-Winit-self" 220 1.1 mrg __m256h __Y = __Y; 221 1.1 mrg #pragma GCC diagnostic pop 222 1.1 mrg return __Y; 223 1.1 mrg } 224 1.1 mrg 225 1.1 mrg extern __inline __m512h 226 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 227 1.1 mrg _mm512_undefined_ph (void) 228 1.1 mrg { 229 1.1 mrg #pragma GCC diagnostic push 230 1.1 mrg #pragma GCC diagnostic ignored "-Winit-self" 231 1.1 mrg __m512h __Y = __Y; 232 1.1 mrg #pragma GCC diagnostic pop 233 1.1 mrg return __Y; 234 1.1 mrg } 235 1.1 mrg 236 1.1 mrg extern __inline _Float16 237 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 238 1.1 mrg 
_mm_cvtsh_h (__m128h __A) 239 1.1 mrg { 240 1.1 mrg return __A[0]; 241 1.1 mrg } 242 1.1 mrg 243 1.1 mrg extern __inline _Float16 244 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 245 1.1 mrg _mm256_cvtsh_h (__m256h __A) 246 1.1 mrg { 247 1.1 mrg return __A[0]; 248 1.1 mrg } 249 1.1 mrg 250 1.1 mrg extern __inline _Float16 251 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 252 1.1 mrg _mm512_cvtsh_h (__m512h __A) 253 1.1 mrg { 254 1.1 mrg return __A[0]; 255 1.1 mrg } 256 1.1 mrg 257 1.1 mrg extern __inline __m512 258 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 259 1.1 mrg _mm512_castph_ps (__m512h __a) 260 1.1 mrg { 261 1.1 mrg return (__m512) __a; 262 1.1 mrg } 263 1.1 mrg 264 1.1 mrg extern __inline __m512d 265 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 266 1.1 mrg _mm512_castph_pd (__m512h __a) 267 1.1 mrg { 268 1.1 mrg return (__m512d) __a; 269 1.1 mrg } 270 1.1 mrg 271 1.1 mrg extern __inline __m512i 272 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 273 1.1 mrg _mm512_castph_si512 (__m512h __a) 274 1.1 mrg { 275 1.1 mrg return (__m512i) __a; 276 1.1 mrg } 277 1.1 mrg 278 1.1 mrg extern __inline __m128h 279 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 280 1.1 mrg _mm512_castph512_ph128 (__m512h __A) 281 1.1 mrg { 282 1.1 mrg union 283 1.1 mrg { 284 1.1 mrg __m128h __a[4]; 285 1.1 mrg __m512h __v; 286 1.1 mrg } __u = { .__v = __A }; 287 1.1 mrg return __u.__a[0]; 288 1.1 mrg } 289 1.1 mrg 290 1.1 mrg extern __inline __m256h 291 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 292 1.1 mrg _mm512_castph512_ph256 (__m512h __A) 293 1.1 mrg { 294 1.1 mrg union 295 1.1 mrg { 296 1.1 mrg __m256h __a[2]; 297 1.1 mrg __m512h __v; 298 1.1 mrg } __u = { .__v = __A }; 299 1.1 mrg return __u.__a[0]; 300 1.1 mrg } 301 1.1 mrg 302 1.1 mrg extern __inline __m512h 303 
1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 304 1.1 mrg _mm512_castph128_ph512 (__m128h __A) 305 1.1 mrg { 306 1.1 mrg union 307 1.1 mrg { 308 1.1 mrg __m128h __a[4]; 309 1.1 mrg __m512h __v; 310 1.1 mrg } __u; 311 1.1 mrg __u.__a[0] = __A; 312 1.1 mrg return __u.__v; 313 1.1 mrg } 314 1.1 mrg 315 1.1 mrg extern __inline __m512h 316 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 317 1.1 mrg _mm512_castph256_ph512 (__m256h __A) 318 1.1 mrg { 319 1.1 mrg union 320 1.1 mrg { 321 1.1 mrg __m256h __a[2]; 322 1.1 mrg __m512h __v; 323 1.1 mrg } __u; 324 1.1 mrg __u.__a[0] = __A; 325 1.1 mrg return __u.__v; 326 1.1 mrg } 327 1.1 mrg 328 1.1 mrg extern __inline __m512h 329 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 330 1.1 mrg _mm512_zextph128_ph512 (__m128h __A) 331 1.1 mrg { 332 1.1 mrg return (__m512h) _mm512_insertf32x4 (_mm512_setzero_ps (), 333 1.1 mrg (__m128) __A, 0); 334 1.1 mrg } 335 1.1 mrg 336 1.1 mrg extern __inline __m512h 337 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 338 1.1 mrg _mm512_zextph256_ph512 (__m256h __A) 339 1.1 mrg { 340 1.1 mrg return (__m512h) _mm512_insertf64x4 (_mm512_setzero_pd (), 341 1.1 mrg (__m256d) __A, 0); 342 1.1 mrg } 343 1.1 mrg 344 1.1 mrg extern __inline __m512h 345 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 346 1.1 mrg _mm512_castps_ph (__m512 __a) 347 1.1 mrg { 348 1.1 mrg return (__m512h) __a; 349 1.1 mrg } 350 1.1 mrg 351 1.1 mrg extern __inline __m512h 352 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 353 1.1 mrg _mm512_castpd_ph (__m512d __a) 354 1.1 mrg { 355 1.1 mrg return (__m512h) __a; 356 1.1 mrg } 357 1.1 mrg 358 1.1 mrg extern __inline __m512h 359 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 360 1.1 mrg _mm512_castsi512_ph (__m512i __a) 361 1.1 mrg { 362 1.1 mrg return (__m512h) __a; 363 1.1 mrg } 364 
1.1 mrg 365 1.1 mrg /* Create a vector with element 0 as F and the rest zero. */ 366 1.1 mrg extern __inline __m128h 367 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 368 1.1 mrg _mm_set_sh (_Float16 __F) 369 1.1 mrg { 370 1.1 mrg return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, __F); 371 1.1 mrg } 372 1.1 mrg 373 1.1 mrg /* Create a vector with element 0 as *P and the rest zero. */ 374 1.1 mrg extern __inline __m128h 375 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 376 1.1 mrg _mm_load_sh (void const *__P) 377 1.1 mrg { 378 1.1 mrg return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 379 1.1 mrg *(_Float16 const *) __P); 380 1.1 mrg } 381 1.1 mrg 382 1.1 mrg extern __inline __m512h 383 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 384 1.1 mrg _mm512_load_ph (void const *__P) 385 1.1 mrg { 386 1.1 mrg return *(const __m512h *) __P; 387 1.1 mrg } 388 1.1 mrg 389 1.1 mrg extern __inline __m256h 390 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 391 1.1 mrg _mm256_load_ph (void const *__P) 392 1.1 mrg { 393 1.1 mrg return *(const __m256h *) __P; 394 1.1 mrg } 395 1.1 mrg 396 1.1 mrg extern __inline __m128h 397 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 398 1.1 mrg _mm_load_ph (void const *__P) 399 1.1 mrg { 400 1.1 mrg return *(const __m128h *) __P; 401 1.1 mrg } 402 1.1 mrg 403 1.1 mrg extern __inline __m512h 404 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 405 1.1 mrg _mm512_loadu_ph (void const *__P) 406 1.1 mrg { 407 1.1 mrg return *(const __m512h_u *) __P; 408 1.1 mrg } 409 1.1 mrg 410 1.1 mrg extern __inline __m256h 411 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 412 1.1 mrg _mm256_loadu_ph (void const *__P) 413 1.1 mrg { 414 1.1 mrg return *(const __m256h_u *) __P; 415 1.1 mrg } 416 1.1 mrg 417 1.1 mrg extern __inline __m128h 418 
1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 419 1.1 mrg _mm_loadu_ph (void const *__P) 420 1.1 mrg { 421 1.1 mrg return *(const __m128h_u *) __P; 422 1.1 mrg } 423 1.1 mrg 424 1.1 mrg /* Stores the lower _Float16 value. */ 425 1.1 mrg extern __inline void 426 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 427 1.1 mrg _mm_store_sh (void *__P, __m128h __A) 428 1.1 mrg { 429 1.1 mrg *(_Float16 *) __P = ((__v8hf)__A)[0]; 430 1.1 mrg } 431 1.1 mrg 432 1.1 mrg extern __inline void 433 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 434 1.1 mrg _mm512_store_ph (void *__P, __m512h __A) 435 1.1 mrg { 436 1.1 mrg *(__m512h *) __P = __A; 437 1.1 mrg } 438 1.1 mrg 439 1.1 mrg extern __inline void 440 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 441 1.1 mrg _mm256_store_ph (void *__P, __m256h __A) 442 1.1 mrg { 443 1.1 mrg *(__m256h *) __P = __A; 444 1.1 mrg } 445 1.1 mrg 446 1.1 mrg extern __inline void 447 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 448 1.1 mrg _mm_store_ph (void *__P, __m128h __A) 449 1.1 mrg { 450 1.1 mrg *(__m128h *) __P = __A; 451 1.1 mrg } 452 1.1 mrg 453 1.1 mrg extern __inline void 454 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 455 1.1 mrg _mm512_storeu_ph (void *__P, __m512h __A) 456 1.1 mrg { 457 1.1 mrg *(__m512h_u *) __P = __A; 458 1.1 mrg } 459 1.1 mrg 460 1.1 mrg extern __inline void 461 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 462 1.1 mrg _mm256_storeu_ph (void *__P, __m256h __A) 463 1.1 mrg { 464 1.1 mrg *(__m256h_u *) __P = __A; 465 1.1 mrg } 466 1.1 mrg 467 1.1 mrg extern __inline void 468 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 469 1.1 mrg _mm_storeu_ph (void *__P, __m128h __A) 470 1.1 mrg { 471 1.1 mrg *(__m128h_u *) __P = __A; 472 1.1 mrg } 473 1.1 mrg 474 1.1 mrg extern __inline __m512h 
475 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 476 1.1 mrg _mm512_abs_ph (__m512h __A) 477 1.1 mrg { 478 1.1 mrg return (__m512h) _mm512_and_epi32 ( _mm512_set1_epi32 (0x7FFF7FFF), 479 1.1 mrg (__m512i) __A); 480 1.1 mrg } 481 1.1 mrg 482 1.1 mrg /* Intrinsics v[add,sub,mul,div]ph. */ 483 1.1 mrg extern __inline __m512h 484 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 485 1.1 mrg _mm512_add_ph (__m512h __A, __m512h __B) 486 1.1 mrg { 487 1.1 mrg return (__m512h) ((__v32hf) __A + (__v32hf) __B); 488 1.1 mrg } 489 1.1 mrg 490 1.1 mrg extern __inline __m512h 491 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 492 1.1 mrg _mm512_mask_add_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) 493 1.1 mrg { 494 1.1 mrg return __builtin_ia32_addph512_mask (__C, __D, __A, __B); 495 1.1 mrg } 496 1.1 mrg 497 1.1 mrg extern __inline __m512h 498 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 499 1.1 mrg _mm512_maskz_add_ph (__mmask32 __A, __m512h __B, __m512h __C) 500 1.1 mrg { 501 1.1 mrg return __builtin_ia32_addph512_mask (__B, __C, 502 1.1 mrg _mm512_setzero_ph (), __A); 503 1.1 mrg } 504 1.1 mrg 505 1.1 mrg extern __inline __m512h 506 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 507 1.1 mrg _mm512_sub_ph (__m512h __A, __m512h __B) 508 1.1 mrg { 509 1.1 mrg return (__m512h) ((__v32hf) __A - (__v32hf) __B); 510 1.1 mrg } 511 1.1 mrg 512 1.1 mrg extern __inline __m512h 513 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 514 1.1 mrg _mm512_mask_sub_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) 515 1.1 mrg { 516 1.1 mrg return __builtin_ia32_subph512_mask (__C, __D, __A, __B); 517 1.1 mrg } 518 1.1 mrg 519 1.1 mrg extern __inline __m512h 520 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 521 1.1 mrg _mm512_maskz_sub_ph (__mmask32 __A, __m512h __B, __m512h 
__C) 522 1.1 mrg { 523 1.1 mrg return __builtin_ia32_subph512_mask (__B, __C, 524 1.1 mrg _mm512_setzero_ph (), __A); 525 1.1 mrg } 526 1.1 mrg 527 1.1 mrg extern __inline __m512h 528 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 529 1.1 mrg _mm512_mul_ph (__m512h __A, __m512h __B) 530 1.1 mrg { 531 1.1 mrg return (__m512h) ((__v32hf) __A * (__v32hf) __B); 532 1.1 mrg } 533 1.1 mrg 534 1.1 mrg extern __inline __m512h 535 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 536 1.1 mrg _mm512_mask_mul_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) 537 1.1 mrg { 538 1.1 mrg return __builtin_ia32_mulph512_mask (__C, __D, __A, __B); 539 1.1 mrg } 540 1.1 mrg 541 1.1 mrg extern __inline __m512h 542 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 543 1.1 mrg _mm512_maskz_mul_ph (__mmask32 __A, __m512h __B, __m512h __C) 544 1.1 mrg { 545 1.1 mrg return __builtin_ia32_mulph512_mask (__B, __C, 546 1.1 mrg _mm512_setzero_ph (), __A); 547 1.1 mrg } 548 1.1 mrg 549 1.1 mrg extern __inline __m512h 550 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 551 1.1 mrg _mm512_div_ph (__m512h __A, __m512h __B) 552 1.1 mrg { 553 1.1 mrg return (__m512h) ((__v32hf) __A / (__v32hf) __B); 554 1.1 mrg } 555 1.1 mrg 556 1.1 mrg extern __inline __m512h 557 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 558 1.1 mrg _mm512_mask_div_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) 559 1.1 mrg { 560 1.1 mrg return __builtin_ia32_divph512_mask (__C, __D, __A, __B); 561 1.1 mrg } 562 1.1 mrg 563 1.1 mrg extern __inline __m512h 564 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 565 1.1 mrg _mm512_maskz_div_ph (__mmask32 __A, __m512h __B, __m512h __C) 566 1.1 mrg { 567 1.1 mrg return __builtin_ia32_divph512_mask (__B, __C, 568 1.1 mrg _mm512_setzero_ph (), __A); 569 1.1 mrg } 570 1.1 mrg 571 1.1 mrg #ifdef __OPTIMIZE__ 
572 1.1 mrg extern __inline __m512h 573 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 574 1.1 mrg _mm512_add_round_ph (__m512h __A, __m512h __B, const int __C) 575 1.1 mrg { 576 1.1 mrg return __builtin_ia32_addph512_mask_round (__A, __B, 577 1.1 mrg _mm512_setzero_ph (), 578 1.1 mrg (__mmask32) -1, __C); 579 1.1 mrg } 580 1.1 mrg 581 1.1 mrg extern __inline __m512h 582 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 583 1.1 mrg _mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C, 584 1.1 mrg __m512h __D, const int __E) 585 1.1 mrg { 586 1.1 mrg return __builtin_ia32_addph512_mask_round (__C, __D, __A, __B, __E); 587 1.1 mrg } 588 1.1 mrg 589 1.1 mrg extern __inline __m512h 590 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 591 1.1 mrg _mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C, 592 1.1 mrg const int __D) 593 1.1 mrg { 594 1.1 mrg return __builtin_ia32_addph512_mask_round (__B, __C, 595 1.1 mrg _mm512_setzero_ph (), 596 1.1 mrg __A, __D); 597 1.1 mrg } 598 1.1 mrg 599 1.1 mrg extern __inline __m512h 600 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 601 1.1 mrg _mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C) 602 1.1 mrg { 603 1.1 mrg return __builtin_ia32_subph512_mask_round (__A, __B, 604 1.1 mrg _mm512_setzero_ph (), 605 1.1 mrg (__mmask32) -1, __C); 606 1.1 mrg } 607 1.1 mrg 608 1.1 mrg extern __inline __m512h 609 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 610 1.1 mrg _mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C, 611 1.1 mrg __m512h __D, const int __E) 612 1.1 mrg { 613 1.1 mrg return __builtin_ia32_subph512_mask_round (__C, __D, __A, __B, __E); 614 1.1 mrg } 615 1.1 mrg 616 1.1 mrg extern __inline __m512h 617 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 618 1.1 mrg _mm512_maskz_sub_round_ph (__mmask32 __A, 
__m512h __B, __m512h __C, 619 1.1 mrg const int __D) 620 1.1 mrg { 621 1.1 mrg return __builtin_ia32_subph512_mask_round (__B, __C, 622 1.1 mrg _mm512_setzero_ph (), 623 1.1 mrg __A, __D); 624 1.1 mrg } 625 1.1 mrg 626 1.1 mrg extern __inline __m512h 627 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 628 1.1 mrg _mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C) 629 1.1 mrg { 630 1.1 mrg return __builtin_ia32_mulph512_mask_round (__A, __B, 631 1.1 mrg _mm512_setzero_ph (), 632 1.1 mrg (__mmask32) -1, __C); 633 1.1 mrg } 634 1.1 mrg 635 1.1 mrg extern __inline __m512h 636 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 637 1.1 mrg _mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C, 638 1.1 mrg __m512h __D, const int __E) 639 1.1 mrg { 640 1.1 mrg return __builtin_ia32_mulph512_mask_round (__C, __D, __A, __B, __E); 641 1.1 mrg } 642 1.1 mrg 643 1.1 mrg extern __inline __m512h 644 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 645 1.1 mrg _mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C, 646 1.1 mrg const int __D) 647 1.1 mrg { 648 1.1 mrg return __builtin_ia32_mulph512_mask_round (__B, __C, 649 1.1 mrg _mm512_setzero_ph (), 650 1.1 mrg __A, __D); 651 1.1 mrg } 652 1.1 mrg 653 1.1 mrg extern __inline __m512h 654 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 655 1.1 mrg _mm512_div_round_ph (__m512h __A, __m512h __B, const int __C) 656 1.1 mrg { 657 1.1 mrg return __builtin_ia32_divph512_mask_round (__A, __B, 658 1.1 mrg _mm512_setzero_ph (), 659 1.1 mrg (__mmask32) -1, __C); 660 1.1 mrg } 661 1.1 mrg 662 1.1 mrg extern __inline __m512h 663 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 664 1.1 mrg _mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C, 665 1.1 mrg __m512h __D, const int __E) 666 1.1 mrg { 667 1.1 mrg return __builtin_ia32_divph512_mask_round (__C, __D, 
__A, __B, __E); 668 1.1 mrg } 669 1.1 mrg 670 1.1 mrg extern __inline __m512h 671 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 672 1.1 mrg _mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C, 673 1.1 mrg const int __D) 674 1.1 mrg { 675 1.1 mrg return __builtin_ia32_divph512_mask_round (__B, __C, 676 1.1 mrg _mm512_setzero_ph (), 677 1.1 mrg __A, __D); 678 1.1 mrg } 679 1.1 mrg #else 680 1.1 mrg #define _mm512_add_round_ph(A, B, C) \ 681 1.1 mrg ((__m512h)__builtin_ia32_addph512_mask_round((A), (B), \ 682 1.1 mrg _mm512_setzero_ph (), \ 683 1.1 mrg (__mmask32)-1, (C))) 684 1.1 mrg 685 1.1 mrg #define _mm512_mask_add_round_ph(A, B, C, D, E) \ 686 1.1 mrg ((__m512h)__builtin_ia32_addph512_mask_round((C), (D), (A), (B), (E))) 687 1.1 mrg 688 1.1 mrg #define _mm512_maskz_add_round_ph(A, B, C, D) \ 689 1.1 mrg ((__m512h)__builtin_ia32_addph512_mask_round((B), (C), \ 690 1.1 mrg _mm512_setzero_ph (), \ 691 1.1 mrg (A), (D))) 692 1.1 mrg 693 1.1 mrg #define _mm512_sub_round_ph(A, B, C) \ 694 1.1 mrg ((__m512h)__builtin_ia32_subph512_mask_round((A), (B), \ 695 1.1 mrg _mm512_setzero_ph (), \ 696 1.1 mrg (__mmask32)-1, (C))) 697 1.1 mrg 698 1.1 mrg #define _mm512_mask_sub_round_ph(A, B, C, D, E) \ 699 1.1 mrg ((__m512h)__builtin_ia32_subph512_mask_round((C), (D), (A), (B), (E))) 700 1.1 mrg 701 1.1 mrg #define _mm512_maskz_sub_round_ph(A, B, C, D) \ 702 1.1 mrg ((__m512h)__builtin_ia32_subph512_mask_round((B), (C), \ 703 1.1 mrg _mm512_setzero_ph (), \ 704 1.1 mrg (A), (D))) 705 1.1 mrg 706 1.1 mrg #define _mm512_mul_round_ph(A, B, C) \ 707 1.1 mrg ((__m512h)__builtin_ia32_mulph512_mask_round((A), (B), \ 708 1.1 mrg _mm512_setzero_ph (), \ 709 1.1 mrg (__mmask32)-1, (C))) 710 1.1 mrg 711 1.1 mrg #define _mm512_mask_mul_round_ph(A, B, C, D, E) \ 712 1.1 mrg ((__m512h)__builtin_ia32_mulph512_mask_round((C), (D), (A), (B), (E))) 713 1.1 mrg 714 1.1 mrg #define _mm512_maskz_mul_round_ph(A, B, C, D) \ 715 1.1 mrg 
((__m512h)__builtin_ia32_mulph512_mask_round((B), (C), \ 716 1.1 mrg _mm512_setzero_ph (), \ 717 1.1 mrg (A), (D))) 718 1.1 mrg 719 1.1 mrg #define _mm512_div_round_ph(A, B, C) \ 720 1.1 mrg ((__m512h)__builtin_ia32_divph512_mask_round((A), (B), \ 721 1.1 mrg _mm512_setzero_ph (), \ 722 1.1 mrg (__mmask32)-1, (C))) 723 1.1 mrg 724 1.1 mrg #define _mm512_mask_div_round_ph(A, B, C, D, E) \ 725 1.1 mrg ((__m512h)__builtin_ia32_divph512_mask_round((C), (D), (A), (B), (E))) 726 1.1 mrg 727 1.1 mrg #define _mm512_maskz_div_round_ph(A, B, C, D) \ 728 1.1 mrg ((__m512h)__builtin_ia32_divph512_mask_round((B), (C), \ 729 1.1 mrg _mm512_setzero_ph (), \ 730 1.1 mrg (A), (D))) 731 1.1 mrg #endif /* __OPTIMIZE__ */ 732 1.1 mrg 733 1.1 mrg extern __inline __m512h 734 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 735 1.1 mrg _mm512_conj_pch (__m512h __A) 736 1.1 mrg { 737 1.1 mrg return (__m512h) _mm512_xor_epi32 ((__m512i) __A, _mm512_set1_epi32 (1<<31)); 738 1.1 mrg } 739 1.1 mrg 740 1.1 mrg extern __inline __m512h 741 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 742 1.1 mrg _mm512_mask_conj_pch (__m512h __W, __mmask16 __U, __m512h __A) 743 1.1 mrg { 744 1.1 mrg return (__m512h) 745 1.1 mrg __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A), 746 1.1 mrg (__v16sf) __W, 747 1.1 mrg (__mmask16) __U); 748 1.1 mrg } 749 1.1 mrg 750 1.1 mrg extern __inline __m512h 751 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 752 1.1 mrg _mm512_maskz_conj_pch (__mmask16 __U, __m512h __A) 753 1.1 mrg { 754 1.1 mrg return (__m512h) 755 1.1 mrg __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A), 756 1.1 mrg (__v16sf) _mm512_setzero_ps (), 757 1.1 mrg (__mmask16) __U); 758 1.1 mrg } 759 1.1 mrg 760 1.1 mrg /* Intrinsics of v[add,sub,mul,div]sh. 
*/ 761 1.1 mrg extern __inline __m128h 762 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 763 1.1 mrg _mm_add_sh (__m128h __A, __m128h __B) 764 1.1 mrg { 765 1.1 mrg __A[0] += __B[0]; 766 1.1 mrg return __A; 767 1.1 mrg } 768 1.1 mrg 769 1.1 mrg extern __inline __m128h 770 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 771 1.1 mrg _mm_mask_add_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 772 1.1 mrg { 773 1.1 mrg return __builtin_ia32_addsh_mask (__C, __D, __A, __B); 774 1.1 mrg } 775 1.1 mrg 776 1.1 mrg extern __inline __m128h 777 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 778 1.1 mrg _mm_maskz_add_sh (__mmask8 __A, __m128h __B, __m128h __C) 779 1.1 mrg { 780 1.1 mrg return __builtin_ia32_addsh_mask (__B, __C, _mm_setzero_ph (), 781 1.1 mrg __A); 782 1.1 mrg } 783 1.1 mrg 784 1.1 mrg extern __inline __m128h 785 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 786 1.1 mrg _mm_sub_sh (__m128h __A, __m128h __B) 787 1.1 mrg { 788 1.1 mrg __A[0] -= __B[0]; 789 1.1 mrg return __A; 790 1.1 mrg } 791 1.1 mrg 792 1.1 mrg extern __inline __m128h 793 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 794 1.1 mrg _mm_mask_sub_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 795 1.1 mrg { 796 1.1 mrg return __builtin_ia32_subsh_mask (__C, __D, __A, __B); 797 1.1 mrg } 798 1.1 mrg 799 1.1 mrg extern __inline __m128h 800 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 801 1.1 mrg _mm_maskz_sub_sh (__mmask8 __A, __m128h __B, __m128h __C) 802 1.1 mrg { 803 1.1 mrg return __builtin_ia32_subsh_mask (__B, __C, _mm_setzero_ph (), 804 1.1 mrg __A); 805 1.1 mrg } 806 1.1 mrg 807 1.1 mrg extern __inline __m128h 808 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 809 1.1 mrg _mm_mul_sh (__m128h __A, __m128h __B) 810 1.1 mrg { 811 1.1 mrg __A[0] *= __B[0]; 812 1.1 mrg return 
__A; 813 1.1 mrg } 814 1.1 mrg 815 1.1 mrg extern __inline __m128h 816 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 817 1.1 mrg _mm_mask_mul_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 818 1.1 mrg { 819 1.1 mrg return __builtin_ia32_mulsh_mask (__C, __D, __A, __B); 820 1.1 mrg } 821 1.1 mrg 822 1.1 mrg extern __inline __m128h 823 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 824 1.1 mrg _mm_maskz_mul_sh (__mmask8 __A, __m128h __B, __m128h __C) 825 1.1 mrg { 826 1.1 mrg return __builtin_ia32_mulsh_mask (__B, __C, _mm_setzero_ph (), __A); 827 1.1 mrg } 828 1.1 mrg 829 1.1 mrg extern __inline __m128h 830 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 831 1.1 mrg _mm_div_sh (__m128h __A, __m128h __B) 832 1.1 mrg { 833 1.1 mrg __A[0] /= __B[0]; 834 1.1 mrg return __A; 835 1.1 mrg } 836 1.1 mrg 837 1.1 mrg extern __inline __m128h 838 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 839 1.1 mrg _mm_mask_div_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 840 1.1 mrg { 841 1.1 mrg return __builtin_ia32_divsh_mask (__C, __D, __A, __B); 842 1.1 mrg } 843 1.1 mrg 844 1.1 mrg extern __inline __m128h 845 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 846 1.1 mrg _mm_maskz_div_sh (__mmask8 __A, __m128h __B, __m128h __C) 847 1.1 mrg { 848 1.1 mrg return __builtin_ia32_divsh_mask (__B, __C, _mm_setzero_ph (), 849 1.1 mrg __A); 850 1.1 mrg } 851 1.1 mrg 852 1.1 mrg #ifdef __OPTIMIZE__ 853 1.1 mrg extern __inline __m128h 854 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 855 1.1 mrg _mm_add_round_sh (__m128h __A, __m128h __B, const int __C) 856 1.1 mrg { 857 1.1 mrg return __builtin_ia32_addsh_mask_round (__A, __B, 858 1.1 mrg _mm_setzero_ph (), 859 1.1 mrg (__mmask8) -1, __C); 860 1.1 mrg } 861 1.1 mrg 862 1.1 mrg extern __inline __m128h 863 1.1 mrg __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 864 1.1 mrg _mm_mask_add_round_sh (__m128h __A, __mmask8 __B, __m128h __C, 865 1.1 mrg __m128h __D, const int __E) 866 1.1 mrg { 867 1.1 mrg return __builtin_ia32_addsh_mask_round (__C, __D, __A, __B, __E); 868 1.1 mrg } 869 1.1 mrg 870 1.1 mrg extern __inline __m128h 871 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 872 1.1 mrg _mm_maskz_add_round_sh (__mmask8 __A, __m128h __B, __m128h __C, 873 1.1 mrg const int __D) 874 1.1 mrg { 875 1.1 mrg return __builtin_ia32_addsh_mask_round (__B, __C, 876 1.1 mrg _mm_setzero_ph (), 877 1.1 mrg __A, __D); 878 1.1 mrg } 879 1.1 mrg 880 1.1 mrg extern __inline __m128h 881 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 882 1.1 mrg _mm_sub_round_sh (__m128h __A, __m128h __B, const int __C) 883 1.1 mrg { 884 1.1 mrg return __builtin_ia32_subsh_mask_round (__A, __B, 885 1.1 mrg _mm_setzero_ph (), 886 1.1 mrg (__mmask8) -1, __C); 887 1.1 mrg } 888 1.1 mrg 889 1.1 mrg extern __inline __m128h 890 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 891 1.1 mrg _mm_mask_sub_round_sh (__m128h __A, __mmask8 __B, __m128h __C, 892 1.1 mrg __m128h __D, const int __E) 893 1.1 mrg { 894 1.1 mrg return __builtin_ia32_subsh_mask_round (__C, __D, __A, __B, __E); 895 1.1 mrg } 896 1.1 mrg 897 1.1 mrg extern __inline __m128h 898 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 899 1.1 mrg _mm_maskz_sub_round_sh (__mmask8 __A, __m128h __B, __m128h __C, 900 1.1 mrg const int __D) 901 1.1 mrg { 902 1.1 mrg return __builtin_ia32_subsh_mask_round (__B, __C, 903 1.1 mrg _mm_setzero_ph (), 904 1.1 mrg __A, __D); 905 1.1 mrg } 906 1.1 mrg 907 1.1 mrg extern __inline __m128h 908 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 909 1.1 mrg _mm_mul_round_sh (__m128h __A, __m128h __B, const int __C) 910 1.1 mrg { 911 1.1 mrg return __builtin_ia32_mulsh_mask_round (__A, __B, 912 1.1 mrg 
_mm_setzero_ph (), 913 1.1 mrg (__mmask8) -1, __C); 914 1.1 mrg } 915 1.1 mrg 916 1.1 mrg extern __inline __m128h 917 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 918 1.1 mrg _mm_mask_mul_round_sh (__m128h __A, __mmask8 __B, __m128h __C, 919 1.1 mrg __m128h __D, const int __E) 920 1.1 mrg { 921 1.1 mrg return __builtin_ia32_mulsh_mask_round (__C, __D, __A, __B, __E); 922 1.1 mrg } 923 1.1 mrg 924 1.1 mrg extern __inline __m128h 925 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 926 1.1 mrg _mm_maskz_mul_round_sh (__mmask8 __A, __m128h __B, __m128h __C, 927 1.1 mrg const int __D) 928 1.1 mrg { 929 1.1 mrg return __builtin_ia32_mulsh_mask_round (__B, __C, 930 1.1 mrg _mm_setzero_ph (), 931 1.1 mrg __A, __D); 932 1.1 mrg } 933 1.1 mrg 934 1.1 mrg extern __inline __m128h 935 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 936 1.1 mrg _mm_div_round_sh (__m128h __A, __m128h __B, const int __C) 937 1.1 mrg { 938 1.1 mrg return __builtin_ia32_divsh_mask_round (__A, __B, 939 1.1 mrg _mm_setzero_ph (), 940 1.1 mrg (__mmask8) -1, __C); 941 1.1 mrg } 942 1.1 mrg 943 1.1 mrg extern __inline __m128h 944 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 945 1.1 mrg _mm_mask_div_round_sh (__m128h __A, __mmask8 __B, __m128h __C, 946 1.1 mrg __m128h __D, const int __E) 947 1.1 mrg { 948 1.1 mrg return __builtin_ia32_divsh_mask_round (__C, __D, __A, __B, __E); 949 1.1 mrg } 950 1.1 mrg 951 1.1 mrg extern __inline __m128h 952 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 953 1.1 mrg _mm_maskz_div_round_sh (__mmask8 __A, __m128h __B, __m128h __C, 954 1.1 mrg const int __D) 955 1.1 mrg { 956 1.1 mrg return __builtin_ia32_divsh_mask_round (__B, __C, 957 1.1 mrg _mm_setzero_ph (), 958 1.1 mrg __A, __D); 959 1.1 mrg } 960 1.1 mrg #else 961 1.1 mrg #define _mm_add_round_sh(A, B, C) \ 962 1.1 mrg ((__m128h)__builtin_ia32_addsh_mask_round ((A), (B), \ 
963 1.1 mrg _mm_setzero_ph (), \ 964 1.1 mrg (__mmask8)-1, (C))) 965 1.1 mrg 966 1.1 mrg #define _mm_mask_add_round_sh(A, B, C, D, E) \ 967 1.1 mrg ((__m128h)__builtin_ia32_addsh_mask_round ((C), (D), (A), (B), (E))) 968 1.1 mrg 969 1.1 mrg #define _mm_maskz_add_round_sh(A, B, C, D) \ 970 1.1 mrg ((__m128h)__builtin_ia32_addsh_mask_round ((B), (C), \ 971 1.1 mrg _mm_setzero_ph (), \ 972 1.1 mrg (A), (D))) 973 1.1 mrg 974 1.1 mrg #define _mm_sub_round_sh(A, B, C) \ 975 1.1 mrg ((__m128h)__builtin_ia32_subsh_mask_round ((A), (B), \ 976 1.1 mrg _mm_setzero_ph (), \ 977 1.1 mrg (__mmask8)-1, (C))) 978 1.1 mrg 979 1.1 mrg #define _mm_mask_sub_round_sh(A, B, C, D, E) \ 980 1.1 mrg ((__m128h)__builtin_ia32_subsh_mask_round ((C), (D), (A), (B), (E))) 981 1.1 mrg 982 1.1 mrg #define _mm_maskz_sub_round_sh(A, B, C, D) \ 983 1.1 mrg ((__m128h)__builtin_ia32_subsh_mask_round ((B), (C), \ 984 1.1 mrg _mm_setzero_ph (), \ 985 1.1 mrg (A), (D))) 986 1.1 mrg 987 1.1 mrg #define _mm_mul_round_sh(A, B, C) \ 988 1.1 mrg ((__m128h)__builtin_ia32_mulsh_mask_round ((A), (B), \ 989 1.1 mrg _mm_setzero_ph (), \ 990 1.1 mrg (__mmask8)-1, (C))) 991 1.1 mrg 992 1.1 mrg #define _mm_mask_mul_round_sh(A, B, C, D, E) \ 993 1.1 mrg ((__m128h)__builtin_ia32_mulsh_mask_round ((C), (D), (A), (B), (E))) 994 1.1 mrg 995 1.1 mrg #define _mm_maskz_mul_round_sh(A, B, C, D) \ 996 1.1 mrg ((__m128h)__builtin_ia32_mulsh_mask_round ((B), (C), \ 997 1.1 mrg _mm_setzero_ph (), \ 998 1.1 mrg (A), (D))) 999 1.1 mrg 1000 1.1 mrg #define _mm_div_round_sh(A, B, C) \ 1001 1.1 mrg ((__m128h)__builtin_ia32_divsh_mask_round ((A), (B), \ 1002 1.1 mrg _mm_setzero_ph (), \ 1003 1.1 mrg (__mmask8)-1, (C))) 1004 1.1 mrg 1005 1.1 mrg #define _mm_mask_div_round_sh(A, B, C, D, E) \ 1006 1.1 mrg ((__m128h)__builtin_ia32_divsh_mask_round ((C), (D), (A), (B), (E))) 1007 1.1 mrg 1008 1.1 mrg #define _mm_maskz_div_round_sh(A, B, C, D) \ 1009 1.1 mrg ((__m128h)__builtin_ia32_divsh_mask_round ((B), (C), \ 1010 1.1 mrg _mm_setzero_ph 
(), \ 1011 1.1 mrg (A), (D))) 1012 1.1 mrg #endif /* __OPTIMIZE__ */ 1013 1.1 mrg 1014 1.1 mrg /* Intrinsic vmaxph vminph. */ 1015 1.1 mrg extern __inline __m512h 1016 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1017 1.1 mrg _mm512_max_ph (__m512h __A, __m512h __B) 1018 1.1 mrg { 1019 1.1 mrg return __builtin_ia32_maxph512_mask (__A, __B, 1020 1.1 mrg _mm512_setzero_ph (), 1021 1.1 mrg (__mmask32) -1); 1022 1.1 mrg } 1023 1.1 mrg 1024 1.1 mrg extern __inline __m512h 1025 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1026 1.1 mrg _mm512_mask_max_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) 1027 1.1 mrg { 1028 1.1 mrg return __builtin_ia32_maxph512_mask (__C, __D, __A, __B); 1029 1.1 mrg } 1030 1.1 mrg 1031 1.1 mrg extern __inline __m512h 1032 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1033 1.1 mrg _mm512_maskz_max_ph (__mmask32 __A, __m512h __B, __m512h __C) 1034 1.1 mrg { 1035 1.1 mrg return __builtin_ia32_maxph512_mask (__B, __C, 1036 1.1 mrg _mm512_setzero_ph (), __A); 1037 1.1 mrg } 1038 1.1 mrg 1039 1.1 mrg extern __inline __m512h 1040 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1041 1.1 mrg _mm512_min_ph (__m512h __A, __m512h __B) 1042 1.1 mrg { 1043 1.1 mrg return __builtin_ia32_minph512_mask (__A, __B, 1044 1.1 mrg _mm512_setzero_ph (), 1045 1.1 mrg (__mmask32) -1); 1046 1.1 mrg } 1047 1.1 mrg 1048 1.1 mrg extern __inline __m512h 1049 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1050 1.1 mrg _mm512_mask_min_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) 1051 1.1 mrg { 1052 1.1 mrg return __builtin_ia32_minph512_mask (__C, __D, __A, __B); 1053 1.1 mrg } 1054 1.1 mrg 1055 1.1 mrg extern __inline __m512h 1056 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1057 1.1 mrg _mm512_maskz_min_ph (__mmask32 __A, __m512h __B, __m512h __C) 1058 1.1 mrg { 1059 1.1 
mrg return __builtin_ia32_minph512_mask (__B, __C, 1060 1.1 mrg _mm512_setzero_ph (), __A); 1061 1.1 mrg } 1062 1.1 mrg 1063 1.1 mrg #ifdef __OPTIMIZE__ 1064 1.1 mrg extern __inline __m512h 1065 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1066 1.1 mrg _mm512_max_round_ph (__m512h __A, __m512h __B, const int __C) 1067 1.1 mrg { 1068 1.1 mrg return __builtin_ia32_maxph512_mask_round (__A, __B, 1069 1.1 mrg _mm512_setzero_ph (), 1070 1.1 mrg (__mmask32) -1, __C); 1071 1.1 mrg } 1072 1.1 mrg 1073 1.1 mrg extern __inline __m512h 1074 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1075 1.1 mrg _mm512_mask_max_round_ph (__m512h __A, __mmask32 __B, __m512h __C, 1076 1.1 mrg __m512h __D, const int __E) 1077 1.1 mrg { 1078 1.1 mrg return __builtin_ia32_maxph512_mask_round (__C, __D, __A, __B, __E); 1079 1.1 mrg } 1080 1.1 mrg 1081 1.1 mrg extern __inline __m512h 1082 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1083 1.1 mrg _mm512_maskz_max_round_ph (__mmask32 __A, __m512h __B, __m512h __C, 1084 1.1 mrg const int __D) 1085 1.1 mrg { 1086 1.1 mrg return __builtin_ia32_maxph512_mask_round (__B, __C, 1087 1.1 mrg _mm512_setzero_ph (), 1088 1.1 mrg __A, __D); 1089 1.1 mrg } 1090 1.1 mrg 1091 1.1 mrg extern __inline __m512h 1092 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1093 1.1 mrg _mm512_min_round_ph (__m512h __A, __m512h __B, const int __C) 1094 1.1 mrg { 1095 1.1 mrg return __builtin_ia32_minph512_mask_round (__A, __B, 1096 1.1 mrg _mm512_setzero_ph (), 1097 1.1 mrg (__mmask32) -1, __C); 1098 1.1 mrg } 1099 1.1 mrg 1100 1.1 mrg extern __inline __m512h 1101 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1102 1.1 mrg _mm512_mask_min_round_ph (__m512h __A, __mmask32 __B, __m512h __C, 1103 1.1 mrg __m512h __D, const int __E) 1104 1.1 mrg { 1105 1.1 mrg return __builtin_ia32_minph512_mask_round (__C, __D, __A, __B, __E); 1106 
1.1 mrg } 1107 1.1 mrg 1108 1.1 mrg extern __inline __m512h 1109 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1110 1.1 mrg _mm512_maskz_min_round_ph (__mmask32 __A, __m512h __B, __m512h __C, 1111 1.1 mrg const int __D) 1112 1.1 mrg { 1113 1.1 mrg return __builtin_ia32_minph512_mask_round (__B, __C, 1114 1.1 mrg _mm512_setzero_ph (), 1115 1.1 mrg __A, __D); 1116 1.1 mrg } 1117 1.1 mrg 1118 1.1 mrg #else 1119 1.1 mrg #define _mm512_max_round_ph(A, B, C) \ 1120 1.1 mrg (__builtin_ia32_maxph512_mask_round ((A), (B), \ 1121 1.1 mrg _mm512_setzero_ph (), \ 1122 1.1 mrg (__mmask32)-1, (C))) 1123 1.1 mrg 1124 1.1 mrg #define _mm512_mask_max_round_ph(A, B, C, D, E) \ 1125 1.1 mrg (__builtin_ia32_maxph512_mask_round ((C), (D), (A), (B), (E))) 1126 1.1 mrg 1127 1.1 mrg #define _mm512_maskz_max_round_ph(A, B, C, D) \ 1128 1.1 mrg (__builtin_ia32_maxph512_mask_round ((B), (C), \ 1129 1.1 mrg _mm512_setzero_ph (), \ 1130 1.1 mrg (A), (D))) 1131 1.1 mrg 1132 1.1 mrg #define _mm512_min_round_ph(A, B, C) \ 1133 1.1 mrg (__builtin_ia32_minph512_mask_round ((A), (B), \ 1134 1.1 mrg _mm512_setzero_ph (), \ 1135 1.1 mrg (__mmask32)-1, (C))) 1136 1.1 mrg 1137 1.1 mrg #define _mm512_mask_min_round_ph(A, B, C, D, E) \ 1138 1.1 mrg (__builtin_ia32_minph512_mask_round ((C), (D), (A), (B), (E))) 1139 1.1 mrg 1140 1.1 mrg #define _mm512_maskz_min_round_ph(A, B, C, D) \ 1141 1.1 mrg (__builtin_ia32_minph512_mask_round ((B), (C), \ 1142 1.1 mrg _mm512_setzero_ph (), \ 1143 1.1 mrg (A), (D))) 1144 1.1 mrg #endif /* __OPTIMIZE__ */ 1145 1.1 mrg 1146 1.1 mrg /* Intrinsic vmaxsh vminsh. */ 1147 1.1 mrg extern __inline __m128h 1148 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1149 1.1 mrg _mm_max_sh (__m128h __A, __m128h __B) 1150 1.1 mrg { 1151 1.1 mrg __A[0] = __A[0] > __B[0] ? 
__A[0] : __B[0]; 1152 1.1 mrg return __A; 1153 1.1 mrg } 1154 1.1 mrg 1155 1.1 mrg extern __inline __m128h 1156 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1157 1.1 mrg _mm_mask_max_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 1158 1.1 mrg { 1159 1.1 mrg return __builtin_ia32_maxsh_mask (__C, __D, __A, __B); 1160 1.1 mrg } 1161 1.1 mrg 1162 1.1 mrg extern __inline __m128h 1163 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1164 1.1 mrg _mm_maskz_max_sh (__mmask8 __A, __m128h __B, __m128h __C) 1165 1.1 mrg { 1166 1.1 mrg return __builtin_ia32_maxsh_mask (__B, __C, _mm_setzero_ph (), 1167 1.1 mrg __A); 1168 1.1 mrg } 1169 1.1 mrg 1170 1.1 mrg extern __inline __m128h 1171 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1172 1.1 mrg _mm_min_sh (__m128h __A, __m128h __B) 1173 1.1 mrg { 1174 1.1 mrg __A[0] = __A[0] < __B[0] ? __A[0] : __B[0]; 1175 1.1 mrg return __A; 1176 1.1 mrg } 1177 1.1 mrg 1178 1.1 mrg extern __inline __m128h 1179 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1180 1.1 mrg _mm_mask_min_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 1181 1.1 mrg { 1182 1.1 mrg return __builtin_ia32_minsh_mask (__C, __D, __A, __B); 1183 1.1 mrg } 1184 1.1 mrg 1185 1.1 mrg extern __inline __m128h 1186 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1187 1.1 mrg _mm_maskz_min_sh (__mmask8 __A, __m128h __B, __m128h __C) 1188 1.1 mrg { 1189 1.1 mrg return __builtin_ia32_minsh_mask (__B, __C, _mm_setzero_ph (), 1190 1.1 mrg __A); 1191 1.1 mrg } 1192 1.1 mrg 1193 1.1 mrg #ifdef __OPTIMIZE__ 1194 1.1 mrg extern __inline __m128h 1195 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1196 1.1 mrg _mm_max_round_sh (__m128h __A, __m128h __B, const int __C) 1197 1.1 mrg { 1198 1.1 mrg return __builtin_ia32_maxsh_mask_round (__A, __B, 1199 1.1 mrg _mm_setzero_ph (), 1200 1.1 mrg (__mmask8) 
-1, __C); 1201 1.1 mrg } 1202 1.1 mrg 1203 1.1 mrg extern __inline __m128h 1204 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1205 1.1 mrg _mm_mask_max_round_sh (__m128h __A, __mmask8 __B, __m128h __C, 1206 1.1 mrg __m128h __D, const int __E) 1207 1.1 mrg { 1208 1.1 mrg return __builtin_ia32_maxsh_mask_round (__C, __D, __A, __B, __E); 1209 1.1 mrg } 1210 1.1 mrg 1211 1.1 mrg extern __inline __m128h 1212 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1213 1.1 mrg _mm_maskz_max_round_sh (__mmask8 __A, __m128h __B, __m128h __C, 1214 1.1 mrg const int __D) 1215 1.1 mrg { 1216 1.1 mrg return __builtin_ia32_maxsh_mask_round (__B, __C, 1217 1.1 mrg _mm_setzero_ph (), 1218 1.1 mrg __A, __D); 1219 1.1 mrg } 1220 1.1 mrg 1221 1.1 mrg extern __inline __m128h 1222 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1223 1.1 mrg _mm_min_round_sh (__m128h __A, __m128h __B, const int __C) 1224 1.1 mrg { 1225 1.1 mrg return __builtin_ia32_minsh_mask_round (__A, __B, 1226 1.1 mrg _mm_setzero_ph (), 1227 1.1 mrg (__mmask8) -1, __C); 1228 1.1 mrg } 1229 1.1 mrg 1230 1.1 mrg extern __inline __m128h 1231 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1232 1.1 mrg _mm_mask_min_round_sh (__m128h __A, __mmask8 __B, __m128h __C, 1233 1.1 mrg __m128h __D, const int __E) 1234 1.1 mrg { 1235 1.1 mrg return __builtin_ia32_minsh_mask_round (__C, __D, __A, __B, __E); 1236 1.1 mrg } 1237 1.1 mrg 1238 1.1 mrg extern __inline __m128h 1239 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1240 1.1 mrg _mm_maskz_min_round_sh (__mmask8 __A, __m128h __B, __m128h __C, 1241 1.1 mrg const int __D) 1242 1.1 mrg { 1243 1.1 mrg return __builtin_ia32_minsh_mask_round (__B, __C, 1244 1.1 mrg _mm_setzero_ph (), 1245 1.1 mrg __A, __D); 1246 1.1 mrg } 1247 1.1 mrg 1248 1.1 mrg #else 1249 1.1 mrg #define _mm_max_round_sh(A, B, C) \ 1250 1.1 mrg (__builtin_ia32_maxsh_mask_round 
((A), (B), \ 1251 1.1 mrg _mm_setzero_ph (), \ 1252 1.1 mrg (__mmask8)-1, (C))) 1253 1.1 mrg 1254 1.1 mrg #define _mm_mask_max_round_sh(A, B, C, D, E) \ 1255 1.1 mrg (__builtin_ia32_maxsh_mask_round ((C), (D), (A), (B), (E))) 1256 1.1 mrg 1257 1.1 mrg #define _mm_maskz_max_round_sh(A, B, C, D) \ 1258 1.1 mrg (__builtin_ia32_maxsh_mask_round ((B), (C), \ 1259 1.1 mrg _mm_setzero_ph (), \ 1260 1.1 mrg (A), (D))) 1261 1.1 mrg 1262 1.1 mrg #define _mm_min_round_sh(A, B, C) \ 1263 1.1 mrg (__builtin_ia32_minsh_mask_round ((A), (B), \ 1264 1.1 mrg _mm_setzero_ph (), \ 1265 1.1 mrg (__mmask8)-1, (C))) 1266 1.1 mrg 1267 1.1 mrg #define _mm_mask_min_round_sh(A, B, C, D, E) \ 1268 1.1 mrg (__builtin_ia32_minsh_mask_round ((C), (D), (A), (B), (E))) 1269 1.1 mrg 1270 1.1 mrg #define _mm_maskz_min_round_sh(A, B, C, D) \ 1271 1.1 mrg (__builtin_ia32_minsh_mask_round ((B), (C), \ 1272 1.1 mrg _mm_setzero_ph (), \ 1273 1.1 mrg (A), (D))) 1274 1.1 mrg 1275 1.1 mrg #endif /* __OPTIMIZE__ */ 1276 1.1 mrg 1277 1.1 mrg /* vcmpph */ 1278 1.1 mrg #ifdef __OPTIMIZE 1279 1.1 mrg extern __inline __mmask32 1280 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1281 1.1 mrg _mm512_cmp_ph_mask (__m512h __A, __m512h __B, const int __C) 1282 1.1 mrg { 1283 1.1 mrg return (__mmask32) __builtin_ia32_cmpph512_mask (__A, __B, __C, 1284 1.1 mrg (__mmask32) -1); 1285 1.1 mrg } 1286 1.1 mrg 1287 1.1 mrg extern __inline __mmask32 1288 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1289 1.1 mrg _mm512_mask_cmp_ph_mask (__mmask32 __A, __m512h __B, __m512h __C, 1290 1.1 mrg const int __D) 1291 1.1 mrg { 1292 1.1 mrg return (__mmask32) __builtin_ia32_cmpph512_mask (__B, __C, __D, 1293 1.1 mrg __A); 1294 1.1 mrg } 1295 1.1 mrg 1296 1.1 mrg extern __inline __mmask32 1297 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1298 1.1 mrg _mm512_cmp_round_ph_mask (__m512h __A, __m512h __B, const int __C, 1299 1.1 mrg const int __D) 
1300 1.1 mrg { 1301 1.1 mrg return (__mmask32) __builtin_ia32_cmpph512_mask_round (__A, __B, 1302 1.1 mrg __C, (__mmask32) -1, 1303 1.1 mrg __D); 1304 1.1 mrg } 1305 1.1 mrg 1306 1.1 mrg extern __inline __mmask32 1307 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1308 1.1 mrg _mm512_mask_cmp_round_ph_mask (__mmask32 __A, __m512h __B, __m512h __C, 1309 1.1 mrg const int __D, const int __E) 1310 1.1 mrg { 1311 1.1 mrg return (__mmask32) __builtin_ia32_cmpph512_mask_round (__B, __C, 1312 1.1 mrg __D, __A, 1313 1.1 mrg __E); 1314 1.1 mrg } 1315 1.1 mrg 1316 1.1 mrg #else 1317 1.1 mrg #define _mm512_cmp_ph_mask(A, B, C) \ 1318 1.1 mrg (__builtin_ia32_cmpph512_mask ((A), (B), (C), (-1))) 1319 1.1 mrg 1320 1.1 mrg #define _mm512_mask_cmp_ph_mask(A, B, C, D) \ 1321 1.1 mrg (__builtin_ia32_cmpph512_mask ((B), (C), (D), (A))) 1322 1.1 mrg 1323 1.1 mrg #define _mm512_cmp_round_ph_mask(A, B, C, D) \ 1324 1.1 mrg (__builtin_ia32_cmpph512_mask_round ((A), (B), (C), (-1), (D))) 1325 1.1 mrg 1326 1.1 mrg #define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E) \ 1327 1.1 mrg (__builtin_ia32_cmpph512_mask_round ((B), (C), (D), (A), (E))) 1328 1.1 mrg 1329 1.1 mrg #endif /* __OPTIMIZE__ */ 1330 1.1 mrg 1331 1.1 mrg /* Intrinsics vcmpsh. 
*/ 1332 1.1 mrg #ifdef __OPTIMIZE__ 1333 1.1 mrg extern __inline __mmask8 1334 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1335 1.1 mrg _mm_cmp_sh_mask (__m128h __A, __m128h __B, const int __C) 1336 1.1 mrg { 1337 1.1 mrg return (__mmask8) 1338 1.1 mrg __builtin_ia32_cmpsh_mask_round (__A, __B, 1339 1.1 mrg __C, (__mmask8) -1, 1340 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1341 1.1 mrg } 1342 1.1 mrg 1343 1.1 mrg extern __inline __mmask8 1344 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1345 1.1 mrg _mm_mask_cmp_sh_mask (__mmask8 __A, __m128h __B, __m128h __C, 1346 1.1 mrg const int __D) 1347 1.1 mrg { 1348 1.1 mrg return (__mmask8) 1349 1.1 mrg __builtin_ia32_cmpsh_mask_round (__B, __C, 1350 1.1 mrg __D, __A, 1351 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1352 1.1 mrg } 1353 1.1 mrg 1354 1.1 mrg extern __inline __mmask8 1355 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1356 1.1 mrg _mm_cmp_round_sh_mask (__m128h __A, __m128h __B, const int __C, 1357 1.1 mrg const int __D) 1358 1.1 mrg { 1359 1.1 mrg return (__mmask8) __builtin_ia32_cmpsh_mask_round (__A, __B, 1360 1.1 mrg __C, (__mmask8) -1, 1361 1.1 mrg __D); 1362 1.1 mrg } 1363 1.1 mrg 1364 1.1 mrg extern __inline __mmask8 1365 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1366 1.1 mrg _mm_mask_cmp_round_sh_mask (__mmask8 __A, __m128h __B, __m128h __C, 1367 1.1 mrg const int __D, const int __E) 1368 1.1 mrg { 1369 1.1 mrg return (__mmask8) __builtin_ia32_cmpsh_mask_round (__B, __C, 1370 1.1 mrg __D, __A, 1371 1.1 mrg __E); 1372 1.1 mrg } 1373 1.1 mrg 1374 1.1 mrg #else 1375 1.1 mrg #define _mm_cmp_sh_mask(A, B, C) \ 1376 1.1 mrg (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), \ 1377 1.1 mrg (_MM_FROUND_CUR_DIRECTION))) 1378 1.1 mrg 1379 1.1 mrg #define _mm_mask_cmp_sh_mask(A, B, C, D) \ 1380 1.1 mrg (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), \ 1381 1.1 mrg (_MM_FROUND_CUR_DIRECTION))) 
1382 1.1 mrg 1383 1.1 mrg #define _mm_cmp_round_sh_mask(A, B, C, D) \ 1384 1.1 mrg (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), (D))) 1385 1.1 mrg 1386 1.1 mrg #define _mm_mask_cmp_round_sh_mask(A, B, C, D, E) \ 1387 1.1 mrg (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), (E))) 1388 1.1 mrg 1389 1.1 mrg #endif /* __OPTIMIZE__ */ 1390 1.1 mrg 1391 1.1 mrg /* Intrinsics vcomish. */ 1392 1.1 mrg extern __inline int 1393 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1394 1.1 mrg _mm_comieq_sh (__m128h __A, __m128h __B) 1395 1.1 mrg { 1396 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OS, 1397 1.1 mrg (__mmask8) -1, 1398 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1399 1.1 mrg } 1400 1.1 mrg 1401 1.1 mrg extern __inline int 1402 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1403 1.1 mrg _mm_comilt_sh (__m128h __A, __m128h __B) 1404 1.1 mrg { 1405 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OS, 1406 1.1 mrg (__mmask8) -1, 1407 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1408 1.1 mrg } 1409 1.1 mrg 1410 1.1 mrg extern __inline int 1411 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1412 1.1 mrg _mm_comile_sh (__m128h __A, __m128h __B) 1413 1.1 mrg { 1414 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OS, 1415 1.1 mrg (__mmask8) -1, 1416 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1417 1.1 mrg } 1418 1.1 mrg 1419 1.1 mrg extern __inline int 1420 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1421 1.1 mrg _mm_comigt_sh (__m128h __A, __m128h __B) 1422 1.1 mrg { 1423 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OS, 1424 1.1 mrg (__mmask8) -1, 1425 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1426 1.1 mrg } 1427 1.1 mrg 1428 1.1 mrg extern __inline int 1429 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1430 1.1 mrg _mm_comige_sh (__m128h __A, __m128h __B) 1431 1.1 mrg { 
1432 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OS, 1433 1.1 mrg (__mmask8) -1, 1434 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1435 1.1 mrg } 1436 1.1 mrg 1437 1.1 mrg extern __inline int 1438 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1439 1.1 mrg _mm_comineq_sh (__m128h __A, __m128h __B) 1440 1.1 mrg { 1441 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_US, 1442 1.1 mrg (__mmask8) -1, 1443 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1444 1.1 mrg } 1445 1.1 mrg 1446 1.1 mrg extern __inline int 1447 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1448 1.1 mrg _mm_ucomieq_sh (__m128h __A, __m128h __B) 1449 1.1 mrg { 1450 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OQ, 1451 1.1 mrg (__mmask8) -1, 1452 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1453 1.1 mrg } 1454 1.1 mrg 1455 1.1 mrg extern __inline int 1456 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1457 1.1 mrg _mm_ucomilt_sh (__m128h __A, __m128h __B) 1458 1.1 mrg { 1459 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OQ, 1460 1.1 mrg (__mmask8) -1, 1461 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1462 1.1 mrg } 1463 1.1 mrg 1464 1.1 mrg extern __inline int 1465 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1466 1.1 mrg _mm_ucomile_sh (__m128h __A, __m128h __B) 1467 1.1 mrg { 1468 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OQ, 1469 1.1 mrg (__mmask8) -1, 1470 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1471 1.1 mrg } 1472 1.1 mrg 1473 1.1 mrg extern __inline int 1474 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1475 1.1 mrg _mm_ucomigt_sh (__m128h __A, __m128h __B) 1476 1.1 mrg { 1477 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OQ, 1478 1.1 mrg (__mmask8) -1, 1479 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1480 1.1 mrg } 1481 1.1 mrg 1482 1.1 mrg extern __inline int 1483 1.1 mrg 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1484 1.1 mrg _mm_ucomige_sh (__m128h __A, __m128h __B) 1485 1.1 mrg { 1486 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OQ, 1487 1.1 mrg (__mmask8) -1, 1488 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1489 1.1 mrg } 1490 1.1 mrg 1491 1.1 mrg extern __inline int 1492 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1493 1.1 mrg _mm_ucomineq_sh (__m128h __A, __m128h __B) 1494 1.1 mrg { 1495 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_UQ, 1496 1.1 mrg (__mmask8) -1, 1497 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1498 1.1 mrg } 1499 1.1 mrg 1500 1.1 mrg #ifdef __OPTIMIZE__ 1501 1.1 mrg extern __inline int 1502 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1503 1.1 mrg _mm_comi_sh (__m128h __A, __m128h __B, const int __P) 1504 1.1 mrg { 1505 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, __P, 1506 1.1 mrg (__mmask8) -1, 1507 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1508 1.1 mrg } 1509 1.1 mrg 1510 1.1 mrg extern __inline int 1511 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1512 1.1 mrg _mm_comi_round_sh (__m128h __A, __m128h __B, const int __P, const int __R) 1513 1.1 mrg { 1514 1.1 mrg return __builtin_ia32_cmpsh_mask_round (__A, __B, __P, 1515 1.1 mrg (__mmask8) -1,__R); 1516 1.1 mrg } 1517 1.1 mrg 1518 1.1 mrg #else 1519 1.1 mrg #define _mm_comi_round_sh(A, B, P, R) \ 1520 1.1 mrg (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), (R))) 1521 1.1 mrg #define _mm_comi_sh(A, B, P) \ 1522 1.1 mrg (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), \ 1523 1.1 mrg _MM_FROUND_CUR_DIRECTION)) 1524 1.1 mrg 1525 1.1 mrg #endif /* __OPTIMIZE__ */ 1526 1.1 mrg 1527 1.1 mrg /* Intrinsics vsqrtph. 
*/ 1528 1.1 mrg extern __inline __m512h 1529 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1530 1.1 mrg _mm512_sqrt_ph (__m512h __A) 1531 1.1 mrg { 1532 1.1 mrg return __builtin_ia32_sqrtph512_mask_round (__A, 1533 1.1 mrg _mm512_setzero_ph(), 1534 1.1 mrg (__mmask32) -1, 1535 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1536 1.1 mrg } 1537 1.1 mrg 1538 1.1 mrg extern __inline __m512h 1539 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1540 1.1 mrg _mm512_mask_sqrt_ph (__m512h __A, __mmask32 __B, __m512h __C) 1541 1.1 mrg { 1542 1.1 mrg return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B, 1543 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1544 1.1 mrg } 1545 1.1 mrg 1546 1.1 mrg extern __inline __m512h 1547 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1548 1.1 mrg _mm512_maskz_sqrt_ph (__mmask32 __A, __m512h __B) 1549 1.1 mrg { 1550 1.1 mrg return __builtin_ia32_sqrtph512_mask_round (__B, 1551 1.1 mrg _mm512_setzero_ph (), 1552 1.1 mrg __A, 1553 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1554 1.1 mrg } 1555 1.1 mrg 1556 1.1 mrg #ifdef __OPTIMIZE__ 1557 1.1 mrg extern __inline __m512h 1558 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1559 1.1 mrg _mm512_sqrt_round_ph (__m512h __A, const int __B) 1560 1.1 mrg { 1561 1.1 mrg return __builtin_ia32_sqrtph512_mask_round (__A, 1562 1.1 mrg _mm512_setzero_ph(), 1563 1.1 mrg (__mmask32) -1, __B); 1564 1.1 mrg } 1565 1.1 mrg 1566 1.1 mrg extern __inline __m512h 1567 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1568 1.1 mrg _mm512_mask_sqrt_round_ph (__m512h __A, __mmask32 __B, __m512h __C, 1569 1.1 mrg const int __D) 1570 1.1 mrg { 1571 1.1 mrg return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B, __D); 1572 1.1 mrg } 1573 1.1 mrg 1574 1.1 mrg extern __inline __m512h 1575 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1576 1.1 mrg _mm512_maskz_sqrt_round_ph 
(__mmask32 __A, __m512h __B, const int __C) 1577 1.1 mrg { 1578 1.1 mrg return __builtin_ia32_sqrtph512_mask_round (__B, 1579 1.1 mrg _mm512_setzero_ph (), 1580 1.1 mrg __A, __C); 1581 1.1 mrg } 1582 1.1 mrg 1583 1.1 mrg #else 1584 1.1 mrg #define _mm512_sqrt_round_ph(A, B) \ 1585 1.1 mrg (__builtin_ia32_sqrtph512_mask_round ((A), \ 1586 1.1 mrg _mm512_setzero_ph (), \ 1587 1.1 mrg (__mmask32)-1, (B))) 1588 1.1 mrg 1589 1.1 mrg #define _mm512_mask_sqrt_round_ph(A, B, C, D) \ 1590 1.1 mrg (__builtin_ia32_sqrtph512_mask_round ((C), (A), (B), (D))) 1591 1.1 mrg 1592 1.1 mrg #define _mm512_maskz_sqrt_round_ph(A, B, C) \ 1593 1.1 mrg (__builtin_ia32_sqrtph512_mask_round ((B), \ 1594 1.1 mrg _mm512_setzero_ph (), \ 1595 1.1 mrg (A), (C))) 1596 1.1 mrg 1597 1.1 mrg #endif /* __OPTIMIZE__ */ 1598 1.1 mrg 1599 1.1 mrg /* Intrinsics vrsqrtph. */ 1600 1.1 mrg extern __inline __m512h 1601 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1602 1.1 mrg _mm512_rsqrt_ph (__m512h __A) 1603 1.1 mrg { 1604 1.1 mrg return __builtin_ia32_rsqrtph512_mask (__A, _mm512_setzero_ph (), 1605 1.1 mrg (__mmask32) -1); 1606 1.1 mrg } 1607 1.1 mrg 1608 1.1 mrg extern __inline __m512h 1609 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1610 1.1 mrg _mm512_mask_rsqrt_ph (__m512h __A, __mmask32 __B, __m512h __C) 1611 1.1 mrg { 1612 1.1 mrg return __builtin_ia32_rsqrtph512_mask (__C, __A, __B); 1613 1.1 mrg } 1614 1.1 mrg 1615 1.1 mrg extern __inline __m512h 1616 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1617 1.1 mrg _mm512_maskz_rsqrt_ph (__mmask32 __A, __m512h __B) 1618 1.1 mrg { 1619 1.1 mrg return __builtin_ia32_rsqrtph512_mask (__B, _mm512_setzero_ph (), 1620 1.1 mrg __A); 1621 1.1 mrg } 1622 1.1 mrg 1623 1.1 mrg /* Intrinsics vrsqrtsh. 
*/ 1624 1.1 mrg extern __inline __m128h 1625 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1626 1.1 mrg _mm_rsqrt_sh (__m128h __A, __m128h __B) 1627 1.1 mrg { 1628 1.1 mrg return __builtin_ia32_rsqrtsh_mask (__B, __A, _mm_setzero_ph (), 1629 1.1 mrg (__mmask8) -1); 1630 1.1 mrg } 1631 1.1 mrg 1632 1.1 mrg extern __inline __m128h 1633 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1634 1.1 mrg _mm_mask_rsqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 1635 1.1 mrg { 1636 1.1 mrg return __builtin_ia32_rsqrtsh_mask (__D, __C, __A, __B); 1637 1.1 mrg } 1638 1.1 mrg 1639 1.1 mrg extern __inline __m128h 1640 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1641 1.1 mrg _mm_maskz_rsqrt_sh (__mmask8 __A, __m128h __B, __m128h __C) 1642 1.1 mrg { 1643 1.1 mrg return __builtin_ia32_rsqrtsh_mask (__C, __B, _mm_setzero_ph (), 1644 1.1 mrg __A); 1645 1.1 mrg } 1646 1.1 mrg 1647 1.1 mrg /* Intrinsics vsqrtsh. 
*/ 1648 1.1 mrg extern __inline __m128h 1649 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1650 1.1 mrg _mm_sqrt_sh (__m128h __A, __m128h __B) 1651 1.1 mrg { 1652 1.1 mrg return __builtin_ia32_sqrtsh_mask_round (__B, __A, 1653 1.1 mrg _mm_setzero_ph (), 1654 1.1 mrg (__mmask8) -1, 1655 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1656 1.1 mrg } 1657 1.1 mrg 1658 1.1 mrg extern __inline __m128h 1659 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1660 1.1 mrg _mm_mask_sqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 1661 1.1 mrg { 1662 1.1 mrg return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B, 1663 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1664 1.1 mrg } 1665 1.1 mrg 1666 1.1 mrg extern __inline __m128h 1667 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1668 1.1 mrg _mm_maskz_sqrt_sh (__mmask8 __A, __m128h __B, __m128h __C) 1669 1.1 mrg { 1670 1.1 mrg return __builtin_ia32_sqrtsh_mask_round (__C, __B, 1671 1.1 mrg _mm_setzero_ph (), 1672 1.1 mrg __A, _MM_FROUND_CUR_DIRECTION); 1673 1.1 mrg } 1674 1.1 mrg 1675 1.1 mrg #ifdef __OPTIMIZE__ 1676 1.1 mrg extern __inline __m128h 1677 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1678 1.1 mrg _mm_sqrt_round_sh (__m128h __A, __m128h __B, const int __C) 1679 1.1 mrg { 1680 1.1 mrg return __builtin_ia32_sqrtsh_mask_round (__B, __A, 1681 1.1 mrg _mm_setzero_ph (), 1682 1.1 mrg (__mmask8) -1, __C); 1683 1.1 mrg } 1684 1.1 mrg 1685 1.1 mrg extern __inline __m128h 1686 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1687 1.1 mrg _mm_mask_sqrt_round_sh (__m128h __A, __mmask8 __B, __m128h __C, 1688 1.1 mrg __m128h __D, const int __E) 1689 1.1 mrg { 1690 1.1 mrg return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B, 1691 1.1 mrg __E); 1692 1.1 mrg } 1693 1.1 mrg 1694 1.1 mrg extern __inline __m128h 1695 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
1696 1.1 mrg _mm_maskz_sqrt_round_sh (__mmask8 __A, __m128h __B, __m128h __C, 1697 1.1 mrg const int __D) 1698 1.1 mrg { 1699 1.1 mrg return __builtin_ia32_sqrtsh_mask_round (__C, __B, 1700 1.1 mrg _mm_setzero_ph (), 1701 1.1 mrg __A, __D); 1702 1.1 mrg } 1703 1.1 mrg 1704 1.1 mrg #else 1705 1.1 mrg #define _mm_sqrt_round_sh(A, B, C) \ 1706 1.1 mrg (__builtin_ia32_sqrtsh_mask_round ((B), (A), \ 1707 1.1 mrg _mm_setzero_ph (), \ 1708 1.1 mrg (__mmask8)-1, (C))) 1709 1.1 mrg 1710 1.1 mrg #define _mm_mask_sqrt_round_sh(A, B, C, D, E) \ 1711 1.1 mrg (__builtin_ia32_sqrtsh_mask_round ((D), (C), (A), (B), (E))) 1712 1.1 mrg 1713 1.1 mrg #define _mm_maskz_sqrt_round_sh(A, B, C, D) \ 1714 1.1 mrg (__builtin_ia32_sqrtsh_mask_round ((C), (B), \ 1715 1.1 mrg _mm_setzero_ph (), \ 1716 1.1 mrg (A), (D))) 1717 1.1 mrg 1718 1.1 mrg #endif /* __OPTIMIZE__ */ 1719 1.1 mrg 1720 1.1 mrg /* Intrinsics vrcpph. */ 1721 1.1 mrg extern __inline __m512h 1722 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1723 1.1 mrg _mm512_rcp_ph (__m512h __A) 1724 1.1 mrg { 1725 1.1 mrg return __builtin_ia32_rcpph512_mask (__A, _mm512_setzero_ph (), 1726 1.1 mrg (__mmask32) -1); 1727 1.1 mrg } 1728 1.1 mrg 1729 1.1 mrg extern __inline __m512h 1730 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1731 1.1 mrg _mm512_mask_rcp_ph (__m512h __A, __mmask32 __B, __m512h __C) 1732 1.1 mrg { 1733 1.1 mrg return __builtin_ia32_rcpph512_mask (__C, __A, __B); 1734 1.1 mrg } 1735 1.1 mrg 1736 1.1 mrg extern __inline __m512h 1737 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1738 1.1 mrg _mm512_maskz_rcp_ph (__mmask32 __A, __m512h __B) 1739 1.1 mrg { 1740 1.1 mrg return __builtin_ia32_rcpph512_mask (__B, _mm512_setzero_ph (), 1741 1.1 mrg __A); 1742 1.1 mrg } 1743 1.1 mrg 1744 1.1 mrg /* Intrinsics vrcpsh. 
*/ 1745 1.1 mrg extern __inline __m128h 1746 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1747 1.1 mrg _mm_rcp_sh (__m128h __A, __m128h __B) 1748 1.1 mrg { 1749 1.1 mrg return __builtin_ia32_rcpsh_mask (__B, __A, _mm_setzero_ph (), 1750 1.1 mrg (__mmask8) -1); 1751 1.1 mrg } 1752 1.1 mrg 1753 1.1 mrg extern __inline __m128h 1754 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1755 1.1 mrg _mm_mask_rcp_sh (__m128h __A, __mmask32 __B, __m128h __C, __m128h __D) 1756 1.1 mrg { 1757 1.1 mrg return __builtin_ia32_rcpsh_mask (__D, __C, __A, __B); 1758 1.1 mrg } 1759 1.1 mrg 1760 1.1 mrg extern __inline __m128h 1761 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1762 1.1 mrg _mm_maskz_rcp_sh (__mmask32 __A, __m128h __B, __m128h __C) 1763 1.1 mrg { 1764 1.1 mrg return __builtin_ia32_rcpsh_mask (__C, __B, _mm_setzero_ph (), 1765 1.1 mrg __A); 1766 1.1 mrg } 1767 1.1 mrg 1768 1.1 mrg /* Intrinsics vscalefph. */ 1769 1.1 mrg extern __inline __m512h 1770 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1771 1.1 mrg _mm512_scalef_ph (__m512h __A, __m512h __B) 1772 1.1 mrg { 1773 1.1 mrg return __builtin_ia32_scalefph512_mask_round (__A, __B, 1774 1.1 mrg _mm512_setzero_ph (), 1775 1.1 mrg (__mmask32) -1, 1776 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1777 1.1 mrg } 1778 1.1 mrg 1779 1.1 mrg extern __inline __m512h 1780 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1781 1.1 mrg _mm512_mask_scalef_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) 1782 1.1 mrg { 1783 1.1 mrg return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B, 1784 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1785 1.1 mrg } 1786 1.1 mrg 1787 1.1 mrg extern __inline __m512h 1788 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1789 1.1 mrg _mm512_maskz_scalef_ph (__mmask32 __A, __m512h __B, __m512h __C) 1790 1.1 mrg { 1791 1.1 mrg return 
__builtin_ia32_scalefph512_mask_round (__B, __C, 1792 1.1 mrg _mm512_setzero_ph (), 1793 1.1 mrg __A, 1794 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1795 1.1 mrg } 1796 1.1 mrg 1797 1.1 mrg #ifdef __OPTIMIZE__ 1798 1.1 mrg extern __inline __m512h 1799 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1800 1.1 mrg _mm512_scalef_round_ph (__m512h __A, __m512h __B, const int __C) 1801 1.1 mrg { 1802 1.1 mrg return __builtin_ia32_scalefph512_mask_round (__A, __B, 1803 1.1 mrg _mm512_setzero_ph (), 1804 1.1 mrg (__mmask32) -1, __C); 1805 1.1 mrg } 1806 1.1 mrg 1807 1.1 mrg extern __inline __m512h 1808 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1809 1.1 mrg _mm512_mask_scalef_round_ph (__m512h __A, __mmask32 __B, __m512h __C, 1810 1.1 mrg __m512h __D, const int __E) 1811 1.1 mrg { 1812 1.1 mrg return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B, 1813 1.1 mrg __E); 1814 1.1 mrg } 1815 1.1 mrg 1816 1.1 mrg extern __inline __m512h 1817 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1818 1.1 mrg _mm512_maskz_scalef_round_ph (__mmask32 __A, __m512h __B, __m512h __C, 1819 1.1 mrg const int __D) 1820 1.1 mrg { 1821 1.1 mrg return __builtin_ia32_scalefph512_mask_round (__B, __C, 1822 1.1 mrg _mm512_setzero_ph (), 1823 1.1 mrg __A, __D); 1824 1.1 mrg } 1825 1.1 mrg 1826 1.1 mrg #else 1827 1.1 mrg #define _mm512_scalef_round_ph(A, B, C) \ 1828 1.1 mrg (__builtin_ia32_scalefph512_mask_round ((A), (B), \ 1829 1.1 mrg _mm512_setzero_ph (), \ 1830 1.1 mrg (__mmask32)-1, (C))) 1831 1.1 mrg 1832 1.1 mrg #define _mm512_mask_scalef_round_ph(A, B, C, D, E) \ 1833 1.1 mrg (__builtin_ia32_scalefph512_mask_round ((C), (D), (A), (B), (E))) 1834 1.1 mrg 1835 1.1 mrg #define _mm512_maskz_scalef_round_ph(A, B, C, D) \ 1836 1.1 mrg (__builtin_ia32_scalefph512_mask_round ((B), (C), \ 1837 1.1 mrg _mm512_setzero_ph (), \ 1838 1.1 mrg (A), (D))) 1839 1.1 mrg 1840 1.1 mrg #endif /* __OPTIMIZE__ */ 1841 1.1 
mrg 1842 1.1 mrg /* Intrinsics vscalefsh. */ 1843 1.1 mrg extern __inline __m128h 1844 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1845 1.1 mrg _mm_scalef_sh (__m128h __A, __m128h __B) 1846 1.1 mrg { 1847 1.1 mrg return __builtin_ia32_scalefsh_mask_round (__A, __B, 1848 1.1 mrg _mm_setzero_ph (), 1849 1.1 mrg (__mmask8) -1, 1850 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1851 1.1 mrg } 1852 1.1 mrg 1853 1.1 mrg extern __inline __m128h 1854 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1855 1.1 mrg _mm_mask_scalef_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 1856 1.1 mrg { 1857 1.1 mrg return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B, 1858 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1859 1.1 mrg } 1860 1.1 mrg 1861 1.1 mrg extern __inline __m128h 1862 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1863 1.1 mrg _mm_maskz_scalef_sh (__mmask8 __A, __m128h __B, __m128h __C) 1864 1.1 mrg { 1865 1.1 mrg return __builtin_ia32_scalefsh_mask_round (__B, __C, 1866 1.1 mrg _mm_setzero_ph (), 1867 1.1 mrg __A, 1868 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1869 1.1 mrg } 1870 1.1 mrg 1871 1.1 mrg #ifdef __OPTIMIZE__ 1872 1.1 mrg extern __inline __m128h 1873 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1874 1.1 mrg _mm_scalef_round_sh (__m128h __A, __m128h __B, const int __C) 1875 1.1 mrg { 1876 1.1 mrg return __builtin_ia32_scalefsh_mask_round (__A, __B, 1877 1.1 mrg _mm_setzero_ph (), 1878 1.1 mrg (__mmask8) -1, __C); 1879 1.1 mrg } 1880 1.1 mrg 1881 1.1 mrg extern __inline __m128h 1882 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1883 1.1 mrg _mm_mask_scalef_round_sh (__m128h __A, __mmask8 __B, __m128h __C, 1884 1.1 mrg __m128h __D, const int __E) 1885 1.1 mrg { 1886 1.1 mrg return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B, 1887 1.1 mrg __E); 1888 1.1 mrg } 1889 1.1 mrg 1890 1.1 mrg extern __inline __m128h 1891 
1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1892 1.1 mrg _mm_maskz_scalef_round_sh (__mmask8 __A, __m128h __B, __m128h __C, 1893 1.1 mrg const int __D) 1894 1.1 mrg { 1895 1.1 mrg return __builtin_ia32_scalefsh_mask_round (__B, __C, 1896 1.1 mrg _mm_setzero_ph (), 1897 1.1 mrg __A, __D); 1898 1.1 mrg } 1899 1.1 mrg 1900 1.1 mrg #else 1901 1.1 mrg #define _mm_scalef_round_sh(A, B, C) \ 1902 1.1 mrg (__builtin_ia32_scalefsh_mask_round ((A), (B), \ 1903 1.1 mrg _mm_setzero_ph (), \ 1904 1.1 mrg (__mmask8)-1, (C))) 1905 1.1 mrg 1906 1.1 mrg #define _mm_mask_scalef_round_sh(A, B, C, D, E) \ 1907 1.1 mrg (__builtin_ia32_scalefsh_mask_round ((C), (D), (A), (B), (E))) 1908 1.1 mrg 1909 1.1 mrg #define _mm_maskz_scalef_round_sh(A, B, C, D) \ 1910 1.1 mrg (__builtin_ia32_scalefsh_mask_round ((B), (C), _mm_setzero_ph (), \ 1911 1.1 mrg (A), (D))) 1912 1.1 mrg 1913 1.1 mrg #endif /* __OPTIMIZE__ */ 1914 1.1 mrg 1915 1.1 mrg /* Intrinsics vreduceph. */ 1916 1.1 mrg #ifdef __OPTIMIZE__ 1917 1.1 mrg extern __inline __m512h 1918 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1919 1.1 mrg _mm512_reduce_ph (__m512h __A, int __B) 1920 1.1 mrg { 1921 1.1 mrg return __builtin_ia32_reduceph512_mask_round (__A, __B, 1922 1.1 mrg _mm512_setzero_ph (), 1923 1.1 mrg (__mmask32) -1, 1924 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1925 1.1 mrg } 1926 1.1 mrg 1927 1.1 mrg extern __inline __m512h 1928 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1929 1.1 mrg _mm512_mask_reduce_ph (__m512h __A, __mmask32 __B, __m512h __C, int __D) 1930 1.1 mrg { 1931 1.1 mrg return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B, 1932 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1933 1.1 mrg } 1934 1.1 mrg 1935 1.1 mrg extern __inline __m512h 1936 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1937 1.1 mrg _mm512_maskz_reduce_ph (__mmask32 __A, __m512h __B, int __C) 1938 1.1 mrg { 1939 1.1 mrg return 
__builtin_ia32_reduceph512_mask_round (__B, __C, 1940 1.1 mrg _mm512_setzero_ph (), 1941 1.1 mrg __A, 1942 1.1 mrg _MM_FROUND_CUR_DIRECTION); 1943 1.1 mrg } 1944 1.1 mrg 1945 1.1 mrg extern __inline __m512h 1946 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1947 1.1 mrg _mm512_reduce_round_ph (__m512h __A, int __B, const int __C) 1948 1.1 mrg { 1949 1.1 mrg return __builtin_ia32_reduceph512_mask_round (__A, __B, 1950 1.1 mrg _mm512_setzero_ph (), 1951 1.1 mrg (__mmask32) -1, __C); 1952 1.1 mrg } 1953 1.1 mrg 1954 1.1 mrg extern __inline __m512h 1955 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1956 1.1 mrg _mm512_mask_reduce_round_ph (__m512h __A, __mmask32 __B, __m512h __C, 1957 1.1 mrg int __D, const int __E) 1958 1.1 mrg { 1959 1.1 mrg return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B, 1960 1.1 mrg __E); 1961 1.1 mrg } 1962 1.1 mrg 1963 1.1 mrg extern __inline __m512h 1964 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1965 1.1 mrg _mm512_maskz_reduce_round_ph (__mmask32 __A, __m512h __B, int __C, 1966 1.1 mrg const int __D) 1967 1.1 mrg { 1968 1.1 mrg return __builtin_ia32_reduceph512_mask_round (__B, __C, 1969 1.1 mrg _mm512_setzero_ph (), 1970 1.1 mrg __A, __D); 1971 1.1 mrg } 1972 1.1 mrg 1973 1.1 mrg #else 1974 1.1 mrg #define _mm512_reduce_ph(A, B) \ 1975 1.1 mrg (__builtin_ia32_reduceph512_mask_round ((A), (B), \ 1976 1.1 mrg _mm512_setzero_ph (), \ 1977 1.1 mrg (__mmask32)-1, \ 1978 1.1 mrg _MM_FROUND_CUR_DIRECTION)) 1979 1.1 mrg 1980 1.1 mrg #define _mm512_mask_reduce_ph(A, B, C, D) \ 1981 1.1 mrg (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), \ 1982 1.1 mrg _MM_FROUND_CUR_DIRECTION)) 1983 1.1 mrg 1984 1.1 mrg #define _mm512_maskz_reduce_ph(A, B, C) \ 1985 1.1 mrg (__builtin_ia32_reduceph512_mask_round ((B), (C), \ 1986 1.1 mrg _mm512_setzero_ph (), \ 1987 1.1 mrg (A), _MM_FROUND_CUR_DIRECTION)) 1988 1.1 mrg 1989 1.1 mrg #define 
_mm512_reduce_round_ph(A, B, C) \ 1990 1.1 mrg (__builtin_ia32_reduceph512_mask_round ((A), (B), \ 1991 1.1 mrg _mm512_setzero_ph (), \ 1992 1.1 mrg (__mmask32)-1, (C))) 1993 1.1 mrg 1994 1.1 mrg #define _mm512_mask_reduce_round_ph(A, B, C, D, E) \ 1995 1.1 mrg (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), (E))) 1996 1.1 mrg 1997 1.1 mrg #define _mm512_maskz_reduce_round_ph(A, B, C, D) \ 1998 1.1 mrg (__builtin_ia32_reduceph512_mask_round ((B), (C), \ 1999 1.1 mrg _mm512_setzero_ph (), \ 2000 1.1 mrg (A), (D))) 2001 1.1 mrg 2002 1.1 mrg #endif /* __OPTIMIZE__ */ 2003 1.1 mrg 2004 1.1 mrg /* Intrinsics vreducesh. */ 2005 1.1 mrg #ifdef __OPTIMIZE__ 2006 1.1 mrg extern __inline __m128h 2007 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2008 1.1 mrg _mm_reduce_sh (__m128h __A, __m128h __B, int __C) 2009 1.1 mrg { 2010 1.1 mrg return __builtin_ia32_reducesh_mask_round (__A, __B, __C, 2011 1.1 mrg _mm_setzero_ph (), 2012 1.1 mrg (__mmask8) -1, 2013 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2014 1.1 mrg } 2015 1.1 mrg 2016 1.1 mrg extern __inline __m128h 2017 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2018 1.1 mrg _mm_mask_reduce_sh (__m128h __A, __mmask8 __B, __m128h __C, 2019 1.1 mrg __m128h __D, int __E) 2020 1.1 mrg { 2021 1.1 mrg return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A, __B, 2022 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2023 1.1 mrg } 2024 1.1 mrg 2025 1.1 mrg extern __inline __m128h 2026 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2027 1.1 mrg _mm_maskz_reduce_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D) 2028 1.1 mrg { 2029 1.1 mrg return __builtin_ia32_reducesh_mask_round (__B, __C, __D, 2030 1.1 mrg _mm_setzero_ph (), __A, 2031 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2032 1.1 mrg } 2033 1.1 mrg 2034 1.1 mrg extern __inline __m128h 2035 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2036 1.1 mrg _mm_reduce_round_sh 
(__m128h __A, __m128h __B, int __C, const int __D) 2037 1.1 mrg { 2038 1.1 mrg return __builtin_ia32_reducesh_mask_round (__A, __B, __C, 2039 1.1 mrg _mm_setzero_ph (), 2040 1.1 mrg (__mmask8) -1, __D); 2041 1.1 mrg } 2042 1.1 mrg 2043 1.1 mrg extern __inline __m128h 2044 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2045 1.1 mrg _mm_mask_reduce_round_sh (__m128h __A, __mmask8 __B, __m128h __C, 2046 1.1 mrg __m128h __D, int __E, const int __F) 2047 1.1 mrg { 2048 1.1 mrg return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A, 2049 1.1 mrg __B, __F); 2050 1.1 mrg } 2051 1.1 mrg 2052 1.1 mrg extern __inline __m128h 2053 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2054 1.1 mrg _mm_maskz_reduce_round_sh (__mmask8 __A, __m128h __B, __m128h __C, 2055 1.1 mrg int __D, const int __E) 2056 1.1 mrg { 2057 1.1 mrg return __builtin_ia32_reducesh_mask_round (__B, __C, __D, 2058 1.1 mrg _mm_setzero_ph (), 2059 1.1 mrg __A, __E); 2060 1.1 mrg } 2061 1.1 mrg 2062 1.1 mrg #else 2063 1.1 mrg #define _mm_reduce_sh(A, B, C) \ 2064 1.1 mrg (__builtin_ia32_reducesh_mask_round ((A), (B), (C), \ 2065 1.1 mrg _mm_setzero_ph (), \ 2066 1.1 mrg (__mmask8)-1, \ 2067 1.1 mrg _MM_FROUND_CUR_DIRECTION)) 2068 1.1 mrg 2069 1.1 mrg #define _mm_mask_reduce_sh(A, B, C, D, E) \ 2070 1.1 mrg (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), \ 2071 1.1 mrg _MM_FROUND_CUR_DIRECTION)) 2072 1.1 mrg 2073 1.1 mrg #define _mm_maskz_reduce_sh(A, B, C, D) \ 2074 1.1 mrg (__builtin_ia32_reducesh_mask_round ((B), (C), (D), \ 2075 1.1 mrg _mm_setzero_ph (), \ 2076 1.1 mrg (A), _MM_FROUND_CUR_DIRECTION)) 2077 1.1 mrg 2078 1.1 mrg #define _mm_reduce_round_sh(A, B, C, D) \ 2079 1.1 mrg (__builtin_ia32_reducesh_mask_round ((A), (B), (C), \ 2080 1.1 mrg _mm_setzero_ph (), \ 2081 1.1 mrg (__mmask8)-1, (D))) 2082 1.1 mrg 2083 1.1 mrg #define _mm_mask_reduce_round_sh(A, B, C, D, E, F) \ 2084 1.1 mrg (__builtin_ia32_reducesh_mask_round ((C), 
(D), (E), (A), (B), (F))) 2085 1.1 mrg 2086 1.1 mrg #define _mm_maskz_reduce_round_sh(A, B, C, D, E) \ 2087 1.1 mrg (__builtin_ia32_reducesh_mask_round ((B), (C), (D), \ 2088 1.1 mrg _mm_setzero_ph (), \ 2089 1.1 mrg (A), (E))) 2090 1.1 mrg 2091 1.1 mrg #endif /* __OPTIMIZE__ */ 2092 1.1 mrg 2093 1.1 mrg /* Intrinsics vrndscaleph. */ 2094 1.1 mrg #ifdef __OPTIMIZE__ 2095 1.1 mrg extern __inline __m512h 2096 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2097 1.1 mrg _mm512_roundscale_ph (__m512h __A, int __B) 2098 1.1 mrg { 2099 1.1 mrg return __builtin_ia32_rndscaleph512_mask_round (__A, __B, 2100 1.1 mrg _mm512_setzero_ph (), 2101 1.1 mrg (__mmask32) -1, 2102 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2103 1.1 mrg } 2104 1.1 mrg 2105 1.1 mrg extern __inline __m512h 2106 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2107 1.1 mrg _mm512_mask_roundscale_ph (__m512h __A, __mmask32 __B, 2108 1.1 mrg __m512h __C, int __D) 2109 1.1 mrg { 2110 1.1 mrg return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A, __B, 2111 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2112 1.1 mrg } 2113 1.1 mrg 2114 1.1 mrg extern __inline __m512h 2115 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2116 1.1 mrg _mm512_maskz_roundscale_ph (__mmask32 __A, __m512h __B, int __C) 2117 1.1 mrg { 2118 1.1 mrg return __builtin_ia32_rndscaleph512_mask_round (__B, __C, 2119 1.1 mrg _mm512_setzero_ph (), 2120 1.1 mrg __A, 2121 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2122 1.1 mrg } 2123 1.1 mrg 2124 1.1 mrg extern __inline __m512h 2125 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2126 1.1 mrg _mm512_roundscale_round_ph (__m512h __A, int __B, const int __C) 2127 1.1 mrg { 2128 1.1 mrg return __builtin_ia32_rndscaleph512_mask_round (__A, __B, 2129 1.1 mrg _mm512_setzero_ph (), 2130 1.1 mrg (__mmask32) -1, 2131 1.1 mrg __C); 2132 1.1 mrg } 2133 1.1 mrg 2134 1.1 mrg extern __inline __m512h 2135 1.1 mrg 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2136 1.1 mrg _mm512_mask_roundscale_round_ph (__m512h __A, __mmask32 __B, 2137 1.1 mrg __m512h __C, int __D, const int __E) 2138 1.1 mrg { 2139 1.1 mrg return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A, 2140 1.1 mrg __B, __E); 2141 1.1 mrg } 2142 1.1 mrg 2143 1.1 mrg extern __inline __m512h 2144 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2145 1.1 mrg _mm512_maskz_roundscale_round_ph (__mmask32 __A, __m512h __B, int __C, 2146 1.1 mrg const int __D) 2147 1.1 mrg { 2148 1.1 mrg return __builtin_ia32_rndscaleph512_mask_round (__B, __C, 2149 1.1 mrg _mm512_setzero_ph (), 2150 1.1 mrg __A, __D); 2151 1.1 mrg } 2152 1.1 mrg 2153 1.1 mrg #else 2154 1.1 mrg #define _mm512_roundscale_ph(A, B) \ 2155 1.1 mrg (__builtin_ia32_rndscaleph512_mask_round ((A), (B), \ 2156 1.1 mrg _mm512_setzero_ph (), \ 2157 1.1 mrg (__mmask32)-1, \ 2158 1.1 mrg _MM_FROUND_CUR_DIRECTION)) 2159 1.1 mrg 2160 1.1 mrg #define _mm512_mask_roundscale_ph(A, B, C, D) \ 2161 1.1 mrg (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), \ 2162 1.1 mrg _MM_FROUND_CUR_DIRECTION)) 2163 1.1 mrg 2164 1.1 mrg #define _mm512_maskz_roundscale_ph(A, B, C) \ 2165 1.1 mrg (__builtin_ia32_rndscaleph512_mask_round ((B), (C), \ 2166 1.1 mrg _mm512_setzero_ph (), \ 2167 1.1 mrg (A), \ 2168 1.1 mrg _MM_FROUND_CUR_DIRECTION)) 2169 1.1 mrg #define _mm512_roundscale_round_ph(A, B, C) \ 2170 1.1 mrg (__builtin_ia32_rndscaleph512_mask_round ((A), (B), \ 2171 1.1 mrg _mm512_setzero_ph (), \ 2172 1.1 mrg (__mmask32)-1, (C))) 2173 1.1 mrg 2174 1.1 mrg #define _mm512_mask_roundscale_round_ph(A, B, C, D, E) \ 2175 1.1 mrg (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), (E))) 2176 1.1 mrg 2177 1.1 mrg #define _mm512_maskz_roundscale_round_ph(A, B, C, D) \ 2178 1.1 mrg (__builtin_ia32_rndscaleph512_mask_round ((B), (C), \ 2179 1.1 mrg _mm512_setzero_ph (), \ 2180 1.1 mrg (A), (D))) 2181 1.1 mrg 2182 1.1 
mrg #endif /* __OPTIMIZE__ */ 2183 1.1 mrg 2184 1.1 mrg /* Intrinsics vrndscalesh. */ 2185 1.1 mrg #ifdef __OPTIMIZE__ 2186 1.1 mrg extern __inline __m128h 2187 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2188 1.1 mrg _mm_roundscale_sh (__m128h __A, __m128h __B, int __C) 2189 1.1 mrg { 2190 1.1 mrg return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C, 2191 1.1 mrg _mm_setzero_ph (), 2192 1.1 mrg (__mmask8) -1, 2193 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2194 1.1 mrg } 2195 1.1 mrg 2196 1.1 mrg extern __inline __m128h 2197 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2198 1.1 mrg _mm_mask_roundscale_sh (__m128h __A, __mmask8 __B, __m128h __C, 2199 1.1 mrg __m128h __D, int __E) 2200 1.1 mrg { 2201 1.1 mrg return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E, __A, __B, 2202 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2203 1.1 mrg } 2204 1.1 mrg 2205 1.1 mrg extern __inline __m128h 2206 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2207 1.1 mrg _mm_maskz_roundscale_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D) 2208 1.1 mrg { 2209 1.1 mrg return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D, 2210 1.1 mrg _mm_setzero_ph (), __A, 2211 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2212 1.1 mrg } 2213 1.1 mrg 2214 1.1 mrg extern __inline __m128h 2215 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2216 1.1 mrg _mm_roundscale_round_sh (__m128h __A, __m128h __B, int __C, const int __D) 2217 1.1 mrg { 2218 1.1 mrg return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C, 2219 1.1 mrg _mm_setzero_ph (), 2220 1.1 mrg (__mmask8) -1, 2221 1.1 mrg __D); 2222 1.1 mrg } 2223 1.1 mrg 2224 1.1 mrg extern __inline __m128h 2225 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2226 1.1 mrg _mm_mask_roundscale_round_sh (__m128h __A, __mmask8 __B, __m128h __C, 2227 1.1 mrg __m128h __D, int __E, const int __F) 2228 1.1 mrg { 2229 1.1 mrg 
return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E, 2230 1.1 mrg __A, __B, __F); 2231 1.1 mrg } 2232 1.1 mrg 2233 1.1 mrg extern __inline __m128h 2234 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2235 1.1 mrg _mm_maskz_roundscale_round_sh (__mmask8 __A, __m128h __B, __m128h __C, 2236 1.1 mrg int __D, const int __E) 2237 1.1 mrg { 2238 1.1 mrg return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D, 2239 1.1 mrg _mm_setzero_ph (), 2240 1.1 mrg __A, __E); 2241 1.1 mrg } 2242 1.1 mrg 2243 1.1 mrg #else 2244 1.1 mrg #define _mm_roundscale_sh(A, B, C) \ 2245 1.1 mrg (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C), \ 2246 1.1 mrg _mm_setzero_ph (), \ 2247 1.1 mrg (__mmask8)-1, \ 2248 1.1 mrg _MM_FROUND_CUR_DIRECTION)) 2249 1.1 mrg 2250 1.1 mrg #define _mm_mask_roundscale_sh(A, B, C, D, E) \ 2251 1.1 mrg (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), \ 2252 1.1 mrg _MM_FROUND_CUR_DIRECTION)) 2253 1.1 mrg 2254 1.1 mrg #define _mm_maskz_roundscale_sh(A, B, C, D) \ 2255 1.1 mrg (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D), \ 2256 1.1 mrg _mm_setzero_ph (), \ 2257 1.1 mrg (A), _MM_FROUND_CUR_DIRECTION)) 2258 1.1 mrg 2259 1.1 mrg #define _mm_roundscale_round_sh(A, B, C, D) \ 2260 1.1 mrg (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C), \ 2261 1.1 mrg _mm_setzero_ph (), \ 2262 1.1 mrg (__mmask8)-1, (D))) 2263 1.1 mrg 2264 1.1 mrg #define _mm_mask_roundscale_round_sh(A, B, C, D, E, F) \ 2265 1.1 mrg (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), (F))) 2266 1.1 mrg 2267 1.1 mrg #define _mm_maskz_roundscale_round_sh(A, B, C, D, E) \ 2268 1.1 mrg (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D), \ 2269 1.1 mrg _mm_setzero_ph (), \ 2270 1.1 mrg (A), (E))) 2271 1.1 mrg 2272 1.1 mrg #endif /* __OPTIMIZE__ */ 2273 1.1 mrg 2274 1.1 mrg /* Intrinsics vfpclasssh. 
*/ 2275 1.1 mrg #ifdef __OPTIMIZE__ 2276 1.1 mrg extern __inline __mmask8 2277 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2278 1.1 mrg _mm_fpclass_sh_mask (__m128h __A, const int __imm) 2279 1.1 mrg { 2280 1.1 mrg return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm, 2281 1.1 mrg (__mmask8) -1); 2282 1.1 mrg } 2283 1.1 mrg 2284 1.1 mrg extern __inline __mmask8 2285 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2286 1.1 mrg _mm_mask_fpclass_sh_mask (__mmask8 __U, __m128h __A, const int __imm) 2287 1.1 mrg { 2288 1.1 mrg return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm, __U); 2289 1.1 mrg } 2290 1.1 mrg 2291 1.1 mrg #else 2292 1.1 mrg #define _mm_fpclass_sh_mask(X, C) \ 2293 1.1 mrg ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X), \ 2294 1.1 mrg (int) (C), (__mmask8) (-1))) \ 2295 1.1 mrg 2296 1.1 mrg #define _mm_mask_fpclass_sh_mask(U, X, C) \ 2297 1.1 mrg ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X), \ 2298 1.1 mrg (int) (C), (__mmask8) (U))) 2299 1.1 mrg #endif /* __OPTIMIZE__ */ 2300 1.1 mrg 2301 1.1 mrg /* Intrinsics vfpclassph. 
*/ 2302 1.1 mrg #ifdef __OPTIMIZE__ 2303 1.1 mrg extern __inline __mmask32 2304 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2305 1.1 mrg _mm512_mask_fpclass_ph_mask (__mmask32 __U, __m512h __A, 2306 1.1 mrg const int __imm) 2307 1.1 mrg { 2308 1.1 mrg return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A, 2309 1.1 mrg __imm, __U); 2310 1.1 mrg } 2311 1.1 mrg 2312 1.1 mrg extern __inline __mmask32 2313 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2314 1.1 mrg _mm512_fpclass_ph_mask (__m512h __A, const int __imm) 2315 1.1 mrg { 2316 1.1 mrg return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A, 2317 1.1 mrg __imm, 2318 1.1 mrg (__mmask32) -1); 2319 1.1 mrg } 2320 1.1 mrg 2321 1.1 mrg #else 2322 1.1 mrg #define _mm512_mask_fpclass_ph_mask(u, x, c) \ 2323 1.1 mrg ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \ 2324 1.1 mrg (int) (c),(__mmask32)(u))) 2325 1.1 mrg 2326 1.1 mrg #define _mm512_fpclass_ph_mask(x, c) \ 2327 1.1 mrg ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \ 2328 1.1 mrg (int) (c),(__mmask32)-1)) 2329 1.1 mrg #endif /* __OPIMTIZE__ */ 2330 1.1 mrg 2331 1.1 mrg /* Intrinsics vgetexpph, vgetexpsh. 
*/ 2332 1.1 mrg extern __inline __m128h 2333 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2334 1.1 mrg _mm_getexp_sh (__m128h __A, __m128h __B) 2335 1.1 mrg { 2336 1.1 mrg return (__m128h) 2337 1.1 mrg __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B, 2338 1.1 mrg (__v8hf) _mm_setzero_ph (), 2339 1.1 mrg (__mmask8) -1, 2340 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2341 1.1 mrg } 2342 1.1 mrg 2343 1.1 mrg extern __inline __m128h 2344 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2345 1.1 mrg _mm_mask_getexp_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) 2346 1.1 mrg { 2347 1.1 mrg return (__m128h) 2348 1.1 mrg __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B, 2349 1.1 mrg (__v8hf) __W, (__mmask8) __U, 2350 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2351 1.1 mrg } 2352 1.1 mrg 2353 1.1 mrg extern __inline __m128h 2354 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2355 1.1 mrg _mm_maskz_getexp_sh (__mmask8 __U, __m128h __A, __m128h __B) 2356 1.1 mrg { 2357 1.1 mrg return (__m128h) 2358 1.1 mrg __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B, 2359 1.1 mrg (__v8hf) _mm_setzero_ph (), 2360 1.1 mrg (__mmask8) __U, 2361 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2362 1.1 mrg } 2363 1.1 mrg 2364 1.1 mrg extern __inline __m512h 2365 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2366 1.1 mrg _mm512_getexp_ph (__m512h __A) 2367 1.1 mrg { 2368 1.1 mrg return (__m512h) 2369 1.1 mrg __builtin_ia32_getexpph512_mask ((__v32hf) __A, 2370 1.1 mrg (__v32hf) _mm512_setzero_ph (), 2371 1.1 mrg (__mmask32) -1, _MM_FROUND_CUR_DIRECTION); 2372 1.1 mrg } 2373 1.1 mrg 2374 1.1 mrg extern __inline __m512h 2375 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2376 1.1 mrg _mm512_mask_getexp_ph (__m512h __W, __mmask32 __U, __m512h __A) 2377 1.1 mrg { 2378 1.1 mrg return (__m512h) 2379 1.1 mrg 
__builtin_ia32_getexpph512_mask ((__v32hf) __A, (__v32hf) __W, 2380 1.1 mrg (__mmask32) __U, _MM_FROUND_CUR_DIRECTION); 2381 1.1 mrg } 2382 1.1 mrg 2383 1.1 mrg extern __inline __m512h 2384 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2385 1.1 mrg _mm512_maskz_getexp_ph (__mmask32 __U, __m512h __A) 2386 1.1 mrg { 2387 1.1 mrg return (__m512h) 2388 1.1 mrg __builtin_ia32_getexpph512_mask ((__v32hf) __A, 2389 1.1 mrg (__v32hf) _mm512_setzero_ph (), 2390 1.1 mrg (__mmask32) __U, _MM_FROUND_CUR_DIRECTION); 2391 1.1 mrg } 2392 1.1 mrg 2393 1.1 mrg #ifdef __OPTIMIZE__ 2394 1.1 mrg extern __inline __m128h 2395 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2396 1.1 mrg _mm_getexp_round_sh (__m128h __A, __m128h __B, const int __R) 2397 1.1 mrg { 2398 1.1 mrg return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, 2399 1.1 mrg (__v8hf) __B, 2400 1.1 mrg _mm_setzero_ph (), 2401 1.1 mrg (__mmask8) -1, 2402 1.1 mrg __R); 2403 1.1 mrg } 2404 1.1 mrg 2405 1.1 mrg extern __inline __m128h 2406 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2407 1.1 mrg _mm_mask_getexp_round_sh (__m128h __W, __mmask8 __U, __m128h __A, 2408 1.1 mrg __m128h __B, const int __R) 2409 1.1 mrg { 2410 1.1 mrg return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, 2411 1.1 mrg (__v8hf) __B, 2412 1.1 mrg (__v8hf) __W, 2413 1.1 mrg (__mmask8) __U, __R); 2414 1.1 mrg } 2415 1.1 mrg 2416 1.1 mrg extern __inline __m128h 2417 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2418 1.1 mrg _mm_maskz_getexp_round_sh (__mmask8 __U, __m128h __A, __m128h __B, 2419 1.1 mrg const int __R) 2420 1.1 mrg { 2421 1.1 mrg return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, 2422 1.1 mrg (__v8hf) __B, 2423 1.1 mrg (__v8hf) 2424 1.1 mrg _mm_setzero_ph (), 2425 1.1 mrg (__mmask8) __U, __R); 2426 1.1 mrg } 2427 1.1 mrg 2428 1.1 mrg extern __inline __m512h 2429 1.1 mrg 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2430 1.1 mrg _mm512_getexp_round_ph (__m512h __A, const int __R) 2431 1.1 mrg { 2432 1.1 mrg return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A, 2433 1.1 mrg (__v32hf) 2434 1.1 mrg _mm512_setzero_ph (), 2435 1.1 mrg (__mmask32) -1, __R); 2436 1.1 mrg } 2437 1.1 mrg 2438 1.1 mrg extern __inline __m512h 2439 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2440 1.1 mrg _mm512_mask_getexp_round_ph (__m512h __W, __mmask32 __U, __m512h __A, 2441 1.1 mrg const int __R) 2442 1.1 mrg { 2443 1.1 mrg return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A, 2444 1.1 mrg (__v32hf) __W, 2445 1.1 mrg (__mmask32) __U, __R); 2446 1.1 mrg } 2447 1.1 mrg 2448 1.1 mrg extern __inline __m512h 2449 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2450 1.1 mrg _mm512_maskz_getexp_round_ph (__mmask32 __U, __m512h __A, const int __R) 2451 1.1 mrg { 2452 1.1 mrg return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A, 2453 1.1 mrg (__v32hf) 2454 1.1 mrg _mm512_setzero_ph (), 2455 1.1 mrg (__mmask32) __U, __R); 2456 1.1 mrg } 2457 1.1 mrg 2458 1.1 mrg #else 2459 1.1 mrg #define _mm_getexp_round_sh(A, B, R) \ 2460 1.1 mrg ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A), \ 2461 1.1 mrg (__v8hf)(__m128h)(B), \ 2462 1.1 mrg (__v8hf)_mm_setzero_ph(), \ 2463 1.1 mrg (__mmask8)-1, R)) 2464 1.1 mrg 2465 1.1 mrg #define _mm_mask_getexp_round_sh(W, U, A, B, C) \ 2466 1.1 mrg (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, W, U, C) 2467 1.1 mrg 2468 1.1 mrg #define _mm_maskz_getexp_round_sh(U, A, B, C) \ 2469 1.1 mrg (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, \ 2470 1.1 mrg (__v8hf)_mm_setzero_ph(), \ 2471 1.1 mrg U, C) 2472 1.1 mrg 2473 1.1 mrg #define _mm512_getexp_round_ph(A, R) \ 2474 1.1 mrg ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \ 2475 1.1 mrg (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R)) 
2476 1.1 mrg 2477 1.1 mrg #define _mm512_mask_getexp_round_ph(W, U, A, R) \ 2478 1.1 mrg ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \ 2479 1.1 mrg (__v32hf)(__m512h)(W), (__mmask32)(U), R)) 2480 1.1 mrg 2481 1.1 mrg #define _mm512_maskz_getexp_round_ph(U, A, R) \ 2482 1.1 mrg ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \ 2483 1.1 mrg (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R)) 2484 1.1 mrg 2485 1.1 mrg #endif /* __OPTIMIZE__ */ 2486 1.1 mrg 2487 1.1 mrg /* Intrinsics vgetmantph, vgetmantsh. */ 2488 1.1 mrg #ifdef __OPTIMIZE__ 2489 1.1 mrg extern __inline __m128h 2490 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2491 1.1 mrg _mm_getmant_sh (__m128h __A, __m128h __B, 2492 1.1 mrg _MM_MANTISSA_NORM_ENUM __C, 2493 1.1 mrg _MM_MANTISSA_SIGN_ENUM __D) 2494 1.1 mrg { 2495 1.1 mrg return (__m128h) 2496 1.1 mrg __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B, 2497 1.1 mrg (__D << 2) | __C, _mm_setzero_ph (), 2498 1.1 mrg (__mmask8) -1, 2499 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2500 1.1 mrg } 2501 1.1 mrg 2502 1.1 mrg extern __inline __m128h 2503 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2504 1.1 mrg _mm_mask_getmant_sh (__m128h __W, __mmask8 __U, __m128h __A, 2505 1.1 mrg __m128h __B, _MM_MANTISSA_NORM_ENUM __C, 2506 1.1 mrg _MM_MANTISSA_SIGN_ENUM __D) 2507 1.1 mrg { 2508 1.1 mrg return (__m128h) 2509 1.1 mrg __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B, 2510 1.1 mrg (__D << 2) | __C, (__v8hf) __W, 2511 1.1 mrg __U, _MM_FROUND_CUR_DIRECTION); 2512 1.1 mrg } 2513 1.1 mrg 2514 1.1 mrg extern __inline __m128h 2515 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2516 1.1 mrg _mm_maskz_getmant_sh (__mmask8 __U, __m128h __A, __m128h __B, 2517 1.1 mrg _MM_MANTISSA_NORM_ENUM __C, 2518 1.1 mrg _MM_MANTISSA_SIGN_ENUM __D) 2519 1.1 mrg { 2520 1.1 mrg return (__m128h) 2521 1.1 mrg 
__builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B, 2522 1.1 mrg (__D << 2) | __C, 2523 1.1 mrg (__v8hf) _mm_setzero_ph(), 2524 1.1 mrg __U, _MM_FROUND_CUR_DIRECTION); 2525 1.1 mrg } 2526 1.1 mrg 2527 1.1 mrg extern __inline __m512h 2528 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2529 1.1 mrg _mm512_getmant_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B, 2530 1.1 mrg _MM_MANTISSA_SIGN_ENUM __C) 2531 1.1 mrg { 2532 1.1 mrg return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A, 2533 1.1 mrg (__C << 2) | __B, 2534 1.1 mrg _mm512_setzero_ph (), 2535 1.1 mrg (__mmask32) -1, 2536 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2537 1.1 mrg } 2538 1.1 mrg 2539 1.1 mrg extern __inline __m512h 2540 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2541 1.1 mrg _mm512_mask_getmant_ph (__m512h __W, __mmask32 __U, __m512h __A, 2542 1.1 mrg _MM_MANTISSA_NORM_ENUM __B, 2543 1.1 mrg _MM_MANTISSA_SIGN_ENUM __C) 2544 1.1 mrg { 2545 1.1 mrg return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A, 2546 1.1 mrg (__C << 2) | __B, 2547 1.1 mrg (__v32hf) __W, __U, 2548 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2549 1.1 mrg } 2550 1.1 mrg 2551 1.1 mrg extern __inline __m512h 2552 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2553 1.1 mrg _mm512_maskz_getmant_ph (__mmask32 __U, __m512h __A, 2554 1.1 mrg _MM_MANTISSA_NORM_ENUM __B, 2555 1.1 mrg _MM_MANTISSA_SIGN_ENUM __C) 2556 1.1 mrg { 2557 1.1 mrg return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A, 2558 1.1 mrg (__C << 2) | __B, 2559 1.1 mrg (__v32hf) 2560 1.1 mrg _mm512_setzero_ph (), 2561 1.1 mrg __U, 2562 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2563 1.1 mrg } 2564 1.1 mrg 2565 1.1 mrg extern __inline __m128h 2566 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2567 1.1 mrg _mm_getmant_round_sh (__m128h __A, __m128h __B, 2568 1.1 mrg _MM_MANTISSA_NORM_ENUM __C, 2569 1.1 mrg _MM_MANTISSA_SIGN_ENUM __D, 
const int __R) 2570 1.1 mrg { 2571 1.1 mrg return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, 2572 1.1 mrg (__v8hf) __B, 2573 1.1 mrg (__D << 2) | __C, 2574 1.1 mrg _mm_setzero_ph (), 2575 1.1 mrg (__mmask8) -1, 2576 1.1 mrg __R); 2577 1.1 mrg } 2578 1.1 mrg 2579 1.1 mrg extern __inline __m128h 2580 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2581 1.1 mrg _mm_mask_getmant_round_sh (__m128h __W, __mmask8 __U, __m128h __A, 2582 1.1 mrg __m128h __B, _MM_MANTISSA_NORM_ENUM __C, 2583 1.1 mrg _MM_MANTISSA_SIGN_ENUM __D, const int __R) 2584 1.1 mrg { 2585 1.1 mrg return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, 2586 1.1 mrg (__v8hf) __B, 2587 1.1 mrg (__D << 2) | __C, 2588 1.1 mrg (__v8hf) __W, 2589 1.1 mrg __U, __R); 2590 1.1 mrg } 2591 1.1 mrg 2592 1.1 mrg extern __inline __m128h 2593 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2594 1.1 mrg _mm_maskz_getmant_round_sh (__mmask8 __U, __m128h __A, __m128h __B, 2595 1.1 mrg _MM_MANTISSA_NORM_ENUM __C, 2596 1.1 mrg _MM_MANTISSA_SIGN_ENUM __D, const int __R) 2597 1.1 mrg { 2598 1.1 mrg return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, 2599 1.1 mrg (__v8hf) __B, 2600 1.1 mrg (__D << 2) | __C, 2601 1.1 mrg (__v8hf) 2602 1.1 mrg _mm_setzero_ph(), 2603 1.1 mrg __U, __R); 2604 1.1 mrg } 2605 1.1 mrg 2606 1.1 mrg extern __inline __m512h 2607 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2608 1.1 mrg _mm512_getmant_round_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B, 2609 1.1 mrg _MM_MANTISSA_SIGN_ENUM __C, const int __R) 2610 1.1 mrg { 2611 1.1 mrg return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A, 2612 1.1 mrg (__C << 2) | __B, 2613 1.1 mrg _mm512_setzero_ph (), 2614 1.1 mrg (__mmask32) -1, __R); 2615 1.1 mrg } 2616 1.1 mrg 2617 1.1 mrg extern __inline __m512h 2618 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2619 1.1 mrg 
_mm512_mask_getmant_round_ph (__m512h __W, __mmask32 __U, __m512h __A, 2620 1.1 mrg _MM_MANTISSA_NORM_ENUM __B, 2621 1.1 mrg _MM_MANTISSA_SIGN_ENUM __C, const int __R) 2622 1.1 mrg { 2623 1.1 mrg return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A, 2624 1.1 mrg (__C << 2) | __B, 2625 1.1 mrg (__v32hf) __W, __U, 2626 1.1 mrg __R); 2627 1.1 mrg } 2628 1.1 mrg 2629 1.1 mrg extern __inline __m512h 2630 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2631 1.1 mrg _mm512_maskz_getmant_round_ph (__mmask32 __U, __m512h __A, 2632 1.1 mrg _MM_MANTISSA_NORM_ENUM __B, 2633 1.1 mrg _MM_MANTISSA_SIGN_ENUM __C, const int __R) 2634 1.1 mrg { 2635 1.1 mrg return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A, 2636 1.1 mrg (__C << 2) | __B, 2637 1.1 mrg (__v32hf) 2638 1.1 mrg _mm512_setzero_ph (), 2639 1.1 mrg __U, __R); 2640 1.1 mrg } 2641 1.1 mrg 2642 1.1 mrg #else 2643 1.1 mrg #define _mm512_getmant_ph(X, B, C) \ 2644 1.1 mrg ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \ 2645 1.1 mrg (int)(((C)<<2) | (B)), \ 2646 1.1 mrg (__v32hf)(__m512h) \ 2647 1.1 mrg _mm512_setzero_ph(), \ 2648 1.1 mrg (__mmask32)-1, \ 2649 1.1 mrg _MM_FROUND_CUR_DIRECTION)) 2650 1.1 mrg 2651 1.1 mrg #define _mm512_mask_getmant_ph(W, U, X, B, C) \ 2652 1.1 mrg ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \ 2653 1.1 mrg (int)(((C)<<2) | (B)), \ 2654 1.1 mrg (__v32hf)(__m512h)(W), \ 2655 1.1 mrg (__mmask32)(U), \ 2656 1.1 mrg _MM_FROUND_CUR_DIRECTION)) 2657 1.1 mrg 2658 1.1 mrg 2659 1.1 mrg #define _mm512_maskz_getmant_ph(U, X, B, C) \ 2660 1.1 mrg ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \ 2661 1.1 mrg (int)(((C)<<2) | (B)), \ 2662 1.1 mrg (__v32hf)(__m512h) \ 2663 1.1 mrg _mm512_setzero_ph(), \ 2664 1.1 mrg (__mmask32)(U), \ 2665 1.1 mrg _MM_FROUND_CUR_DIRECTION)) 2666 1.1 mrg 2667 1.1 mrg #define _mm_getmant_sh(X, Y, C, D) \ 2668 1.1 mrg ((__m128h)__builtin_ia32_getmantsh_mask_round 
((__v8hf)(__m128h)(X), \ 2669 1.1 mrg (__v8hf)(__m128h)(Y), \ 2670 1.1 mrg (int)(((D)<<2) | (C)), \ 2671 1.1 mrg (__v8hf)(__m128h) \ 2672 1.1 mrg _mm_setzero_ph (), \ 2673 1.1 mrg (__mmask8)-1, \ 2674 1.1 mrg _MM_FROUND_CUR_DIRECTION)) 2675 1.1 mrg 2676 1.1 mrg #define _mm_mask_getmant_sh(W, U, X, Y, C, D) \ 2677 1.1 mrg ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \ 2678 1.1 mrg (__v8hf)(__m128h)(Y), \ 2679 1.1 mrg (int)(((D)<<2) | (C)), \ 2680 1.1 mrg (__v8hf)(__m128h)(W), \ 2681 1.1 mrg (__mmask8)(U), \ 2682 1.1 mrg _MM_FROUND_CUR_DIRECTION)) 2683 1.1 mrg 2684 1.1 mrg #define _mm_maskz_getmant_sh(U, X, Y, C, D) \ 2685 1.1 mrg ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \ 2686 1.1 mrg (__v8hf)(__m128h)(Y), \ 2687 1.1 mrg (int)(((D)<<2) | (C)), \ 2688 1.1 mrg (__v8hf)(__m128h) \ 2689 1.1 mrg _mm_setzero_ph(), \ 2690 1.1 mrg (__mmask8)(U), \ 2691 1.1 mrg _MM_FROUND_CUR_DIRECTION)) 2692 1.1 mrg 2693 1.1 mrg #define _mm512_getmant_round_ph(X, B, C, R) \ 2694 1.1 mrg ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \ 2695 1.1 mrg (int)(((C)<<2) | (B)), \ 2696 1.1 mrg (__v32hf)(__m512h) \ 2697 1.1 mrg _mm512_setzero_ph(), \ 2698 1.1 mrg (__mmask32)-1, \ 2699 1.1 mrg (R))) 2700 1.1 mrg 2701 1.1 mrg #define _mm512_mask_getmant_round_ph(W, U, X, B, C, R) \ 2702 1.1 mrg ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \ 2703 1.1 mrg (int)(((C)<<2) | (B)), \ 2704 1.1 mrg (__v32hf)(__m512h)(W), \ 2705 1.1 mrg (__mmask32)(U), \ 2706 1.1 mrg (R))) 2707 1.1 mrg 2708 1.1 mrg 2709 1.1 mrg #define _mm512_maskz_getmant_round_ph(U, X, B, C, R) \ 2710 1.1 mrg ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \ 2711 1.1 mrg (int)(((C)<<2) | (B)), \ 2712 1.1 mrg (__v32hf)(__m512h) \ 2713 1.1 mrg _mm512_setzero_ph(), \ 2714 1.1 mrg (__mmask32)(U), \ 2715 1.1 mrg (R))) 2716 1.1 mrg 2717 1.1 mrg #define _mm_getmant_round_sh(X, Y, C, D, R) \ 2718 1.1 mrg 
((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \ 2719 1.1 mrg (__v8hf)(__m128h)(Y), \ 2720 1.1 mrg (int)(((D)<<2) | (C)), \ 2721 1.1 mrg (__v8hf)(__m128h) \ 2722 1.1 mrg _mm_setzero_ph (), \ 2723 1.1 mrg (__mmask8)-1, \ 2724 1.1 mrg (R))) 2725 1.1 mrg 2726 1.1 mrg #define _mm_mask_getmant_round_sh(W, U, X, Y, C, D, R) \ 2727 1.1 mrg ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \ 2728 1.1 mrg (__v8hf)(__m128h)(Y), \ 2729 1.1 mrg (int)(((D)<<2) | (C)), \ 2730 1.1 mrg (__v8hf)(__m128h)(W), \ 2731 1.1 mrg (__mmask8)(U), \ 2732 1.1 mrg (R))) 2733 1.1 mrg 2734 1.1 mrg #define _mm_maskz_getmant_round_sh(U, X, Y, C, D, R) \ 2735 1.1 mrg ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \ 2736 1.1 mrg (__v8hf)(__m128h)(Y), \ 2737 1.1 mrg (int)(((D)<<2) | (C)), \ 2738 1.1 mrg (__v8hf)(__m128h) \ 2739 1.1 mrg _mm_setzero_ph(), \ 2740 1.1 mrg (__mmask8)(U), \ 2741 1.1 mrg (R))) 2742 1.1 mrg 2743 1.1 mrg #endif /* __OPTIMIZE__ */ 2744 1.1 mrg 2745 1.1 mrg /* Intrinsics vmovw. */ 2746 1.1 mrg extern __inline __m128i 2747 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2748 1.1 mrg _mm_cvtsi16_si128 (short __A) 2749 1.1 mrg { 2750 1.1 mrg return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, __A); 2751 1.1 mrg } 2752 1.1 mrg 2753 1.1 mrg extern __inline short 2754 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2755 1.1 mrg _mm_cvtsi128_si16 (__m128i __A) 2756 1.1 mrg { 2757 1.1 mrg return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, 0); 2758 1.1 mrg } 2759 1.1 mrg 2760 1.1 mrg /* Intrinsics vmovsh. 
*/ 2761 1.1 mrg extern __inline __m128h 2762 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2763 1.1 mrg _mm_mask_load_sh (__m128h __A, __mmask8 __B, _Float16 const* __C) 2764 1.1 mrg { 2765 1.1 mrg return __builtin_ia32_loadsh_mask (__C, __A, __B); 2766 1.1 mrg } 2767 1.1 mrg 2768 1.1 mrg extern __inline __m128h 2769 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2770 1.1 mrg _mm_maskz_load_sh (__mmask8 __A, _Float16 const* __B) 2771 1.1 mrg { 2772 1.1 mrg return __builtin_ia32_loadsh_mask (__B, _mm_setzero_ph (), __A); 2773 1.1 mrg } 2774 1.1 mrg 2775 1.1 mrg extern __inline void 2776 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2777 1.1 mrg _mm_mask_store_sh (_Float16 const* __A, __mmask8 __B, __m128h __C) 2778 1.1 mrg { 2779 1.1 mrg __builtin_ia32_storesh_mask (__A, __C, __B); 2780 1.1 mrg } 2781 1.1 mrg 2782 1.1 mrg extern __inline __m128h 2783 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2784 1.1 mrg _mm_move_sh (__m128h __A, __m128h __B) 2785 1.1 mrg { 2786 1.1 mrg __A[0] = __B[0]; 2787 1.1 mrg return __A; 2788 1.1 mrg } 2789 1.1 mrg 2790 1.1 mrg extern __inline __m128h 2791 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2792 1.1 mrg _mm_mask_move_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 2793 1.1 mrg { 2794 1.1 mrg return __builtin_ia32_vmovsh_mask (__C, __D, __A, __B); 2795 1.1 mrg } 2796 1.1 mrg 2797 1.1 mrg extern __inline __m128h 2798 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2799 1.1 mrg _mm_maskz_move_sh (__mmask8 __A, __m128h __B, __m128h __C) 2800 1.1 mrg { 2801 1.1 mrg return __builtin_ia32_vmovsh_mask (__B, __C, _mm_setzero_ph (), __A); 2802 1.1 mrg } 2803 1.1 mrg 2804 1.1 mrg /* Intrinsics vcvtph2dq. 
*/ 2805 1.1 mrg extern __inline __m512i 2806 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2807 1.1 mrg _mm512_cvtph_epi32 (__m256h __A) 2808 1.1 mrg { 2809 1.1 mrg return (__m512i) 2810 1.1 mrg __builtin_ia32_vcvtph2dq512_mask_round (__A, 2811 1.1 mrg (__v16si) 2812 1.1 mrg _mm512_setzero_si512 (), 2813 1.1 mrg (__mmask16) -1, 2814 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2815 1.1 mrg } 2816 1.1 mrg 2817 1.1 mrg extern __inline __m512i 2818 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2819 1.1 mrg _mm512_mask_cvtph_epi32 (__m512i __A, __mmask16 __B, __m256h __C) 2820 1.1 mrg { 2821 1.1 mrg return (__m512i) 2822 1.1 mrg __builtin_ia32_vcvtph2dq512_mask_round (__C, 2823 1.1 mrg (__v16si) __A, 2824 1.1 mrg __B, 2825 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2826 1.1 mrg } 2827 1.1 mrg 2828 1.1 mrg extern __inline __m512i 2829 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2830 1.1 mrg _mm512_maskz_cvtph_epi32 (__mmask16 __A, __m256h __B) 2831 1.1 mrg { 2832 1.1 mrg return (__m512i) 2833 1.1 mrg __builtin_ia32_vcvtph2dq512_mask_round (__B, 2834 1.1 mrg (__v16si) 2835 1.1 mrg _mm512_setzero_si512 (), 2836 1.1 mrg __A, 2837 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2838 1.1 mrg } 2839 1.1 mrg 2840 1.1 mrg #ifdef __OPTIMIZE__ 2841 1.1 mrg extern __inline __m512i 2842 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2843 1.1 mrg _mm512_cvt_roundph_epi32 (__m256h __A, int __B) 2844 1.1 mrg { 2845 1.1 mrg return (__m512i) 2846 1.1 mrg __builtin_ia32_vcvtph2dq512_mask_round (__A, 2847 1.1 mrg (__v16si) 2848 1.1 mrg _mm512_setzero_si512 (), 2849 1.1 mrg (__mmask16) -1, 2850 1.1 mrg __B); 2851 1.1 mrg } 2852 1.1 mrg 2853 1.1 mrg extern __inline __m512i 2854 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2855 1.1 mrg _mm512_mask_cvt_roundph_epi32 (__m512i __A, __mmask16 __B, __m256h __C, int __D) 2856 1.1 mrg { 2857 1.1 mrg return (__m512i) 2858 1.1 mrg 
__builtin_ia32_vcvtph2dq512_mask_round (__C, 2859 1.1 mrg (__v16si) __A, 2860 1.1 mrg __B, 2861 1.1 mrg __D); 2862 1.1 mrg } 2863 1.1 mrg 2864 1.1 mrg extern __inline __m512i 2865 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2866 1.1 mrg _mm512_maskz_cvt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C) 2867 1.1 mrg { 2868 1.1 mrg return (__m512i) 2869 1.1 mrg __builtin_ia32_vcvtph2dq512_mask_round (__B, 2870 1.1 mrg (__v16si) 2871 1.1 mrg _mm512_setzero_si512 (), 2872 1.1 mrg __A, 2873 1.1 mrg __C); 2874 1.1 mrg } 2875 1.1 mrg 2876 1.1 mrg #else 2877 1.1 mrg #define _mm512_cvt_roundph_epi32(A, B) \ 2878 1.1 mrg ((__m512i) \ 2879 1.1 mrg __builtin_ia32_vcvtph2dq512_mask_round ((A), \ 2880 1.1 mrg (__v16si) \ 2881 1.1 mrg _mm512_setzero_si512 (), \ 2882 1.1 mrg (__mmask16)-1, \ 2883 1.1 mrg (B))) 2884 1.1 mrg 2885 1.1 mrg #define _mm512_mask_cvt_roundph_epi32(A, B, C, D) \ 2886 1.1 mrg ((__m512i) \ 2887 1.1 mrg __builtin_ia32_vcvtph2dq512_mask_round ((C), (__v16si)(A), (B), (D))) 2888 1.1 mrg 2889 1.1 mrg #define _mm512_maskz_cvt_roundph_epi32(A, B, C) \ 2890 1.1 mrg ((__m512i) \ 2891 1.1 mrg __builtin_ia32_vcvtph2dq512_mask_round ((B), \ 2892 1.1 mrg (__v16si) \ 2893 1.1 mrg _mm512_setzero_si512 (), \ 2894 1.1 mrg (A), \ 2895 1.1 mrg (C))) 2896 1.1 mrg 2897 1.1 mrg #endif /* __OPTIMIZE__ */ 2898 1.1 mrg 2899 1.1 mrg /* Intrinsics vcvtph2udq. 
*/ 2900 1.1 mrg extern __inline __m512i 2901 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2902 1.1 mrg _mm512_cvtph_epu32 (__m256h __A) 2903 1.1 mrg { 2904 1.1 mrg return (__m512i) 2905 1.1 mrg __builtin_ia32_vcvtph2udq512_mask_round (__A, 2906 1.1 mrg (__v16si) 2907 1.1 mrg _mm512_setzero_si512 (), 2908 1.1 mrg (__mmask16) -1, 2909 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2910 1.1 mrg } 2911 1.1 mrg 2912 1.1 mrg extern __inline __m512i 2913 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2914 1.1 mrg _mm512_mask_cvtph_epu32 (__m512i __A, __mmask16 __B, __m256h __C) 2915 1.1 mrg { 2916 1.1 mrg return (__m512i) 2917 1.1 mrg __builtin_ia32_vcvtph2udq512_mask_round (__C, 2918 1.1 mrg (__v16si) __A, 2919 1.1 mrg __B, 2920 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2921 1.1 mrg } 2922 1.1 mrg 2923 1.1 mrg extern __inline __m512i 2924 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2925 1.1 mrg _mm512_maskz_cvtph_epu32 (__mmask16 __A, __m256h __B) 2926 1.1 mrg { 2927 1.1 mrg return (__m512i) 2928 1.1 mrg __builtin_ia32_vcvtph2udq512_mask_round (__B, 2929 1.1 mrg (__v16si) 2930 1.1 mrg _mm512_setzero_si512 (), 2931 1.1 mrg __A, 2932 1.1 mrg _MM_FROUND_CUR_DIRECTION); 2933 1.1 mrg } 2934 1.1 mrg 2935 1.1 mrg #ifdef __OPTIMIZE__ 2936 1.1 mrg extern __inline __m512i 2937 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2938 1.1 mrg _mm512_cvt_roundph_epu32 (__m256h __A, int __B) 2939 1.1 mrg { 2940 1.1 mrg return (__m512i) 2941 1.1 mrg __builtin_ia32_vcvtph2udq512_mask_round (__A, 2942 1.1 mrg (__v16si) 2943 1.1 mrg _mm512_setzero_si512 (), 2944 1.1 mrg (__mmask16) -1, 2945 1.1 mrg __B); 2946 1.1 mrg } 2947 1.1 mrg 2948 1.1 mrg extern __inline __m512i 2949 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2950 1.1 mrg _mm512_mask_cvt_roundph_epu32 (__m512i __A, __mmask16 __B, __m256h __C, int __D) 2951 1.1 mrg { 2952 1.1 mrg return (__m512i) 2953 1.1 
mrg __builtin_ia32_vcvtph2udq512_mask_round (__C, 2954 1.1 mrg (__v16si) __A, 2955 1.1 mrg __B, 2956 1.1 mrg __D); 2957 1.1 mrg } 2958 1.1 mrg 2959 1.1 mrg extern __inline __m512i 2960 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2961 1.1 mrg _mm512_maskz_cvt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C) 2962 1.1 mrg { 2963 1.1 mrg return (__m512i) 2964 1.1 mrg __builtin_ia32_vcvtph2udq512_mask_round (__B, 2965 1.1 mrg (__v16si) 2966 1.1 mrg _mm512_setzero_si512 (), 2967 1.1 mrg __A, 2968 1.1 mrg __C); 2969 1.1 mrg } 2970 1.1 mrg 2971 1.1 mrg #else 2972 1.1 mrg #define _mm512_cvt_roundph_epu32(A, B) \ 2973 1.1 mrg ((__m512i) \ 2974 1.1 mrg __builtin_ia32_vcvtph2udq512_mask_round ((A), \ 2975 1.1 mrg (__v16si) \ 2976 1.1 mrg _mm512_setzero_si512 (), \ 2977 1.1 mrg (__mmask16)-1, \ 2978 1.1 mrg (B))) 2979 1.1 mrg 2980 1.1 mrg #define _mm512_mask_cvt_roundph_epu32(A, B, C, D) \ 2981 1.1 mrg ((__m512i) \ 2982 1.1 mrg __builtin_ia32_vcvtph2udq512_mask_round ((C), (__v16si)(A), (B), (D))) 2983 1.1 mrg 2984 1.1 mrg #define _mm512_maskz_cvt_roundph_epu32(A, B, C) \ 2985 1.1 mrg ((__m512i) \ 2986 1.1 mrg __builtin_ia32_vcvtph2udq512_mask_round ((B), \ 2987 1.1 mrg (__v16si) \ 2988 1.1 mrg _mm512_setzero_si512 (), \ 2989 1.1 mrg (A), \ 2990 1.1 mrg (C))) 2991 1.1 mrg 2992 1.1 mrg #endif /* __OPTIMIZE__ */ 2993 1.1 mrg 2994 1.1 mrg /* Intrinsics vcvttph2dq. 
*/ 2995 1.1 mrg extern __inline __m512i 2996 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2997 1.1 mrg _mm512_cvttph_epi32 (__m256h __A) 2998 1.1 mrg { 2999 1.1 mrg return (__m512i) 3000 1.1 mrg __builtin_ia32_vcvttph2dq512_mask_round (__A, 3001 1.1 mrg (__v16si) 3002 1.1 mrg _mm512_setzero_si512 (), 3003 1.1 mrg (__mmask16) -1, 3004 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3005 1.1 mrg } 3006 1.1 mrg 3007 1.1 mrg extern __inline __m512i 3008 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3009 1.1 mrg _mm512_mask_cvttph_epi32 (__m512i __A, __mmask16 __B, __m256h __C) 3010 1.1 mrg { 3011 1.1 mrg return (__m512i) 3012 1.1 mrg __builtin_ia32_vcvttph2dq512_mask_round (__C, 3013 1.1 mrg (__v16si) __A, 3014 1.1 mrg __B, 3015 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3016 1.1 mrg } 3017 1.1 mrg 3018 1.1 mrg extern __inline __m512i 3019 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3020 1.1 mrg _mm512_maskz_cvttph_epi32 (__mmask16 __A, __m256h __B) 3021 1.1 mrg { 3022 1.1 mrg return (__m512i) 3023 1.1 mrg __builtin_ia32_vcvttph2dq512_mask_round (__B, 3024 1.1 mrg (__v16si) 3025 1.1 mrg _mm512_setzero_si512 (), 3026 1.1 mrg __A, 3027 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3028 1.1 mrg } 3029 1.1 mrg 3030 1.1 mrg #ifdef __OPTIMIZE__ 3031 1.1 mrg extern __inline __m512i 3032 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3033 1.1 mrg _mm512_cvtt_roundph_epi32 (__m256h __A, int __B) 3034 1.1 mrg { 3035 1.1 mrg return (__m512i) 3036 1.1 mrg __builtin_ia32_vcvttph2dq512_mask_round (__A, 3037 1.1 mrg (__v16si) 3038 1.1 mrg _mm512_setzero_si512 (), 3039 1.1 mrg (__mmask16) -1, 3040 1.1 mrg __B); 3041 1.1 mrg } 3042 1.1 mrg 3043 1.1 mrg extern __inline __m512i 3044 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3045 1.1 mrg _mm512_mask_cvtt_roundph_epi32 (__m512i __A, __mmask16 __B, 3046 1.1 mrg __m256h __C, int __D) 3047 1.1 mrg { 3048 1.1 mrg return 
(__m512i) 3049 1.1 mrg __builtin_ia32_vcvttph2dq512_mask_round (__C, 3050 1.1 mrg (__v16si) __A, 3051 1.1 mrg __B, 3052 1.1 mrg __D); 3053 1.1 mrg } 3054 1.1 mrg 3055 1.1 mrg extern __inline __m512i 3056 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3057 1.1 mrg _mm512_maskz_cvtt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C) 3058 1.1 mrg { 3059 1.1 mrg return (__m512i) 3060 1.1 mrg __builtin_ia32_vcvttph2dq512_mask_round (__B, 3061 1.1 mrg (__v16si) 3062 1.1 mrg _mm512_setzero_si512 (), 3063 1.1 mrg __A, 3064 1.1 mrg __C); 3065 1.1 mrg } 3066 1.1 mrg 3067 1.1 mrg #else 3068 1.1 mrg #define _mm512_cvtt_roundph_epi32(A, B) \ 3069 1.1 mrg ((__m512i) \ 3070 1.1 mrg __builtin_ia32_vcvttph2dq512_mask_round ((A), \ 3071 1.1 mrg (__v16si) \ 3072 1.1 mrg (_mm512_setzero_si512 ()), \ 3073 1.1 mrg (__mmask16)(-1), (B))) 3074 1.1 mrg 3075 1.1 mrg #define _mm512_mask_cvtt_roundph_epi32(A, B, C, D) \ 3076 1.1 mrg ((__m512i) \ 3077 1.1 mrg __builtin_ia32_vcvttph2dq512_mask_round ((C), \ 3078 1.1 mrg (__v16si)(A), \ 3079 1.1 mrg (B), \ 3080 1.1 mrg (D))) 3081 1.1 mrg 3082 1.1 mrg #define _mm512_maskz_cvtt_roundph_epi32(A, B, C) \ 3083 1.1 mrg ((__m512i) \ 3084 1.1 mrg __builtin_ia32_vcvttph2dq512_mask_round ((B), \ 3085 1.1 mrg (__v16si) \ 3086 1.1 mrg _mm512_setzero_si512 (), \ 3087 1.1 mrg (A), \ 3088 1.1 mrg (C))) 3089 1.1 mrg 3090 1.1 mrg #endif /* __OPTIMIZE__ */ 3091 1.1 mrg 3092 1.1 mrg /* Intrinsics vcvttph2udq. 
*/ 3093 1.1 mrg extern __inline __m512i 3094 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3095 1.1 mrg _mm512_cvttph_epu32 (__m256h __A) 3096 1.1 mrg { 3097 1.1 mrg return (__m512i) 3098 1.1 mrg __builtin_ia32_vcvttph2udq512_mask_round (__A, 3099 1.1 mrg (__v16si) 3100 1.1 mrg _mm512_setzero_si512 (), 3101 1.1 mrg (__mmask16) -1, 3102 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3103 1.1 mrg } 3104 1.1 mrg 3105 1.1 mrg extern __inline __m512i 3106 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3107 1.1 mrg _mm512_mask_cvttph_epu32 (__m512i __A, __mmask16 __B, __m256h __C) 3108 1.1 mrg { 3109 1.1 mrg return (__m512i) 3110 1.1 mrg __builtin_ia32_vcvttph2udq512_mask_round (__C, 3111 1.1 mrg (__v16si) __A, 3112 1.1 mrg __B, 3113 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3114 1.1 mrg } 3115 1.1 mrg 3116 1.1 mrg extern __inline __m512i 3117 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3118 1.1 mrg _mm512_maskz_cvttph_epu32 (__mmask16 __A, __m256h __B) 3119 1.1 mrg { 3120 1.1 mrg return (__m512i) 3121 1.1 mrg __builtin_ia32_vcvttph2udq512_mask_round (__B, 3122 1.1 mrg (__v16si) 3123 1.1 mrg _mm512_setzero_si512 (), 3124 1.1 mrg __A, 3125 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3126 1.1 mrg } 3127 1.1 mrg 3128 1.1 mrg #ifdef __OPTIMIZE__ 3129 1.1 mrg extern __inline __m512i 3130 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3131 1.1 mrg _mm512_cvtt_roundph_epu32 (__m256h __A, int __B) 3132 1.1 mrg { 3133 1.1 mrg return (__m512i) 3134 1.1 mrg __builtin_ia32_vcvttph2udq512_mask_round (__A, 3135 1.1 mrg (__v16si) 3136 1.1 mrg _mm512_setzero_si512 (), 3137 1.1 mrg (__mmask16) -1, 3138 1.1 mrg __B); 3139 1.1 mrg } 3140 1.1 mrg 3141 1.1 mrg extern __inline __m512i 3142 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3143 1.1 mrg _mm512_mask_cvtt_roundph_epu32 (__m512i __A, __mmask16 __B, 3144 1.1 mrg __m256h __C, int __D) 3145 1.1 mrg { 3146 1.1 mrg 
return (__m512i) 3147 1.1 mrg __builtin_ia32_vcvttph2udq512_mask_round (__C, 3148 1.1 mrg (__v16si) __A, 3149 1.1 mrg __B, 3150 1.1 mrg __D); 3151 1.1 mrg } 3152 1.1 mrg 3153 1.1 mrg extern __inline __m512i 3154 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3155 1.1 mrg _mm512_maskz_cvtt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C) 3156 1.1 mrg { 3157 1.1 mrg return (__m512i) 3158 1.1 mrg __builtin_ia32_vcvttph2udq512_mask_round (__B, 3159 1.1 mrg (__v16si) 3160 1.1 mrg _mm512_setzero_si512 (), 3161 1.1 mrg __A, 3162 1.1 mrg __C); 3163 1.1 mrg } 3164 1.1 mrg 3165 1.1 mrg #else 3166 1.1 mrg #define _mm512_cvtt_roundph_epu32(A, B) \ 3167 1.1 mrg ((__m512i) \ 3168 1.1 mrg __builtin_ia32_vcvttph2udq512_mask_round ((A), \ 3169 1.1 mrg (__v16si) \ 3170 1.1 mrg _mm512_setzero_si512 (), \ 3171 1.1 mrg (__mmask16)-1, \ 3172 1.1 mrg (B))) 3173 1.1 mrg 3174 1.1 mrg #define _mm512_mask_cvtt_roundph_epu32(A, B, C, D) \ 3175 1.1 mrg ((__m512i) \ 3176 1.1 mrg __builtin_ia32_vcvttph2udq512_mask_round ((C), \ 3177 1.1 mrg (__v16si)(A), \ 3178 1.1 mrg (B), \ 3179 1.1 mrg (D))) 3180 1.1 mrg 3181 1.1 mrg #define _mm512_maskz_cvtt_roundph_epu32(A, B, C) \ 3182 1.1 mrg ((__m512i) \ 3183 1.1 mrg __builtin_ia32_vcvttph2udq512_mask_round ((B), \ 3184 1.1 mrg (__v16si) \ 3185 1.1 mrg _mm512_setzero_si512 (), \ 3186 1.1 mrg (A), \ 3187 1.1 mrg (C))) 3188 1.1 mrg 3189 1.1 mrg #endif /* __OPTIMIZE__ */ 3190 1.1 mrg 3191 1.1 mrg /* Intrinsics vcvtdq2ph. 
*/ 3192 1.1 mrg extern __inline __m256h 3193 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3194 1.1 mrg _mm512_cvtepi32_ph (__m512i __A) 3195 1.1 mrg { 3196 1.1 mrg return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A, 3197 1.1 mrg _mm256_setzero_ph (), 3198 1.1 mrg (__mmask16) -1, 3199 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3200 1.1 mrg } 3201 1.1 mrg 3202 1.1 mrg extern __inline __m256h 3203 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3204 1.1 mrg _mm512_mask_cvtepi32_ph (__m256h __A, __mmask16 __B, __m512i __C) 3205 1.1 mrg { 3206 1.1 mrg return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C, 3207 1.1 mrg __A, 3208 1.1 mrg __B, 3209 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3210 1.1 mrg } 3211 1.1 mrg 3212 1.1 mrg extern __inline __m256h 3213 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3214 1.1 mrg _mm512_maskz_cvtepi32_ph (__mmask16 __A, __m512i __B) 3215 1.1 mrg { 3216 1.1 mrg return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B, 3217 1.1 mrg _mm256_setzero_ph (), 3218 1.1 mrg __A, 3219 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3220 1.1 mrg } 3221 1.1 mrg 3222 1.1 mrg #ifdef __OPTIMIZE__ 3223 1.1 mrg extern __inline __m256h 3224 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3225 1.1 mrg _mm512_cvt_roundepi32_ph (__m512i __A, int __B) 3226 1.1 mrg { 3227 1.1 mrg return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A, 3228 1.1 mrg _mm256_setzero_ph (), 3229 1.1 mrg (__mmask16) -1, 3230 1.1 mrg __B); 3231 1.1 mrg } 3232 1.1 mrg 3233 1.1 mrg extern __inline __m256h 3234 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3235 1.1 mrg _mm512_mask_cvt_roundepi32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D) 3236 1.1 mrg { 3237 1.1 mrg return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C, 3238 1.1 mrg __A, 3239 1.1 mrg __B, 3240 1.1 mrg __D); 3241 1.1 mrg } 3242 1.1 mrg 3243 1.1 mrg extern 
__inline __m256h 3244 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3245 1.1 mrg _mm512_maskz_cvt_roundepi32_ph (__mmask16 __A, __m512i __B, int __C) 3246 1.1 mrg { 3247 1.1 mrg return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B, 3248 1.1 mrg _mm256_setzero_ph (), 3249 1.1 mrg __A, 3250 1.1 mrg __C); 3251 1.1 mrg } 3252 1.1 mrg 3253 1.1 mrg #else 3254 1.1 mrg #define _mm512_cvt_roundepi32_ph(A, B) \ 3255 1.1 mrg (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(A), \ 3256 1.1 mrg _mm256_setzero_ph (), \ 3257 1.1 mrg (__mmask16)-1, \ 3258 1.1 mrg (B))) 3259 1.1 mrg 3260 1.1 mrg #define _mm512_mask_cvt_roundepi32_ph(A, B, C, D) \ 3261 1.1 mrg (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(C), \ 3262 1.1 mrg (A), \ 3263 1.1 mrg (B), \ 3264 1.1 mrg (D))) 3265 1.1 mrg 3266 1.1 mrg #define _mm512_maskz_cvt_roundepi32_ph(A, B, C) \ 3267 1.1 mrg (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(B), \ 3268 1.1 mrg _mm256_setzero_ph (), \ 3269 1.1 mrg (A), \ 3270 1.1 mrg (C))) 3271 1.1 mrg 3272 1.1 mrg #endif /* __OPTIMIZE__ */ 3273 1.1 mrg 3274 1.1 mrg /* Intrinsics vcvtudq2ph. 
*/ 3275 1.1 mrg extern __inline __m256h 3276 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3277 1.1 mrg _mm512_cvtepu32_ph (__m512i __A) 3278 1.1 mrg { 3279 1.1 mrg return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A, 3280 1.1 mrg _mm256_setzero_ph (), 3281 1.1 mrg (__mmask16) -1, 3282 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3283 1.1 mrg } 3284 1.1 mrg 3285 1.1 mrg extern __inline __m256h 3286 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3287 1.1 mrg _mm512_mask_cvtepu32_ph (__m256h __A, __mmask16 __B, __m512i __C) 3288 1.1 mrg { 3289 1.1 mrg return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C, 3290 1.1 mrg __A, 3291 1.1 mrg __B, 3292 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3293 1.1 mrg } 3294 1.1 mrg 3295 1.1 mrg extern __inline __m256h 3296 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3297 1.1 mrg _mm512_maskz_cvtepu32_ph (__mmask16 __A, __m512i __B) 3298 1.1 mrg { 3299 1.1 mrg return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B, 3300 1.1 mrg _mm256_setzero_ph (), 3301 1.1 mrg __A, 3302 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3303 1.1 mrg } 3304 1.1 mrg 3305 1.1 mrg #ifdef __OPTIMIZE__ 3306 1.1 mrg extern __inline __m256h 3307 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3308 1.1 mrg _mm512_cvt_roundepu32_ph (__m512i __A, int __B) 3309 1.1 mrg { 3310 1.1 mrg return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A, 3311 1.1 mrg _mm256_setzero_ph (), 3312 1.1 mrg (__mmask16) -1, 3313 1.1 mrg __B); 3314 1.1 mrg } 3315 1.1 mrg 3316 1.1 mrg extern __inline __m256h 3317 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3318 1.1 mrg _mm512_mask_cvt_roundepu32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D) 3319 1.1 mrg { 3320 1.1 mrg return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C, 3321 1.1 mrg __A, 3322 1.1 mrg __B, 3323 1.1 mrg __D); 3324 1.1 mrg } 3325 1.1 mrg 3326 1.1 mrg extern 
__inline __m256h 3327 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3328 1.1 mrg _mm512_maskz_cvt_roundepu32_ph (__mmask16 __A, __m512i __B, int __C) 3329 1.1 mrg { 3330 1.1 mrg return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B, 3331 1.1 mrg _mm256_setzero_ph (), 3332 1.1 mrg __A, 3333 1.1 mrg __C); 3334 1.1 mrg } 3335 1.1 mrg 3336 1.1 mrg #else 3337 1.1 mrg #define _mm512_cvt_roundepu32_ph(A, B) \ 3338 1.1 mrg (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)(A), \ 3339 1.1 mrg _mm256_setzero_ph (), \ 3340 1.1 mrg (__mmask16)-1, \ 3341 1.1 mrg B)) 3342 1.1 mrg 3343 1.1 mrg #define _mm512_mask_cvt_roundepu32_ph(A, B, C, D) \ 3344 1.1 mrg (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)C, \ 3345 1.1 mrg A, \ 3346 1.1 mrg B, \ 3347 1.1 mrg D)) 3348 1.1 mrg 3349 1.1 mrg #define _mm512_maskz_cvt_roundepu32_ph(A, B, C) \ 3350 1.1 mrg (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)B, \ 3351 1.1 mrg _mm256_setzero_ph (), \ 3352 1.1 mrg A, \ 3353 1.1 mrg C)) 3354 1.1 mrg 3355 1.1 mrg #endif /* __OPTIMIZE__ */ 3356 1.1 mrg 3357 1.1 mrg /* Intrinsics vcvtph2qq. 
*/ 3358 1.1 mrg extern __inline __m512i 3359 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3360 1.1 mrg _mm512_cvtph_epi64 (__m128h __A) 3361 1.1 mrg { 3362 1.1 mrg return __builtin_ia32_vcvtph2qq512_mask_round (__A, 3363 1.1 mrg _mm512_setzero_si512 (), 3364 1.1 mrg (__mmask8) -1, 3365 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3366 1.1 mrg } 3367 1.1 mrg 3368 1.1 mrg extern __inline __m512i 3369 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3370 1.1 mrg _mm512_mask_cvtph_epi64 (__m512i __A, __mmask8 __B, __m128h __C) 3371 1.1 mrg { 3372 1.1 mrg return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B, 3373 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3374 1.1 mrg } 3375 1.1 mrg 3376 1.1 mrg extern __inline __m512i 3377 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3378 1.1 mrg _mm512_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B) 3379 1.1 mrg { 3380 1.1 mrg return __builtin_ia32_vcvtph2qq512_mask_round (__B, 3381 1.1 mrg _mm512_setzero_si512 (), 3382 1.1 mrg __A, 3383 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3384 1.1 mrg } 3385 1.1 mrg 3386 1.1 mrg #ifdef __OPTIMIZE__ 3387 1.1 mrg extern __inline __m512i 3388 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3389 1.1 mrg _mm512_cvt_roundph_epi64 (__m128h __A, int __B) 3390 1.1 mrg { 3391 1.1 mrg return __builtin_ia32_vcvtph2qq512_mask_round (__A, 3392 1.1 mrg _mm512_setzero_si512 (), 3393 1.1 mrg (__mmask8) -1, 3394 1.1 mrg __B); 3395 1.1 mrg } 3396 1.1 mrg 3397 1.1 mrg extern __inline __m512i 3398 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3399 1.1 mrg _mm512_mask_cvt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D) 3400 1.1 mrg { 3401 1.1 mrg return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B, __D); 3402 1.1 mrg } 3403 1.1 mrg 3404 1.1 mrg extern __inline __m512i 3405 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3406 1.1 mrg 
_mm512_maskz_cvt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C) 3407 1.1 mrg { 3408 1.1 mrg return __builtin_ia32_vcvtph2qq512_mask_round (__B, 3409 1.1 mrg _mm512_setzero_si512 (), 3410 1.1 mrg __A, 3411 1.1 mrg __C); 3412 1.1 mrg } 3413 1.1 mrg 3414 1.1 mrg #else 3415 1.1 mrg #define _mm512_cvt_roundph_epi64(A, B) \ 3416 1.1 mrg (__builtin_ia32_vcvtph2qq512_mask_round ((A), \ 3417 1.1 mrg _mm512_setzero_si512 (), \ 3418 1.1 mrg (__mmask8)-1, \ 3419 1.1 mrg (B))) 3420 1.1 mrg 3421 1.1 mrg #define _mm512_mask_cvt_roundph_epi64(A, B, C, D) \ 3422 1.1 mrg (__builtin_ia32_vcvtph2qq512_mask_round ((C), (A), (B), (D))) 3423 1.1 mrg 3424 1.1 mrg #define _mm512_maskz_cvt_roundph_epi64(A, B, C) \ 3425 1.1 mrg (__builtin_ia32_vcvtph2qq512_mask_round ((B), \ 3426 1.1 mrg _mm512_setzero_si512 (), \ 3427 1.1 mrg (A), \ 3428 1.1 mrg (C))) 3429 1.1 mrg 3430 1.1 mrg #endif /* __OPTIMIZE__ */ 3431 1.1 mrg 3432 1.1 mrg /* Intrinsics vcvtph2uqq. */ 3433 1.1 mrg extern __inline __m512i 3434 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3435 1.1 mrg _mm512_cvtph_epu64 (__m128h __A) 3436 1.1 mrg { 3437 1.1 mrg return __builtin_ia32_vcvtph2uqq512_mask_round (__A, 3438 1.1 mrg _mm512_setzero_si512 (), 3439 1.1 mrg (__mmask8) -1, 3440 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3441 1.1 mrg } 3442 1.1 mrg 3443 1.1 mrg extern __inline __m512i 3444 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3445 1.1 mrg _mm512_mask_cvtph_epu64 (__m512i __A, __mmask8 __B, __m128h __C) 3446 1.1 mrg { 3447 1.1 mrg return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B, 3448 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3449 1.1 mrg } 3450 1.1 mrg 3451 1.1 mrg extern __inline __m512i 3452 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3453 1.1 mrg _mm512_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B) 3454 1.1 mrg { 3455 1.1 mrg return __builtin_ia32_vcvtph2uqq512_mask_round (__B, 3456 1.1 mrg _mm512_setzero_si512 (), 3457 1.1 mrg 
__A, 3458 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3459 1.1 mrg } 3460 1.1 mrg 3461 1.1 mrg #ifdef __OPTIMIZE__ 3462 1.1 mrg 3463 1.1 mrg extern __inline __m512i 3464 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3465 1.1 mrg _mm512_cvt_roundph_epu64 (__m128h __A, int __B) 3466 1.1 mrg { 3467 1.1 mrg return __builtin_ia32_vcvtph2uqq512_mask_round (__A, 3468 1.1 mrg _mm512_setzero_si512 (), 3469 1.1 mrg (__mmask8) -1, 3470 1.1 mrg __B); 3471 1.1 mrg } 3472 1.1 mrg 3473 1.1 mrg extern __inline __m512i 3474 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3475 1.1 mrg _mm512_mask_cvt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D) 3476 1.1 mrg { 3477 1.1 mrg return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B, __D); 3478 1.1 mrg } 3479 1.1 mrg 3480 1.1 mrg extern __inline __m512i 3481 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3482 1.1 mrg _mm512_maskz_cvt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C) 3483 1.1 mrg { 3484 1.1 mrg return __builtin_ia32_vcvtph2uqq512_mask_round (__B, 3485 1.1 mrg _mm512_setzero_si512 (), 3486 1.1 mrg __A, 3487 1.1 mrg __C); 3488 1.1 mrg } 3489 1.1 mrg 3490 1.1 mrg #else 3491 1.1 mrg #define _mm512_cvt_roundph_epu64(A, B) \ 3492 1.1 mrg (__builtin_ia32_vcvtph2uqq512_mask_round ((A), \ 3493 1.1 mrg _mm512_setzero_si512 (), \ 3494 1.1 mrg (__mmask8)-1, \ 3495 1.1 mrg (B))) 3496 1.1 mrg 3497 1.1 mrg #define _mm512_mask_cvt_roundph_epu64(A, B, C, D) \ 3498 1.1 mrg (__builtin_ia32_vcvtph2uqq512_mask_round ((C), (A), (B), (D))) 3499 1.1 mrg 3500 1.1 mrg #define _mm512_maskz_cvt_roundph_epu64(A, B, C) \ 3501 1.1 mrg (__builtin_ia32_vcvtph2uqq512_mask_round ((B), \ 3502 1.1 mrg _mm512_setzero_si512 (), \ 3503 1.1 mrg (A), \ 3504 1.1 mrg (C))) 3505 1.1 mrg 3506 1.1 mrg #endif /* __OPTIMIZE__ */ 3507 1.1 mrg 3508 1.1 mrg /* Intrinsics vcvttph2qq. 
*/ 3509 1.1 mrg extern __inline __m512i 3510 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3511 1.1 mrg _mm512_cvttph_epi64 (__m128h __A) 3512 1.1 mrg { 3513 1.1 mrg return __builtin_ia32_vcvttph2qq512_mask_round (__A, 3514 1.1 mrg _mm512_setzero_si512 (), 3515 1.1 mrg (__mmask8) -1, 3516 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3517 1.1 mrg } 3518 1.1 mrg 3519 1.1 mrg extern __inline __m512i 3520 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3521 1.1 mrg _mm512_mask_cvttph_epi64 (__m512i __A, __mmask8 __B, __m128h __C) 3522 1.1 mrg { 3523 1.1 mrg return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B, 3524 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3525 1.1 mrg } 3526 1.1 mrg 3527 1.1 mrg extern __inline __m512i 3528 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3529 1.1 mrg _mm512_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B) 3530 1.1 mrg { 3531 1.1 mrg return __builtin_ia32_vcvttph2qq512_mask_round (__B, 3532 1.1 mrg _mm512_setzero_si512 (), 3533 1.1 mrg __A, 3534 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3535 1.1 mrg } 3536 1.1 mrg 3537 1.1 mrg #ifdef __OPTIMIZE__ 3538 1.1 mrg extern __inline __m512i 3539 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3540 1.1 mrg _mm512_cvtt_roundph_epi64 (__m128h __A, int __B) 3541 1.1 mrg { 3542 1.1 mrg return __builtin_ia32_vcvttph2qq512_mask_round (__A, 3543 1.1 mrg _mm512_setzero_si512 (), 3544 1.1 mrg (__mmask8) -1, 3545 1.1 mrg __B); 3546 1.1 mrg } 3547 1.1 mrg 3548 1.1 mrg extern __inline __m512i 3549 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3550 1.1 mrg _mm512_mask_cvtt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D) 3551 1.1 mrg { 3552 1.1 mrg return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B, __D); 3553 1.1 mrg } 3554 1.1 mrg 3555 1.1 mrg extern __inline __m512i 3556 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3557 1.1 
mrg _mm512_maskz_cvtt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C) 3558 1.1 mrg { 3559 1.1 mrg return __builtin_ia32_vcvttph2qq512_mask_round (__B, 3560 1.1 mrg _mm512_setzero_si512 (), 3561 1.1 mrg __A, 3562 1.1 mrg __C); 3563 1.1 mrg } 3564 1.1 mrg 3565 1.1 mrg #else 3566 1.1 mrg #define _mm512_cvtt_roundph_epi64(A, B) \ 3567 1.1 mrg (__builtin_ia32_vcvttph2qq512_mask_round ((A), \ 3568 1.1 mrg _mm512_setzero_si512 (), \ 3569 1.1 mrg (__mmask8)-1, \ 3570 1.1 mrg (B))) 3571 1.1 mrg 3572 1.1 mrg #define _mm512_mask_cvtt_roundph_epi64(A, B, C, D) \ 3573 1.1 mrg __builtin_ia32_vcvttph2qq512_mask_round ((C), (A), (B), (D)) 3574 1.1 mrg 3575 1.1 mrg #define _mm512_maskz_cvtt_roundph_epi64(A, B, C) \ 3576 1.1 mrg (__builtin_ia32_vcvttph2qq512_mask_round ((B), \ 3577 1.1 mrg _mm512_setzero_si512 (), \ 3578 1.1 mrg (A), \ 3579 1.1 mrg (C))) 3580 1.1 mrg 3581 1.1 mrg #endif /* __OPTIMIZE__ */ 3582 1.1 mrg 3583 1.1 mrg /* Intrinsics vcvttph2uqq. */ 3584 1.1 mrg extern __inline __m512i 3585 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3586 1.1 mrg _mm512_cvttph_epu64 (__m128h __A) 3587 1.1 mrg { 3588 1.1 mrg return __builtin_ia32_vcvttph2uqq512_mask_round (__A, 3589 1.1 mrg _mm512_setzero_si512 (), 3590 1.1 mrg (__mmask8) -1, 3591 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3592 1.1 mrg } 3593 1.1 mrg 3594 1.1 mrg extern __inline __m512i 3595 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3596 1.1 mrg _mm512_mask_cvttph_epu64 (__m512i __A, __mmask8 __B, __m128h __C) 3597 1.1 mrg { 3598 1.1 mrg return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B, 3599 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3600 1.1 mrg } 3601 1.1 mrg 3602 1.1 mrg extern __inline __m512i 3603 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3604 1.1 mrg _mm512_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B) 3605 1.1 mrg { 3606 1.1 mrg return __builtin_ia32_vcvttph2uqq512_mask_round (__B, 3607 1.1 mrg _mm512_setzero_si512 
(), 3608 1.1 mrg __A, 3609 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3610 1.1 mrg } 3611 1.1 mrg 3612 1.1 mrg #ifdef __OPTIMIZE__ 3613 1.1 mrg extern __inline __m512i 3614 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3615 1.1 mrg _mm512_cvtt_roundph_epu64 (__m128h __A, int __B) 3616 1.1 mrg { 3617 1.1 mrg return __builtin_ia32_vcvttph2uqq512_mask_round (__A, 3618 1.1 mrg _mm512_setzero_si512 (), 3619 1.1 mrg (__mmask8) -1, 3620 1.1 mrg __B); 3621 1.1 mrg } 3622 1.1 mrg 3623 1.1 mrg extern __inline __m512i 3624 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3625 1.1 mrg _mm512_mask_cvtt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D) 3626 1.1 mrg { 3627 1.1 mrg return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B, __D); 3628 1.1 mrg } 3629 1.1 mrg 3630 1.1 mrg extern __inline __m512i 3631 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3632 1.1 mrg _mm512_maskz_cvtt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C) 3633 1.1 mrg { 3634 1.1 mrg return __builtin_ia32_vcvttph2uqq512_mask_round (__B, 3635 1.1 mrg _mm512_setzero_si512 (), 3636 1.1 mrg __A, 3637 1.1 mrg __C); 3638 1.1 mrg } 3639 1.1 mrg 3640 1.1 mrg #else 3641 1.1 mrg #define _mm512_cvtt_roundph_epu64(A, B) \ 3642 1.1 mrg (__builtin_ia32_vcvttph2uqq512_mask_round ((A), \ 3643 1.1 mrg _mm512_setzero_si512 (), \ 3644 1.1 mrg (__mmask8)-1, \ 3645 1.1 mrg (B))) 3646 1.1 mrg 3647 1.1 mrg #define _mm512_mask_cvtt_roundph_epu64(A, B, C, D) \ 3648 1.1 mrg __builtin_ia32_vcvttph2uqq512_mask_round ((C), (A), (B), (D)) 3649 1.1 mrg 3650 1.1 mrg #define _mm512_maskz_cvtt_roundph_epu64(A, B, C) \ 3651 1.1 mrg (__builtin_ia32_vcvttph2uqq512_mask_round ((B), \ 3652 1.1 mrg _mm512_setzero_si512 (), \ 3653 1.1 mrg (A), \ 3654 1.1 mrg (C))) 3655 1.1 mrg 3656 1.1 mrg #endif /* __OPTIMIZE__ */ 3657 1.1 mrg 3658 1.1 mrg /* Intrinsics vcvtqq2ph. 
*/ 3659 1.1 mrg extern __inline __m128h 3660 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3661 1.1 mrg _mm512_cvtepi64_ph (__m512i __A) 3662 1.1 mrg { 3663 1.1 mrg return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A, 3664 1.1 mrg _mm_setzero_ph (), 3665 1.1 mrg (__mmask8) -1, 3666 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3667 1.1 mrg } 3668 1.1 mrg 3669 1.1 mrg extern __inline __m128h 3670 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3671 1.1 mrg _mm512_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m512i __C) 3672 1.1 mrg { 3673 1.1 mrg return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C, 3674 1.1 mrg __A, 3675 1.1 mrg __B, 3676 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3677 1.1 mrg } 3678 1.1 mrg 3679 1.1 mrg extern __inline __m128h 3680 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3681 1.1 mrg _mm512_maskz_cvtepi64_ph (__mmask8 __A, __m512i __B) 3682 1.1 mrg { 3683 1.1 mrg return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B, 3684 1.1 mrg _mm_setzero_ph (), 3685 1.1 mrg __A, 3686 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3687 1.1 mrg } 3688 1.1 mrg 3689 1.1 mrg #ifdef __OPTIMIZE__ 3690 1.1 mrg extern __inline __m128h 3691 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3692 1.1 mrg _mm512_cvt_roundepi64_ph (__m512i __A, int __B) 3693 1.1 mrg { 3694 1.1 mrg return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A, 3695 1.1 mrg _mm_setzero_ph (), 3696 1.1 mrg (__mmask8) -1, 3697 1.1 mrg __B); 3698 1.1 mrg } 3699 1.1 mrg 3700 1.1 mrg extern __inline __m128h 3701 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3702 1.1 mrg _mm512_mask_cvt_roundepi64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D) 3703 1.1 mrg { 3704 1.1 mrg return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C, 3705 1.1 mrg __A, 3706 1.1 mrg __B, 3707 1.1 mrg __D); 3708 1.1 mrg } 3709 1.1 mrg 3710 1.1 mrg extern __inline __m128h 3711 
1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3712 1.1 mrg _mm512_maskz_cvt_roundepi64_ph (__mmask8 __A, __m512i __B, int __C) 3713 1.1 mrg { 3714 1.1 mrg return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B, 3715 1.1 mrg _mm_setzero_ph (), 3716 1.1 mrg __A, 3717 1.1 mrg __C); 3718 1.1 mrg } 3719 1.1 mrg 3720 1.1 mrg #else 3721 1.1 mrg #define _mm512_cvt_roundepi64_ph(A, B) \ 3722 1.1 mrg (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(A), \ 3723 1.1 mrg _mm_setzero_ph (), \ 3724 1.1 mrg (__mmask8)-1, \ 3725 1.1 mrg (B))) 3726 1.1 mrg 3727 1.1 mrg #define _mm512_mask_cvt_roundepi64_ph(A, B, C, D) \ 3728 1.1 mrg (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(C), (A), (B), (D))) 3729 1.1 mrg 3730 1.1 mrg #define _mm512_maskz_cvt_roundepi64_ph(A, B, C) \ 3731 1.1 mrg (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(B), \ 3732 1.1 mrg _mm_setzero_ph (), \ 3733 1.1 mrg (A), \ 3734 1.1 mrg (C))) 3735 1.1 mrg 3736 1.1 mrg #endif /* __OPTIMIZE__ */ 3737 1.1 mrg 3738 1.1 mrg /* Intrinsics vcvtuqq2ph. 
*/ 3739 1.1 mrg extern __inline __m128h 3740 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3741 1.1 mrg _mm512_cvtepu64_ph (__m512i __A) 3742 1.1 mrg { 3743 1.1 mrg return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A, 3744 1.1 mrg _mm_setzero_ph (), 3745 1.1 mrg (__mmask8) -1, 3746 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3747 1.1 mrg } 3748 1.1 mrg 3749 1.1 mrg extern __inline __m128h 3750 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3751 1.1 mrg _mm512_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m512i __C) 3752 1.1 mrg { 3753 1.1 mrg return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C, 3754 1.1 mrg __A, 3755 1.1 mrg __B, 3756 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3757 1.1 mrg } 3758 1.1 mrg 3759 1.1 mrg extern __inline __m128h 3760 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3761 1.1 mrg _mm512_maskz_cvtepu64_ph (__mmask8 __A, __m512i __B) 3762 1.1 mrg { 3763 1.1 mrg return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B, 3764 1.1 mrg _mm_setzero_ph (), 3765 1.1 mrg __A, 3766 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3767 1.1 mrg } 3768 1.1 mrg 3769 1.1 mrg #ifdef __OPTIMIZE__ 3770 1.1 mrg extern __inline __m128h 3771 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3772 1.1 mrg _mm512_cvt_roundepu64_ph (__m512i __A, int __B) 3773 1.1 mrg { 3774 1.1 mrg return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A, 3775 1.1 mrg _mm_setzero_ph (), 3776 1.1 mrg (__mmask8) -1, 3777 1.1 mrg __B); 3778 1.1 mrg } 3779 1.1 mrg 3780 1.1 mrg extern __inline __m128h 3781 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3782 1.1 mrg _mm512_mask_cvt_roundepu64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D) 3783 1.1 mrg { 3784 1.1 mrg return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C, 3785 1.1 mrg __A, 3786 1.1 mrg __B, 3787 1.1 mrg __D); 3788 1.1 mrg } 3789 1.1 mrg 3790 1.1 mrg extern __inline __m128h 
3791 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3792 1.1 mrg _mm512_maskz_cvt_roundepu64_ph (__mmask8 __A, __m512i __B, int __C) 3793 1.1 mrg { 3794 1.1 mrg return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B, 3795 1.1 mrg _mm_setzero_ph (), 3796 1.1 mrg __A, 3797 1.1 mrg __C); 3798 1.1 mrg } 3799 1.1 mrg 3800 1.1 mrg #else 3801 1.1 mrg #define _mm512_cvt_roundepu64_ph(A, B) \ 3802 1.1 mrg (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(A), \ 3803 1.1 mrg _mm_setzero_ph (), \ 3804 1.1 mrg (__mmask8)-1, \ 3805 1.1 mrg (B))) 3806 1.1 mrg 3807 1.1 mrg #define _mm512_mask_cvt_roundepu64_ph(A, B, C, D) \ 3808 1.1 mrg (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(C), (A), (B), (D))) 3809 1.1 mrg 3810 1.1 mrg #define _mm512_maskz_cvt_roundepu64_ph(A, B, C) \ 3811 1.1 mrg (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(B), \ 3812 1.1 mrg _mm_setzero_ph (), \ 3813 1.1 mrg (A), \ 3814 1.1 mrg (C))) 3815 1.1 mrg 3816 1.1 mrg #endif /* __OPTIMIZE__ */ 3817 1.1 mrg 3818 1.1 mrg /* Intrinsics vcvtph2w. 
*/ 3819 1.1 mrg extern __inline __m512i 3820 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3821 1.1 mrg _mm512_cvtph_epi16 (__m512h __A) 3822 1.1 mrg { 3823 1.1 mrg return (__m512i) 3824 1.1 mrg __builtin_ia32_vcvtph2w512_mask_round (__A, 3825 1.1 mrg (__v32hi) 3826 1.1 mrg _mm512_setzero_si512 (), 3827 1.1 mrg (__mmask32) -1, 3828 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3829 1.1 mrg } 3830 1.1 mrg 3831 1.1 mrg extern __inline __m512i 3832 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3833 1.1 mrg _mm512_mask_cvtph_epi16 (__m512i __A, __mmask32 __B, __m512h __C) 3834 1.1 mrg { 3835 1.1 mrg return (__m512i) 3836 1.1 mrg __builtin_ia32_vcvtph2w512_mask_round (__C, 3837 1.1 mrg (__v32hi) __A, 3838 1.1 mrg __B, 3839 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3840 1.1 mrg } 3841 1.1 mrg 3842 1.1 mrg extern __inline __m512i 3843 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3844 1.1 mrg _mm512_maskz_cvtph_epi16 (__mmask32 __A, __m512h __B) 3845 1.1 mrg { 3846 1.1 mrg return (__m512i) 3847 1.1 mrg __builtin_ia32_vcvtph2w512_mask_round (__B, 3848 1.1 mrg (__v32hi) 3849 1.1 mrg _mm512_setzero_si512 (), 3850 1.1 mrg __A, 3851 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3852 1.1 mrg } 3853 1.1 mrg 3854 1.1 mrg #ifdef __OPTIMIZE__ 3855 1.1 mrg extern __inline __m512i 3856 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3857 1.1 mrg _mm512_cvt_roundph_epi16 (__m512h __A, int __B) 3858 1.1 mrg { 3859 1.1 mrg return (__m512i) 3860 1.1 mrg __builtin_ia32_vcvtph2w512_mask_round (__A, 3861 1.1 mrg (__v32hi) 3862 1.1 mrg _mm512_setzero_si512 (), 3863 1.1 mrg (__mmask32) -1, 3864 1.1 mrg __B); 3865 1.1 mrg } 3866 1.1 mrg 3867 1.1 mrg extern __inline __m512i 3868 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3869 1.1 mrg _mm512_mask_cvt_roundph_epi16 (__m512i __A, __mmask32 __B, __m512h __C, int __D) 3870 1.1 mrg { 3871 1.1 mrg return (__m512i) 3872 1.1 mrg 
__builtin_ia32_vcvtph2w512_mask_round (__C, 3873 1.1 mrg (__v32hi) __A, 3874 1.1 mrg __B, 3875 1.1 mrg __D); 3876 1.1 mrg } 3877 1.1 mrg 3878 1.1 mrg extern __inline __m512i 3879 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3880 1.1 mrg _mm512_maskz_cvt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C) 3881 1.1 mrg { 3882 1.1 mrg return (__m512i) 3883 1.1 mrg __builtin_ia32_vcvtph2w512_mask_round (__B, 3884 1.1 mrg (__v32hi) 3885 1.1 mrg _mm512_setzero_si512 (), 3886 1.1 mrg __A, 3887 1.1 mrg __C); 3888 1.1 mrg } 3889 1.1 mrg 3890 1.1 mrg #else 3891 1.1 mrg #define _mm512_cvt_roundph_epi16(A, B) \ 3892 1.1 mrg ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((A), \ 3893 1.1 mrg (__v32hi) \ 3894 1.1 mrg _mm512_setzero_si512 (), \ 3895 1.1 mrg (__mmask32)-1, \ 3896 1.1 mrg (B))) 3897 1.1 mrg 3898 1.1 mrg #define _mm512_mask_cvt_roundph_epi16(A, B, C, D) \ 3899 1.1 mrg ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((C), \ 3900 1.1 mrg (__v32hi)(A), \ 3901 1.1 mrg (B), \ 3902 1.1 mrg (D))) 3903 1.1 mrg 3904 1.1 mrg #define _mm512_maskz_cvt_roundph_epi16(A, B, C) \ 3905 1.1 mrg ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((B), \ 3906 1.1 mrg (__v32hi) \ 3907 1.1 mrg _mm512_setzero_si512 (), \ 3908 1.1 mrg (A), \ 3909 1.1 mrg (C))) 3910 1.1 mrg 3911 1.1 mrg #endif /* __OPTIMIZE__ */ 3912 1.1 mrg 3913 1.1 mrg /* Intrinsics vcvtph2uw. 
*/ 3914 1.1 mrg extern __inline __m512i 3915 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3916 1.1 mrg _mm512_cvtph_epu16 (__m512h __A) 3917 1.1 mrg { 3918 1.1 mrg return (__m512i) 3919 1.1 mrg __builtin_ia32_vcvtph2uw512_mask_round (__A, 3920 1.1 mrg (__v32hi) 3921 1.1 mrg _mm512_setzero_si512 (), 3922 1.1 mrg (__mmask32) -1, 3923 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3924 1.1 mrg } 3925 1.1 mrg 3926 1.1 mrg extern __inline __m512i 3927 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3928 1.1 mrg _mm512_mask_cvtph_epu16 (__m512i __A, __mmask32 __B, __m512h __C) 3929 1.1 mrg { 3930 1.1 mrg return (__m512i) 3931 1.1 mrg __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B, 3932 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3933 1.1 mrg } 3934 1.1 mrg 3935 1.1 mrg extern __inline __m512i 3936 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3937 1.1 mrg _mm512_maskz_cvtph_epu16 (__mmask32 __A, __m512h __B) 3938 1.1 mrg { 3939 1.1 mrg return (__m512i) 3940 1.1 mrg __builtin_ia32_vcvtph2uw512_mask_round (__B, 3941 1.1 mrg (__v32hi) 3942 1.1 mrg _mm512_setzero_si512 (), 3943 1.1 mrg __A, 3944 1.1 mrg _MM_FROUND_CUR_DIRECTION); 3945 1.1 mrg } 3946 1.1 mrg 3947 1.1 mrg #ifdef __OPTIMIZE__ 3948 1.1 mrg extern __inline __m512i 3949 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3950 1.1 mrg _mm512_cvt_roundph_epu16 (__m512h __A, int __B) 3951 1.1 mrg { 3952 1.1 mrg return (__m512i) 3953 1.1 mrg __builtin_ia32_vcvtph2uw512_mask_round (__A, 3954 1.1 mrg (__v32hi) 3955 1.1 mrg _mm512_setzero_si512 (), 3956 1.1 mrg (__mmask32) -1, 3957 1.1 mrg __B); 3958 1.1 mrg } 3959 1.1 mrg 3960 1.1 mrg extern __inline __m512i 3961 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3962 1.1 mrg _mm512_mask_cvt_roundph_epu16 (__m512i __A, __mmask32 __B, __m512h __C, int __D) 3963 1.1 mrg { 3964 1.1 mrg return (__m512i) 3965 1.1 mrg 
__builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B, __D); 3966 1.1 mrg } 3967 1.1 mrg 3968 1.1 mrg extern __inline __m512i 3969 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3970 1.1 mrg _mm512_maskz_cvt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C) 3971 1.1 mrg { 3972 1.1 mrg return (__m512i) 3973 1.1 mrg __builtin_ia32_vcvtph2uw512_mask_round (__B, 3974 1.1 mrg (__v32hi) 3975 1.1 mrg _mm512_setzero_si512 (), 3976 1.1 mrg __A, 3977 1.1 mrg __C); 3978 1.1 mrg } 3979 1.1 mrg 3980 1.1 mrg #else 3981 1.1 mrg #define _mm512_cvt_roundph_epu16(A, B) \ 3982 1.1 mrg ((__m512i) \ 3983 1.1 mrg __builtin_ia32_vcvtph2uw512_mask_round ((A), \ 3984 1.1 mrg (__v32hi) \ 3985 1.1 mrg _mm512_setzero_si512 (), \ 3986 1.1 mrg (__mmask32)-1, (B))) 3987 1.1 mrg 3988 1.1 mrg #define _mm512_mask_cvt_roundph_epu16(A, B, C, D) \ 3989 1.1 mrg ((__m512i) \ 3990 1.1 mrg __builtin_ia32_vcvtph2uw512_mask_round ((C), (__v32hi)(A), (B), (D))) 3991 1.1 mrg 3992 1.1 mrg #define _mm512_maskz_cvt_roundph_epu16(A, B, C) \ 3993 1.1 mrg ((__m512i) \ 3994 1.1 mrg __builtin_ia32_vcvtph2uw512_mask_round ((B), \ 3995 1.1 mrg (__v32hi) \ 3996 1.1 mrg _mm512_setzero_si512 (), \ 3997 1.1 mrg (A), \ 3998 1.1 mrg (C))) 3999 1.1 mrg 4000 1.1 mrg #endif /* __OPTIMIZE__ */ 4001 1.1 mrg 4002 1.1 mrg /* Intrinsics vcvttph2w. 
*/ 4003 1.1 mrg extern __inline __m512i 4004 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4005 1.1 mrg _mm512_cvttph_epi16 (__m512h __A) 4006 1.1 mrg { 4007 1.1 mrg return (__m512i) 4008 1.1 mrg __builtin_ia32_vcvttph2w512_mask_round (__A, 4009 1.1 mrg (__v32hi) 4010 1.1 mrg _mm512_setzero_si512 (), 4011 1.1 mrg (__mmask32) -1, 4012 1.1 mrg _MM_FROUND_CUR_DIRECTION); 4013 1.1 mrg } 4014 1.1 mrg 4015 1.1 mrg extern __inline __m512i 4016 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4017 1.1 mrg _mm512_mask_cvttph_epi16 (__m512i __A, __mmask32 __B, __m512h __C) 4018 1.1 mrg { 4019 1.1 mrg return (__m512i) 4020 1.1 mrg __builtin_ia32_vcvttph2w512_mask_round (__C, 4021 1.1 mrg (__v32hi) __A, 4022 1.1 mrg __B, 4023 1.1 mrg _MM_FROUND_CUR_DIRECTION); 4024 1.1 mrg } 4025 1.1 mrg 4026 1.1 mrg extern __inline __m512i 4027 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4028 1.1 mrg _mm512_maskz_cvttph_epi16 (__mmask32 __A, __m512h __B) 4029 1.1 mrg { 4030 1.1 mrg return (__m512i) 4031 1.1 mrg __builtin_ia32_vcvttph2w512_mask_round (__B, 4032 1.1 mrg (__v32hi) 4033 1.1 mrg _mm512_setzero_si512 (), 4034 1.1 mrg __A, 4035 1.1 mrg _MM_FROUND_CUR_DIRECTION); 4036 1.1 mrg } 4037 1.1 mrg 4038 1.1 mrg #ifdef __OPTIMIZE__ 4039 1.1 mrg extern __inline __m512i 4040 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4041 1.1 mrg _mm512_cvtt_roundph_epi16 (__m512h __A, int __B) 4042 1.1 mrg { 4043 1.1 mrg return (__m512i) 4044 1.1 mrg __builtin_ia32_vcvttph2w512_mask_round (__A, 4045 1.1 mrg (__v32hi) 4046 1.1 mrg _mm512_setzero_si512 (), 4047 1.1 mrg (__mmask32) -1, 4048 1.1 mrg __B); 4049 1.1 mrg } 4050 1.1 mrg 4051 1.1 mrg extern __inline __m512i 4052 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4053 1.1 mrg _mm512_mask_cvtt_roundph_epi16 (__m512i __A, __mmask32 __B, 4054 1.1 mrg __m512h __C, int __D) 4055 1.1 mrg { 4056 1.1 mrg return 
(__m512i) 4057 1.1 mrg __builtin_ia32_vcvttph2w512_mask_round (__C, 4058 1.1 mrg (__v32hi) __A, 4059 1.1 mrg __B, 4060 1.1 mrg __D); 4061 1.1 mrg } 4062 1.1 mrg 4063 1.1 mrg extern __inline __m512i 4064 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4065 1.1 mrg _mm512_maskz_cvtt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C) 4066 1.1 mrg { 4067 1.1 mrg return (__m512i) 4068 1.1 mrg __builtin_ia32_vcvttph2w512_mask_round (__B, 4069 1.1 mrg (__v32hi) 4070 1.1 mrg _mm512_setzero_si512 (), 4071 1.1 mrg __A, 4072 1.1 mrg __C); 4073 1.1 mrg } 4074 1.1 mrg 4075 1.1 mrg #else 4076 1.1 mrg #define _mm512_cvtt_roundph_epi16(A, B) \ 4077 1.1 mrg ((__m512i) \ 4078 1.1 mrg __builtin_ia32_vcvttph2w512_mask_round ((A), \ 4079 1.1 mrg (__v32hi) \ 4080 1.1 mrg _mm512_setzero_si512 (), \ 4081 1.1 mrg (__mmask32)-1, \ 4082 1.1 mrg (B))) 4083 1.1 mrg 4084 1.1 mrg #define _mm512_mask_cvtt_roundph_epi16(A, B, C, D) \ 4085 1.1 mrg ((__m512i) \ 4086 1.1 mrg __builtin_ia32_vcvttph2w512_mask_round ((C), \ 4087 1.1 mrg (__v32hi)(A), \ 4088 1.1 mrg (B), \ 4089 1.1 mrg (D))) 4090 1.1 mrg 4091 1.1 mrg #define _mm512_maskz_cvtt_roundph_epi16(A, B, C) \ 4092 1.1 mrg ((__m512i) \ 4093 1.1 mrg __builtin_ia32_vcvttph2w512_mask_round ((B), \ 4094 1.1 mrg (__v32hi) \ 4095 1.1 mrg _mm512_setzero_si512 (), \ 4096 1.1 mrg (A), \ 4097 1.1 mrg (C))) 4098 1.1 mrg 4099 1.1 mrg #endif /* __OPTIMIZE__ */ 4100 1.1 mrg 4101 1.1 mrg /* Intrinsics vcvttph2uw. 
*/
/* Truncating conversion (vcvttph2uw) of 32 packed _Float16 values to
   unsigned 16-bit integers, using the current rounding direction.
   The all-ones mask selects every lane.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvttph_epu16 (__m512h __A)
{
  return (__m512i)
    __builtin_ia32_vcvttph2uw512_mask_round (__A,
					     (__v32hi)
					     _mm512_setzero_si512 (),
					     (__mmask32) -1,
					     _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking form: __A supplies the pass-through lanes, __B is the
   write mask, __C is the source vector.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvttph_epu16 (__m512i __A, __mmask32 __B, __m512h __C)
{
  return (__m512i)
    __builtin_ia32_vcvttph2uw512_mask_round (__C,
					     (__v32hi) __A,
					     __B,
					     _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking form: unselected lanes come from a zero vector.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvttph_epu16 (__mmask32 __A, __m512h __B)
{
  return (__m512i)
    __builtin_ia32_vcvttph2uw512_mask_round (__B,
					     (__v32hi)
					     _mm512_setzero_si512 (),
					     __A,
					     _MM_FROUND_CUR_DIRECTION);
}

/* Explicit rounding-mode variants: inline functions when optimizing,
   macros otherwise so the rounding operand remains a compile-time
   constant as the builtin requires.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundph_epu16 (__m512h __A, int __B)
{
  return (__m512i)
    __builtin_ia32_vcvttph2uw512_mask_round (__A,
					     (__v32hi)
					     _mm512_setzero_si512 (),
					     (__mmask32) -1,
					     __B);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundph_epu16 (__m512i __A, __mmask32 __B,
				__m512h __C, int __D)
{
  return (__m512i)
    __builtin_ia32_vcvttph2uw512_mask_round (__C,
					     (__v32hi) __A,
					     __B,
					     __D);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C)
{
  return (__m512i)
    __builtin_ia32_vcvttph2uw512_mask_round (__B,
					     (__v32hi)
					     _mm512_setzero_si512 (),
					     __A,
					     __C);
}

#else
#define _mm512_cvtt_roundph_epu16(A, B)					\
  ((__m512i)								\
   __builtin_ia32_vcvttph2uw512_mask_round ((A),			\
					    (__v32hi)			\
					    _mm512_setzero_si512 (),	\
					    (__mmask32)-1,		\
					    (B)))

#define _mm512_mask_cvtt_roundph_epu16(A, B, C, D)			\
  ((__m512i)								\
   __builtin_ia32_vcvttph2uw512_mask_round ((C),			\
					    (__v32hi)(A),		\
					    (B),			\
					    (D)))

#define _mm512_maskz_cvtt_roundph_epu16(A, B, C)			\
  ((__m512i)								\
   __builtin_ia32_vcvttph2uw512_mask_round ((B),			\
					    (__v32hi)			\
					    _mm512_setzero_si512 (),	\
					    (A),			\
					    (C)))

#endif /* __OPTIMIZE__ */

/* Intrinsics vcvtw2ph.
*/
/* Convert 32 packed signed 16-bit integers to _Float16 (vcvtw2ph),
   using the current rounding direction; all lanes selected.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi16_ph (__m512i __A)
{
  return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A,
						_mm512_setzero_ph (),
						(__mmask32) -1,
						_MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking form: __A supplies pass-through lanes under mask __B,
   __C is the integer source.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi16_ph (__m512h __A, __mmask32 __B, __m512i __C)
{
  return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C,
						__A,
						__B,
						_MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking form.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi16_ph (__mmask32 __A, __m512i __B)
{
  return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B,
						_mm512_setzero_ph (),
						__A,
						_MM_FROUND_CUR_DIRECTION);
}

/* Explicit rounding-mode variants (functions when optimizing, macros
   otherwise to keep the rounding operand constant).  */
#ifdef __OPTIMIZE__
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepi16_ph (__m512i __A, int __B)
{
  return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A,
						_mm512_setzero_ph (),
						(__mmask32) -1,
						__B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepi16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
{
  return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C,
						__A,
						__B,
						__D);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepi16_ph (__mmask32 __A, __m512i __B, int __C)
{
  return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B,
						_mm512_setzero_ph (),
						__A,
						__C);
}

#else
#define _mm512_cvt_roundepi16_ph(A, B)					\
  (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(A),			\
					  _mm512_setzero_ph (),		\
					  (__mmask32)-1,		\
					  (B)))

#define _mm512_mask_cvt_roundepi16_ph(A, B, C, D)			\
  (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(C),			\
					  (A),				\
					  (B),				\
					  (D)))

#define _mm512_maskz_cvt_roundepi16_ph(A, B, C)				\
  (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(B),			\
					  _mm512_setzero_ph (),		\
					  (A),				\
					  (C)))

#endif /* __OPTIMIZE__ */

/* Intrinsics vcvtuw2ph.
*/
/* Convert 32 packed unsigned 16-bit integers to _Float16 (vcvtuw2ph),
   using the current rounding direction; all lanes selected.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu16_ph (__m512i __A)
{
  return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A,
						 _mm512_setzero_ph (),
						 (__mmask32) -1,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking form: __A supplies pass-through lanes under mask __B.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu16_ph (__m512h __A, __mmask32 __B, __m512i __C)
{
  return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C,
						 __A,
						 __B,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking form.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu16_ph (__mmask32 __A, __m512i __B)
{
  return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B,
						 _mm512_setzero_ph (),
						 __A,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Explicit rounding-mode variants (functions when optimizing, macros
   otherwise to keep the rounding operand constant).  */
#ifdef __OPTIMIZE__
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepu16_ph (__m512i __A, int __B)
{
  return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A,
						 _mm512_setzero_ph (),
						 (__mmask32) -1,
						 __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepu16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
{
  return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C,
						 __A,
						 __B,
						 __D);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepu16_ph (__mmask32 __A, __m512i __B, int __C)
{
  return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B,
						 _mm512_setzero_ph (),
						 __A,
						 __C);
}

#else
#define _mm512_cvt_roundepu16_ph(A, B)					\
  (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(A),		\
					   _mm512_setzero_ph (),	\
					   (__mmask32)-1,		\
					   (B)))

#define _mm512_mask_cvt_roundepu16_ph(A, B, C, D)			\
  (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(C),		\
					   (A),				\
					   (B),				\
					   (D)))

#define _mm512_maskz_cvt_roundepu16_ph(A, B, C)				\
  (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(B),		\
					   _mm512_setzero_ph (),	\
					   (A),				\
					   (C)))

#endif /* __OPTIMIZE__ */

/* Intrinsics vcvtsh2si, vcvtsh2us.
*/ 4367 1.1 mrg extern __inline int 4368 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4369 1.1 mrg _mm_cvtsh_i32 (__m128h __A) 4370 1.1 mrg { 4371 1.1 mrg return (int) __builtin_ia32_vcvtsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION); 4372 1.1 mrg } 4373 1.1 mrg 4374 1.1 mrg extern __inline unsigned 4375 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4376 1.1 mrg _mm_cvtsh_u32 (__m128h __A) 4377 1.1 mrg { 4378 1.1 mrg return (int) __builtin_ia32_vcvtsh2usi32_round (__A, 4379 1.1 mrg _MM_FROUND_CUR_DIRECTION); 4380 1.1 mrg } 4381 1.1 mrg 4382 1.1 mrg #ifdef __OPTIMIZE__ 4383 1.1 mrg extern __inline int 4384 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4385 1.1 mrg _mm_cvt_roundsh_i32 (__m128h __A, const int __R) 4386 1.1 mrg { 4387 1.1 mrg return (int) __builtin_ia32_vcvtsh2si32_round (__A, __R); 4388 1.1 mrg } 4389 1.1 mrg 4390 1.1 mrg extern __inline unsigned 4391 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4392 1.1 mrg _mm_cvt_roundsh_u32 (__m128h __A, const int __R) 4393 1.1 mrg { 4394 1.1 mrg return (int) __builtin_ia32_vcvtsh2usi32_round (__A, __R); 4395 1.1 mrg } 4396 1.1 mrg 4397 1.1 mrg #else 4398 1.1 mrg #define _mm_cvt_roundsh_i32(A, B) \ 4399 1.1 mrg ((int)__builtin_ia32_vcvtsh2si32_round ((A), (B))) 4400 1.1 mrg #define _mm_cvt_roundsh_u32(A, B) \ 4401 1.1 mrg ((int)__builtin_ia32_vcvtsh2usi32_round ((A), (B))) 4402 1.1 mrg 4403 1.1 mrg #endif /* __OPTIMIZE__ */ 4404 1.1 mrg 4405 1.1 mrg #ifdef __x86_64__ 4406 1.1 mrg extern __inline long long 4407 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4408 1.1 mrg _mm_cvtsh_i64 (__m128h __A) 4409 1.1 mrg { 4410 1.1 mrg return (long long) 4411 1.1 mrg __builtin_ia32_vcvtsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION); 4412 1.1 mrg } 4413 1.1 mrg 4414 1.1 mrg extern __inline unsigned long long 4415 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 4416 1.1 mrg _mm_cvtsh_u64 (__m128h __A) 4417 1.1 mrg { 4418 1.1 mrg return (long long) 4419 1.1 mrg __builtin_ia32_vcvtsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION); 4420 1.1 mrg } 4421 1.1 mrg 4422 1.1 mrg #ifdef __OPTIMIZE__ 4423 1.1 mrg extern __inline long long 4424 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4425 1.1 mrg _mm_cvt_roundsh_i64 (__m128h __A, const int __R) 4426 1.1 mrg { 4427 1.1 mrg return (long long) __builtin_ia32_vcvtsh2si64_round (__A, __R); 4428 1.1 mrg } 4429 1.1 mrg 4430 1.1 mrg extern __inline unsigned long long 4431 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4432 1.1 mrg _mm_cvt_roundsh_u64 (__m128h __A, const int __R) 4433 1.1 mrg { 4434 1.1 mrg return (long long) __builtin_ia32_vcvtsh2usi64_round (__A, __R); 4435 1.1 mrg } 4436 1.1 mrg 4437 1.1 mrg #else 4438 1.1 mrg #define _mm_cvt_roundsh_i64(A, B) \ 4439 1.1 mrg ((long long)__builtin_ia32_vcvtsh2si64_round ((A), (B))) 4440 1.1 mrg #define _mm_cvt_roundsh_u64(A, B) \ 4441 1.1 mrg ((long long)__builtin_ia32_vcvtsh2usi64_round ((A), (B))) 4442 1.1 mrg 4443 1.1 mrg #endif /* __OPTIMIZE__ */ 4444 1.1 mrg #endif /* __x86_64__ */ 4445 1.1 mrg 4446 1.1 mrg /* Intrinsics vcvttsh2si, vcvttsh2us. 
*/ 4447 1.1 mrg extern __inline int 4448 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4449 1.1 mrg _mm_cvttsh_i32 (__m128h __A) 4450 1.1 mrg { 4451 1.1 mrg return (int) 4452 1.1 mrg __builtin_ia32_vcvttsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION); 4453 1.1 mrg } 4454 1.1 mrg 4455 1.1 mrg extern __inline unsigned 4456 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4457 1.1 mrg _mm_cvttsh_u32 (__m128h __A) 4458 1.1 mrg { 4459 1.1 mrg return (int) 4460 1.1 mrg __builtin_ia32_vcvttsh2usi32_round (__A, _MM_FROUND_CUR_DIRECTION); 4461 1.1 mrg } 4462 1.1 mrg 4463 1.1 mrg #ifdef __OPTIMIZE__ 4464 1.1 mrg extern __inline int 4465 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4466 1.1 mrg _mm_cvtt_roundsh_i32 (__m128h __A, const int __R) 4467 1.1 mrg { 4468 1.1 mrg return (int) __builtin_ia32_vcvttsh2si32_round (__A, __R); 4469 1.1 mrg } 4470 1.1 mrg 4471 1.1 mrg extern __inline unsigned 4472 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4473 1.1 mrg _mm_cvtt_roundsh_u32 (__m128h __A, const int __R) 4474 1.1 mrg { 4475 1.1 mrg return (int) __builtin_ia32_vcvttsh2usi32_round (__A, __R); 4476 1.1 mrg } 4477 1.1 mrg 4478 1.1 mrg #else 4479 1.1 mrg #define _mm_cvtt_roundsh_i32(A, B) \ 4480 1.1 mrg ((int)__builtin_ia32_vcvttsh2si32_round ((A), (B))) 4481 1.1 mrg #define _mm_cvtt_roundsh_u32(A, B) \ 4482 1.1 mrg ((int)__builtin_ia32_vcvttsh2usi32_round ((A), (B))) 4483 1.1 mrg 4484 1.1 mrg #endif /* __OPTIMIZE__ */ 4485 1.1 mrg 4486 1.1 mrg #ifdef __x86_64__ 4487 1.1 mrg extern __inline long long 4488 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4489 1.1 mrg _mm_cvttsh_i64 (__m128h __A) 4490 1.1 mrg { 4491 1.1 mrg return (long long) 4492 1.1 mrg __builtin_ia32_vcvttsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION); 4493 1.1 mrg } 4494 1.1 mrg 4495 1.1 mrg extern __inline unsigned long long 4496 1.1 mrg __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 4497 1.1 mrg _mm_cvttsh_u64 (__m128h __A) 4498 1.1 mrg { 4499 1.1 mrg return (long long) 4500 1.1 mrg __builtin_ia32_vcvttsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION); 4501 1.1 mrg } 4502 1.1 mrg 4503 1.1 mrg #ifdef __OPTIMIZE__ 4504 1.1 mrg extern __inline long long 4505 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4506 1.1 mrg _mm_cvtt_roundsh_i64 (__m128h __A, const int __R) 4507 1.1 mrg { 4508 1.1 mrg return (long long) __builtin_ia32_vcvttsh2si64_round (__A, __R); 4509 1.1 mrg } 4510 1.1 mrg 4511 1.1 mrg extern __inline unsigned long long 4512 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4513 1.1 mrg _mm_cvtt_roundsh_u64 (__m128h __A, const int __R) 4514 1.1 mrg { 4515 1.1 mrg return (long long) __builtin_ia32_vcvttsh2usi64_round (__A, __R); 4516 1.1 mrg } 4517 1.1 mrg 4518 1.1 mrg #else 4519 1.1 mrg #define _mm_cvtt_roundsh_i64(A, B) \ 4520 1.1 mrg ((long long)__builtin_ia32_vcvttsh2si64_round ((A), (B))) 4521 1.1 mrg #define _mm_cvtt_roundsh_u64(A, B) \ 4522 1.1 mrg ((long long)__builtin_ia32_vcvttsh2usi64_round ((A), (B))) 4523 1.1 mrg 4524 1.1 mrg #endif /* __OPTIMIZE__ */ 4525 1.1 mrg #endif /* __x86_64__ */ 4526 1.1 mrg 4527 1.1 mrg /* Intrinsics vcvtsi2sh, vcvtusi2sh. 
*/
/* Convert the signed 32-bit integer __B to _Float16 and place it in
   the lowest element of the result; upper elements come from __A
   (vcvtsi2sh), current rounding direction.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvti32_sh (__m128h __A, int __B)
{
  return __builtin_ia32_vcvtsi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
}

/* Same for an unsigned 32-bit source (vcvtusi2sh).  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtu32_sh (__m128h __A, unsigned int __B)
{
  return __builtin_ia32_vcvtusi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
}

/* Explicit rounding-mode variants: inline functions when optimizing,
   macros otherwise so the rounding operand stays constant.  */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi32_sh (__m128h __A, int __B, const int __R)
{
  return __builtin_ia32_vcvtsi2sh32_round (__A, __B, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu32_sh (__m128h __A, unsigned int __B, const int __R)
{
  return __builtin_ia32_vcvtusi2sh32_round (__A, __B, __R);
}

#else
#define _mm_cvt_roundi32_sh(A, B, C)				\
  (__builtin_ia32_vcvtsi2sh32_round ((A), (B), (C)))
#define _mm_cvt_roundu32_sh(A, B, C)				\
  (__builtin_ia32_vcvtusi2sh32_round ((A), (B), (C)))

#endif /* __OPTIMIZE__ */

/* 64-bit source operands exist only in 64-bit mode.  */
#ifdef __x86_64__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvti64_sh (__m128h __A, long long __B)
{
  return __builtin_ia32_vcvtsi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtu64_sh (__m128h __A, unsigned long long __B)
{
  return __builtin_ia32_vcvtusi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
}

#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi64_sh (__m128h __A, long long __B, const int __R)
{
  return __builtin_ia32_vcvtsi2sh64_round (__A, __B, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu64_sh (__m128h __A, unsigned long long __B, const int __R)
{
  return __builtin_ia32_vcvtusi2sh64_round (__A, __B, __R);
}

#else
#define _mm_cvt_roundi64_sh(A, B, C)				\
  (__builtin_ia32_vcvtsi2sh64_round ((A), (B), (C)))
#define _mm_cvt_roundu64_sh(A, B, C)				\
  (__builtin_ia32_vcvtusi2sh64_round ((A), (B), (C)))

#endif /* __OPTIMIZE__ */
#endif /* __x86_64__ */

/* Intrinsics vcvtph2pd.
*/
/* Widen 8 packed _Float16 values to double (vcvtph2pd), current
   rounding direction; all lanes selected.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtph_pd (__m128h __A)
{
  return __builtin_ia32_vcvtph2pd512_mask_round (__A,
						 _mm512_setzero_pd (),
						 (__mmask8) -1,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking form: __A supplies pass-through lanes under mask __B.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtph_pd (__m512d __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking form.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2pd512_mask_round (__B,
						 _mm512_setzero_pd (),
						 __A,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Explicit rounding-mode variants (functions when optimizing, macros
   otherwise).  */
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundph_pd (__m128h __A, int __B)
{
  return __builtin_ia32_vcvtph2pd512_mask_round (__A,
						 _mm512_setzero_pd (),
						 (__mmask8) -1,
						 __B);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundph_pd (__m512d __A, __mmask8 __B, __m128h __C, int __D)
{
  return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B, __D);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundph_pd (__mmask8 __A, __m128h __B, int __C)
{
  return __builtin_ia32_vcvtph2pd512_mask_round (__B,
						 _mm512_setzero_pd (),
						 __A,
						 __C);
}

#else
#define _mm512_cvt_roundph_pd(A, B)					\
  (__builtin_ia32_vcvtph2pd512_mask_round ((A),				\
					   _mm512_setzero_pd (),	\
					   (__mmask8)-1,		\
					   (B)))

#define _mm512_mask_cvt_roundph_pd(A, B, C, D)				\
  (__builtin_ia32_vcvtph2pd512_mask_round ((C), (A), (B), (D)))

#define _mm512_maskz_cvt_roundph_pd(A, B, C)				\
  (__builtin_ia32_vcvtph2pd512_mask_round ((B),				\
					   _mm512_setzero_pd (),	\
					   (A),				\
					   (C)))

#endif /* __OPTIMIZE__ */

/* Intrinsics vcvtph2psx. */
/* Widen 16 packed _Float16 values to float (vcvtph2psx), current
   rounding direction; all lanes selected.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtxph_ps (__m256h __A)
{
  return __builtin_ia32_vcvtph2psx512_mask_round (__A,
						  _mm512_setzero_ps (),
						  (__mmask16) -1,
						  _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking form: __A supplies pass-through lanes under mask __B.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtxph_ps (__m512 __A, __mmask16 __B, __m256h __C)
{
  return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B,
						  _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking form.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtxph_ps (__mmask16 __A, __m256h __B)
{
  return __builtin_ia32_vcvtph2psx512_mask_round (__B,
						  _mm512_setzero_ps (),
						  __A,
						  _MM_FROUND_CUR_DIRECTION);
}

/* Explicit rounding-mode variants of the vcvtph2psx conversions:
   inline functions when optimizing, macros otherwise so the rounding
   operand remains a compile-time constant.  */
#ifdef __OPTIMIZE__
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtx_roundph_ps (__m256h __A, int __B)
{
  return __builtin_ia32_vcvtph2psx512_mask_round (__A,
						  _mm512_setzero_ps (),
						  (__mmask16) -1,
						  __B);
}

/* Merge-masking form.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtx_roundph_ps (__m512 __A, __mmask16 __B, __m256h __C, int __D)
{
  return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B, __D);
}

/* Zero-masking form.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtx_roundph_ps (__mmask16 __A, __m256h __B, int __C)
{
  return __builtin_ia32_vcvtph2psx512_mask_round (__B,
						  _mm512_setzero_ps (),
						  __A,
						  __C);
}

#else
#define _mm512_cvtx_roundph_ps(A, B)					\
  (__builtin_ia32_vcvtph2psx512_mask_round ((A),			\
					    _mm512_setzero_ps (),	\
					    (__mmask16)-1,		\
					    (B)))

#define _mm512_mask_cvtx_roundph_ps(A, B, C, D)				\
  (__builtin_ia32_vcvtph2psx512_mask_round ((C), (A), (B), (D)))

#define _mm512_maskz_cvtx_roundph_ps(A, B, C)				\
  (__builtin_ia32_vcvtph2psx512_mask_round ((B),			\
					    _mm512_setzero_ps (),	\
					    (A),			\
					    (C)))
#endif /* __OPTIMIZE__ */

/* Intrinsics vcvtps2ph.
*/
/* Narrow 16 packed floats to _Float16 (vcvtps2phx), current rounding
   direction; all lanes selected.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtxps_ph (__m512 __A)
{
  return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A,
						  _mm256_setzero_ph (),
						  (__mmask16) -1,
						  _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking form: __A supplies pass-through lanes under mask __B.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtxps_ph (__m256h __A, __mmask16 __B, __m512 __C)
{
  return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C,
						  __A, __B,
						  _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking form.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtxps_ph (__mmask16 __A, __m512 __B)
{
  return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B,
						  _mm256_setzero_ph (),
						  __A,
						  _MM_FROUND_CUR_DIRECTION);
}

/* Explicit rounding-mode variants (functions when optimizing, macros
   otherwise).  */
#ifdef __OPTIMIZE__
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtx_roundps_ph (__m512 __A, int __B)
{
  return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A,
						  _mm256_setzero_ph (),
						  (__mmask16) -1,
						  __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtx_roundps_ph (__m256h __A, __mmask16 __B, __m512 __C, int __D)
{
  return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C,
						  __A, __B, __D);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtx_roundps_ph (__mmask16 __A, __m512 __B, int __C)
{
  return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B,
						  _mm256_setzero_ph (),
						  __A, __C);
}

#else
#define _mm512_cvtx_roundps_ph(A, B)				\
  (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(A),	\
					    _mm256_setzero_ph (),\
					    (__mmask16)-1, (B)))

#define _mm512_mask_cvtx_roundps_ph(A, B, C, D)			\
  (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(C),	\
					    (A), (B), (D)))

#define _mm512_maskz_cvtx_roundps_ph(A, B, C)			\
  (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(B),	\
					    _mm256_setzero_ph (),\
					    (A), (C)))
#endif /* __OPTIMIZE__ */

/* Intrinsics vcvtpd2ph. */
/* Narrow 8 packed doubles to _Float16 (vcvtpd2ph), current rounding
   direction; all lanes selected.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtpd_ph (__m512d __A)
{
  return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A,
						 _mm_setzero_ph (),
						 (__mmask8) -1,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking form: __A supplies pass-through lanes under mask __B.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m512d __C)
{
  return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C,
						 __A, __B,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking form.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtpd_ph (__mmask8 __A, __m512d __B)
{
  return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B,
						 _mm_setzero_ph (),
						 __A,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Explicit rounding-mode variants (functions when optimizing, macros
   otherwise).  */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_ph (__m512d __A, int __B)
{
  return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A,
						 _mm_setzero_ph (),
						 (__mmask8) -1,
						 __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_ph (__m128h __A, __mmask8 __B, __m512d __C, int __D)
{
  return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C,
						 __A, __B, __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_ph (__mmask8 __A, __m512d __B, int __C)
{
  return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B,
						 _mm_setzero_ph (),
						 __A, __C);
}

#else
#define _mm512_cvt_roundpd_ph(A, B)				\
  (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(A),		\
					   _mm_setzero_ph (),	\
					   (__mmask8)-1, (B)))

#define _mm512_mask_cvt_roundpd_ph(A, B, C, D)			\
  (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(C),		\
					   (A), (B), (D)))

#define _mm512_maskz_cvt_roundpd_ph(A, B, C)			\
  (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(B),		\
					   _mm_setzero_ph (),	\
					   (A), (C)))

#endif /* __OPTIMIZE__ */

/* Intrinsics vcvtsh2ss, vcvtsh2sd.
*/
/* Convert the lowest _Float16 element of __B to float in the lowest
   lane of the result; upper lanes come from __A (vcvtsh2ss).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsh_ss (__m128 __A, __m128h __B)
{
  return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
					      _mm_setzero_ps (),
					      (__mmask8) -1,
					      _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking form: __A is the pass-through operand, __B the mask.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
		   __m128h __D)
{
  return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B,
					      _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking form.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtsh_ss (__mmask8 __A, __m128 __B,
		    __m128h __C)
{
  return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
					      _mm_setzero_ps (),
					      __A, _MM_FROUND_CUR_DIRECTION);
}

/* Convert the lowest _Float16 element of __B to double in the lowest
   lane of the result; the upper lane comes from __A (vcvtsh2sd).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsh_sd (__m128d __A, __m128h __B)
{
  return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
					      _mm_setzero_pd (),
					      (__mmask8) -1,
					      _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking form.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
		   __m128h __D)
{
  return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B,
					      _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking form.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtsh_sd (__mmask8 __A, __m128d __B, __m128h __C)
{
  return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
					      _mm_setzero_pd (),
					      __A, _MM_FROUND_CUR_DIRECTION);
}

/* Explicit rounding-mode variants: inline functions when optimizing,
   macros otherwise so __R remains a compile-time constant.  */
#ifdef __OPTIMIZE__
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsh_ss (__m128 __A, __m128h __B, const int __R)
{
  return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
					      _mm_setzero_ps (),
					      (__mmask8) -1, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvt_roundsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
			 __m128h __D, const int __R)
{
  return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvt_roundsh_ss (__mmask8 __A, __m128 __B,
			  __m128h __C, const int __R)
{
  return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
					      _mm_setzero_ps (),
					      __A, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsh_sd (__m128d __A, __m128h __B, const int __R)
{
  return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
					      _mm_setzero_pd (),
					      (__mmask8) -1, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvt_roundsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
			 __m128h __D, const int __R)
{
  return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvt_roundsh_sd (__mmask8 __A, __m128d __B, __m128h __C, const int __R)
{
  return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
					      _mm_setzero_pd (),
					      __A, __R);
}

#else
#define _mm_cvt_roundsh_ss(A, B, R)				\
  (__builtin_ia32_vcvtsh2ss_mask_round ((B), (A),		\
					_mm_setzero_ps (),	\
					(__mmask8) -1, (R)))

#define _mm_mask_cvt_roundsh_ss(A, B, C, D, R)				\
  (__builtin_ia32_vcvtsh2ss_mask_round ((D), (C), (A), (B), (R)))

#define _mm_maskz_cvt_roundsh_ss(A, B, C, R)			\
  (__builtin_ia32_vcvtsh2ss_mask_round ((C), (B),		\
					_mm_setzero_ps (),	\
					(A), (R)))

#define _mm_cvt_roundsh_sd(A, B, R)				\
  (__builtin_ia32_vcvtsh2sd_mask_round ((B), (A),		\
					_mm_setzero_pd (),	\
					(__mmask8) -1, (R)))

#define _mm_mask_cvt_roundsh_sd(A, B, C, D, R)				\
  (__builtin_ia32_vcvtsh2sd_mask_round ((D), (C), (A), (B), (R)))

#define _mm_maskz_cvt_roundsh_sd(A, B, C, R)			\
  (__builtin_ia32_vcvtsh2sd_mask_round ((C), (B),		\
					_mm_setzero_pd (),	\
					(A), (R)))

#endif /* __OPTIMIZE__ */

/* Intrinsics vcvtss2sh, vcvtsd2sh.
*/ 5044 1.1 mrg extern __inline __m128h 5045 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5046 1.1 mrg _mm_cvtss_sh (__m128h __A, __m128 __B) 5047 1.1 mrg { 5048 1.1 mrg return __builtin_ia32_vcvtss2sh_mask_round (__B, __A, 5049 1.1 mrg _mm_setzero_ph (), 5050 1.1 mrg (__mmask8) -1, 5051 1.1 mrg _MM_FROUND_CUR_DIRECTION); 5052 1.1 mrg } 5053 1.1 mrg 5054 1.1 mrg extern __inline __m128h 5055 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5056 1.1 mrg _mm_mask_cvtss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D) 5057 1.1 mrg { 5058 1.1 mrg return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B, 5059 1.1 mrg _MM_FROUND_CUR_DIRECTION); 5060 1.1 mrg } 5061 1.1 mrg 5062 1.1 mrg extern __inline __m128h 5063 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5064 1.1 mrg _mm_maskz_cvtss_sh (__mmask8 __A, __m128h __B, __m128 __C) 5065 1.1 mrg { 5066 1.1 mrg return __builtin_ia32_vcvtss2sh_mask_round (__C, __B, 5067 1.1 mrg _mm_setzero_ph (), 5068 1.1 mrg __A, _MM_FROUND_CUR_DIRECTION); 5069 1.1 mrg } 5070 1.1 mrg 5071 1.1 mrg extern __inline __m128h 5072 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5073 1.1 mrg _mm_cvtsd_sh (__m128h __A, __m128d __B) 5074 1.1 mrg { 5075 1.1 mrg return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A, 5076 1.1 mrg _mm_setzero_ph (), 5077 1.1 mrg (__mmask8) -1, 5078 1.1 mrg _MM_FROUND_CUR_DIRECTION); 5079 1.1 mrg } 5080 1.1 mrg 5081 1.1 mrg extern __inline __m128h 5082 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5083 1.1 mrg _mm_mask_cvtsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D) 5084 1.1 mrg { 5085 1.1 mrg return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B, 5086 1.1 mrg _MM_FROUND_CUR_DIRECTION); 5087 1.1 mrg } 5088 1.1 mrg 5089 1.1 mrg extern __inline __m128h 5090 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5091 1.1 mrg 
_mm_maskz_cvtsd_sh (__mmask8 __A, __m128h __B, __m128d __C) 5092 1.1 mrg { 5093 1.1 mrg return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B, 5094 1.1 mrg _mm_setzero_ph (), 5095 1.1 mrg __A, _MM_FROUND_CUR_DIRECTION); 5096 1.1 mrg } 5097 1.1 mrg 5098 1.1 mrg #ifdef __OPTIMIZE__ 5099 1.1 mrg extern __inline __m128h 5100 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5101 1.1 mrg _mm_cvt_roundss_sh (__m128h __A, __m128 __B, const int __R) 5102 1.1 mrg { 5103 1.1 mrg return __builtin_ia32_vcvtss2sh_mask_round (__B, __A, 5104 1.1 mrg _mm_setzero_ph (), 5105 1.1 mrg (__mmask8) -1, __R); 5106 1.1 mrg } 5107 1.1 mrg 5108 1.1 mrg extern __inline __m128h 5109 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5110 1.1 mrg _mm_mask_cvt_roundss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D, 5111 1.1 mrg const int __R) 5112 1.1 mrg { 5113 1.1 mrg return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B, __R); 5114 1.1 mrg } 5115 1.1 mrg 5116 1.1 mrg extern __inline __m128h 5117 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5118 1.1 mrg _mm_maskz_cvt_roundss_sh (__mmask8 __A, __m128h __B, __m128 __C, 5119 1.1 mrg const int __R) 5120 1.1 mrg { 5121 1.1 mrg return __builtin_ia32_vcvtss2sh_mask_round (__C, __B, 5122 1.1 mrg _mm_setzero_ph (), 5123 1.1 mrg __A, __R); 5124 1.1 mrg } 5125 1.1 mrg 5126 1.1 mrg extern __inline __m128h 5127 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5128 1.1 mrg _mm_cvt_roundsd_sh (__m128h __A, __m128d __B, const int __R) 5129 1.1 mrg { 5130 1.1 mrg return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A, 5131 1.1 mrg _mm_setzero_ph (), 5132 1.1 mrg (__mmask8) -1, __R); 5133 1.1 mrg } 5134 1.1 mrg 5135 1.1 mrg extern __inline __m128h 5136 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5137 1.1 mrg _mm_mask_cvt_roundsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D, 5138 1.1 mrg const int 
__R) 5139 1.1 mrg { 5140 1.1 mrg return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B, __R); 5141 1.1 mrg } 5142 1.1 mrg 5143 1.1 mrg extern __inline __m128h 5144 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5145 1.1 mrg _mm_maskz_cvt_roundsd_sh (__mmask8 __A, __m128h __B, __m128d __C, 5146 1.1 mrg const int __R) 5147 1.1 mrg { 5148 1.1 mrg return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B, 5149 1.1 mrg _mm_setzero_ph (), 5150 1.1 mrg __A, __R); 5151 1.1 mrg } 5152 1.1 mrg 5153 1.1 mrg #else 5154 1.1 mrg #define _mm_cvt_roundss_sh(A, B, R) \ 5155 1.1 mrg (__builtin_ia32_vcvtss2sh_mask_round ((B), (A), \ 5156 1.1 mrg _mm_setzero_ph (), \ 5157 1.1 mrg (__mmask8) -1, R)) 5158 1.1 mrg 5159 1.1 mrg #define _mm_mask_cvt_roundss_sh(A, B, C, D, R) \ 5160 1.1 mrg (__builtin_ia32_vcvtss2sh_mask_round ((D), (C), (A), (B), (R))) 5161 1.1 mrg 5162 1.1 mrg #define _mm_maskz_cvt_roundss_sh(A, B, C, R) \ 5163 1.1 mrg (__builtin_ia32_vcvtss2sh_mask_round ((C), (B), \ 5164 1.1 mrg _mm_setzero_ph (), \ 5165 1.1 mrg A, R)) 5166 1.1 mrg 5167 1.1 mrg #define _mm_cvt_roundsd_sh(A, B, R) \ 5168 1.1 mrg (__builtin_ia32_vcvtsd2sh_mask_round ((B), (A), \ 5169 1.1 mrg _mm_setzero_ph (), \ 5170 1.1 mrg (__mmask8) -1, R)) 5171 1.1 mrg 5172 1.1 mrg #define _mm_mask_cvt_roundsd_sh(A, B, C, D, R) \ 5173 1.1 mrg (__builtin_ia32_vcvtsd2sh_mask_round ((D), (C), (A), (B), (R))) 5174 1.1 mrg 5175 1.1 mrg #define _mm_maskz_cvt_roundsd_sh(A, B, C, R) \ 5176 1.1 mrg (__builtin_ia32_vcvtsd2sh_mask_round ((C), (B), \ 5177 1.1 mrg _mm_setzero_ph (), \ 5178 1.1 mrg (A), (R))) 5179 1.1 mrg 5180 1.1 mrg #endif /* __OPTIMIZE__ */ 5181 1.1 mrg 5182 1.1 mrg /* Intrinsics vfmaddsub[132,213,231]ph. 
*/
/* Packed FP16 fused multiply-alternating intrinsics:
   vfmaddsub[132,213,231]ph (add on odd elements, subtract on even — per the
   instruction mnemonic; confirm against the ISA reference) and its
   vfmsubadd counterpart.  Masking pattern used by every 512-bit family in
   this file:
     - plain:  full writemask ((__mmask32) -1);
     - mask_:  builtin ..._mask  with caller's writemask __U;
     - mask3_: builtin ..._mask3 with caller's writemask __U;
     - maskz_: builtin ..._maskz (masked-off elements zeroed).
   _mask vs. _mask3 differ in which operand survives in masked-off lanes —
   conventionally the first vs. third operand; confirm against the builtin
   definitions.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C)
{
  return (__m512h)
    __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
                                        (__v32hf) __B,
                                        (__v32hf) __C,
                                        (__mmask32) -1,
                                        _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmaddsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
{
  return (__m512h)
    __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
                                        (__v32hf) __B,
                                        (__v32hf) __C,
                                        (__mmask32) __U,
                                        _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
{
  return (__m512h)
    __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A,
                                         (__v32hf) __B,
                                         (__v32hf) __C,
                                         (__mmask32) __U,
                                         _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmaddsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
{
  return (__m512h)
    __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A,
                                         (__v32hf) __B,
                                         (__v32hf) __C,
                                         (__mmask32) __U,
                                         _MM_FROUND_CUR_DIRECTION);
}

/* Rounding-control variants: const int parameter under __OPTIMIZE__,
   textual-splice macros otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
{
  return (__m512h)
    __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
                                        (__v32hf) __B,
                                        (__v32hf) __C,
                                        (__mmask32) -1, __R);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmaddsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
                               __m512h __C, const int __R)
{
  return (__m512h)
    __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
                                        (__v32hf) __B,
                                        (__v32hf) __C,
                                        (__mmask32) __U, __R);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
                                __mmask32 __U, const int __R)
{
  return (__m512h)
    __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A,
                                         (__v32hf) __B,
                                         (__v32hf) __C,
                                         (__mmask32) __U, __R);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmaddsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
                                __m512h __C, const int __R)
{
  return (__m512h)
    __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A,
                                         (__v32hf) __B,
                                         (__v32hf) __C,
                                         (__mmask32) __U, __R);
}

#else
#define _mm512_fmaddsub_round_ph(A, B, C, R) \
  ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), -1, (R)))

#define _mm512_mask_fmaddsub_round_ph(A, U, B, C, R) \
  ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), (U), (R)))

#define _mm512_mask3_fmaddsub_round_ph(A, B, C, U, R) \
  ((__m512h)__builtin_ia32_vfmaddsubph512_mask3 ((A), (B), (C), (U), (R)))

#define _mm512_maskz_fmaddsub_round_ph(U, A, B, C, R) \
  ((__m512h)__builtin_ia32_vfmaddsubph512_maskz ((A), (B), (C), (U), (R)))

#endif /* __OPTIMIZE__ */

/* Intrinsics vfmsubadd[132,213,231]ph.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsubadd_ph (__m512h __A, __m512h __B, __m512h __C)
{
  return (__m512h)
    __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
                                        (__v32hf) __B,
                                        (__v32hf) __C,
                                        (__mmask32) -1,
                                        _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsubadd_ph (__m512h __A, __mmask32 __U,
                         __m512h __B, __m512h __C)
{
  return (__m512h)
    __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
                                        (__v32hf) __B,
                                        (__v32hf) __C,
                                        (__mmask32) __U,
                                        _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsubadd_ph (__m512h __A, __m512h __B,
                          __m512h __C, __mmask32 __U)
{
  return (__m512h)
    __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A,
                                         (__v32hf) __B,
                                         (__v32hf) __C,
                                         (__mmask32) __U,
                                         _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsubadd_ph (__mmask32 __U, __m512h __A,
                          __m512h __B, __m512h __C)
{
  return (__m512h)
    __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A,
                                         (__v32hf) __B,
                                         (__v32hf) __C,
                                         (__mmask32) __U,
                                         _MM_FROUND_CUR_DIRECTION);
}

#ifdef __OPTIMIZE__
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsubadd_round_ph (__m512h __A, __m512h __B,
                          __m512h __C, const int __R)
{
  return (__m512h)
    __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
                                        (__v32hf) __B,
                                        (__v32hf) __C,
                                        (__mmask32) -1, __R);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsubadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
                               __m512h __C, const int __R)
{
  return (__m512h)
    __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
                                        (__v32hf) __B,
                                        (__v32hf) __C,
                                        (__mmask32) __U, __R);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsubadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
                                __mmask32 __U, const int __R)
{
  return (__m512h)
    __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A,
                                         (__v32hf) __B,
                                         (__v32hf) __C,
                                         (__mmask32) __U, __R);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsubadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
                                __m512h __C, const int __R)
{
  return (__m512h)
    __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A,
                                         (__v32hf) __B,
                                         (__v32hf) __C,
                                         (__mmask32) __U, __R);
}

#else
#define _mm512_fmsubadd_round_ph(A, B, C, R) \
  ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), -1, (R)))

#define _mm512_mask_fmsubadd_round_ph(A, U, B, C, R) \
  ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), (U), (R)))

#define _mm512_mask3_fmsubadd_round_ph(A, B, C, U, R) \
  ((__m512h)__builtin_ia32_vfmsubaddph512_mask3 ((A), (B), (C), (U), (R)))

#define _mm512_maskz_fmsubadd_round_ph(U, A, B, C, R) \
  ((__m512h)__builtin_ia32_vfmsubaddph512_maskz ((A), (B), (C), (U), (R)))

#endif /* __OPTIMIZE__ */

/* Intrinsics vfmadd[132,213,231]ph.
*/
/* Packed FP16 fused multiply-add (vfmaddph512), negated multiply-add
   (vfnmaddph512) and multiply-subtract (vfmsubph512), 512-bit forms.
   Each family follows the plain / mask_ / mask3_ / maskz_ pattern described
   earlier in this file; rounding-control variants are functions under
   __OPTIMIZE__ and textual-splice macros otherwise.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmadd_ph (__m512h __A, __m512h __B, __m512h __C)
{
  return (__m512h)
    __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
                                     (__v32hf) __B,
                                     (__v32hf) __C,
                                     (__mmask32) -1,
                                     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
{
  return (__m512h)
    __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
                                     (__v32hf) __B,
                                     (__v32hf) __C,
                                     (__mmask32) __U,
                                     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
{
  return (__m512h)
    __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A,
                                      (__v32hf) __B,
                                      (__v32hf) __C,
                                      (__mmask32) __U,
                                      _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
{
  return (__m512h)
    __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A,
                                      (__v32hf) __B,
                                      (__v32hf) __C,
                                      (__mmask32) __U,
                                      _MM_FROUND_CUR_DIRECTION);
}

#ifdef __OPTIMIZE__
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
{
  return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
                                                    (__v32hf) __B,
                                                    (__v32hf) __C,
                                                    (__mmask32) -1, __R);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
                            __m512h __C, const int __R)
{
  return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
                                                    (__v32hf) __B,
                                                    (__v32hf) __C,
                                                    (__mmask32) __U, __R);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
                             __mmask32 __U, const int __R)
{
  return (__m512h) __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A,
                                                     (__v32hf) __B,
                                                     (__v32hf) __C,
                                                     (__mmask32) __U, __R);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
                             __m512h __C, const int __R)
{
  return (__m512h) __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A,
                                                     (__v32hf) __B,
                                                     (__v32hf) __C,
                                                     (__mmask32) __U, __R);
}

#else
#define _mm512_fmadd_round_ph(A, B, C, R) \
  ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), -1, (R)))

#define _mm512_mask_fmadd_round_ph(A, U, B, C, R) \
  ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), (U), (R)))

#define _mm512_mask3_fmadd_round_ph(A, B, C, U, R) \
  ((__m512h)__builtin_ia32_vfmaddph512_mask3 ((A), (B), (C), (U), (R)))

#define _mm512_maskz_fmadd_round_ph(U, A, B, C, R) \
  ((__m512h)__builtin_ia32_vfmaddph512_maskz ((A), (B), (C), (U), (R)))

#endif /* __OPTIMIZE__ */

/* Intrinsics vfnmadd[132,213,231]ph.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C)
{
  return (__m512h)
    __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
                                      (__v32hf) __B,
                                      (__v32hf) __C,
                                      (__mmask32) -1,
                                      _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fnmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
{
  return (__m512h)
    __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
                                      (__v32hf) __B,
                                      (__v32hf) __C,
                                      (__mmask32) __U,
                                      _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
{
  return (__m512h)
    __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A,
                                       (__v32hf) __B,
                                       (__v32hf) __C,
                                       (__mmask32) __U,
                                       _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
{
  return (__m512h)
    __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A,
                                       (__v32hf) __B,
                                       (__v32hf) __C,
                                       (__mmask32) __U,
                                       _MM_FROUND_CUR_DIRECTION);
}

#ifdef __OPTIMIZE__
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
{
  return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
                                                     (__v32hf) __B,
                                                     (__v32hf) __C,
                                                     (__mmask32) -1, __R);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fnmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
                             __m512h __C, const int __R)
{
  return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
                                                     (__v32hf) __B,
                                                     (__v32hf) __C,
                                                     (__mmask32) __U, __R);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
                              __mmask32 __U, const int __R)
{
  return (__m512h) __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A,
                                                      (__v32hf) __B,
                                                      (__v32hf) __C,
                                                      (__mmask32) __U, __R);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
                              __m512h __C, const int __R)
{
  return (__m512h) __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A,
                                                      (__v32hf) __B,
                                                      (__v32hf) __C,
                                                      (__mmask32) __U, __R);
}

#else
#define _mm512_fnmadd_round_ph(A, B, C, R) \
  ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), -1, (R)))

#define _mm512_mask_fnmadd_round_ph(A, U, B, C, R) \
  ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), (U), (R)))

#define _mm512_mask3_fnmadd_round_ph(A, B, C, U, R) \
  ((__m512h)__builtin_ia32_vfnmaddph512_mask3 ((A), (B), (C), (U), (R)))

#define _mm512_maskz_fnmadd_round_ph(U, A, B, C, R) \
  ((__m512h)__builtin_ia32_vfnmaddph512_maskz ((A), (B), (C), (U), (R)))

#endif /* __OPTIMIZE__ */

/* Intrinsics vfmsub[132,213,231]ph.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsub_ph (__m512h __A, __m512h __B, __m512h __C)
{
  return (__m512h)
    __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
                                     (__v32hf) __B,
                                     (__v32hf) __C,
                                     (__mmask32) -1,
                                     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
{
  return (__m512h)
    __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
                                     (__v32hf) __B,
                                     (__v32hf) __C,
                                     (__mmask32) __U,
                                     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
{
  return (__m512h)
    __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A,
                                      (__v32hf) __B,
                                      (__v32hf) __C,
                                      (__mmask32) __U,
                                      _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
{
  return (__m512h)
    __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A,
                                      (__v32hf) __B,
                                      (__v32hf) __C,
                                      (__mmask32) __U,
                                      _MM_FROUND_CUR_DIRECTION);
}

#ifdef __OPTIMIZE__
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
{
  return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
                                                    (__v32hf) __B,
                                                    (__v32hf) __C,
                                                    (__mmask32) -1, __R);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
                            __m512h __C, const int __R)
{
  return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
                                                    (__v32hf) __B,
                                                    (__v32hf) __C,
                                                    (__mmask32) __U, __R);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
                             __mmask32 __U, const int __R)
{
  return (__m512h) __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A,
                                                     (__v32hf) __B,
                                                     (__v32hf) __C,
                                                     (__mmask32) __U, __R);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
                             __m512h __C, const int __R)
{
  return (__m512h) __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A,
                                                     (__v32hf) __B,
                                                     (__v32hf) __C,
                                                     (__mmask32) __U, __R);
}

#else
#define _mm512_fmsub_round_ph(A, B, C, R) \
  ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), -1, (R)))

#define _mm512_mask_fmsub_round_ph(A, U, B, C, R) \
  ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), (U), (R)))

#define _mm512_mask3_fmsub_round_ph(A, B, C, U, R) \
  ((__m512h)__builtin_ia32_vfmsubph512_mask3 ((A), (B), (C), (U), (R)))

#define _mm512_maskz_fmsub_round_ph(U, A, B, C, R) \
  ((__m512h)__builtin_ia32_vfmsubph512_maskz ((A), (B), (C), (U), (R)))

#endif /* __OPTIMIZE__ */

/* Intrinsics vfnmsub[132,213,231]ph.
*/
/* Packed FP16 negated fused multiply-subtract (vfnmsubph512), 512-bit
   forms, same plain / mask_ / mask3_ / maskz_ pattern as the families
   above.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C)
{
  return (__m512h)
    __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
                                      (__v32hf) __B,
                                      (__v32hf) __C,
                                      (__mmask32) -1,
                                      _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fnmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
{
  return (__m512h)
    __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
                                      (__v32hf) __B,
                                      (__v32hf) __C,
                                      (__mmask32) __U,
                                      _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
{
  return (__m512h)
    __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A,
                                       (__v32hf) __B,
                                       (__v32hf) __C,
                                       (__mmask32) __U,
                                       _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
{
  return (__m512h)
    __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A,
                                       (__v32hf) __B,
                                       (__v32hf) __C,
                                       (__mmask32) __U,
                                       _MM_FROUND_CUR_DIRECTION);
}

#ifdef __OPTIMIZE__
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
{
  return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
                                                     (__v32hf) __B,
                                                     (__v32hf) __C,
                                                     (__mmask32) -1, __R);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fnmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
                             __m512h __C, const int __R)
{
  return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
                                                     (__v32hf) __B,
                                                     (__v32hf) __C,
                                                     (__mmask32) __U, __R);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
                              __mmask32 __U, const int __R)
{
  return (__m512h) __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A,
                                                      (__v32hf) __B,
                                                      (__v32hf) __C,
                                                      (__mmask32) __U, __R);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
                              __m512h __C, const int __R)
{
  return (__m512h) __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A,
                                                      (__v32hf) __B,
                                                      (__v32hf) __C,
                                                      (__mmask32) __U, __R);
}

#else
#define _mm512_fnmsub_round_ph(A, B, C, R) \
  ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), -1, (R)))

#define _mm512_mask_fnmsub_round_ph(A, U, B, C, R) \
  ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), (U), (R)))

#define _mm512_mask3_fnmsub_round_ph(A, B, C, U, R) \
  ((__m512h)__builtin_ia32_vfnmsubph512_mask3 ((A), (B), (C), (U), (R)))

#define _mm512_maskz_fnmsub_round_ph(U, A, B, C, R) \
  ((__m512h)__builtin_ia32_vfnmsubph512_maskz ((A), (B), (C), (U), (R)))

#endif /* __OPTIMIZE__ */

/* Intrinsics vfmadd[132,213,231]sh.  Scalar FP16 FMA on the low element;
   note these use the __W/__A/__B parameter naming rather than __A/__B/__C
   and 8-bit writemasks.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_sh (__m128h __W, __m128h __A, __m128h __B)
{
  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
                                                  (__v8hf) __A,
                                                  (__v8hf) __B,
                                                  (__mmask8) -1,
                                                  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
{
  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
                                                  (__v8hf) __A,
                                                  (__v8hf) __B,
                                                  (__mmask8) __U,
                                                  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
{
  return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
                                                   (__v8hf) __A,
                                                   (__v8hf) __B,
                                                   (__mmask8) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Body continues beyond this chunk.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
{
1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W, 5881 1.1 mrg (__v8hf) __A, 5882 1.1 mrg (__v8hf) __B, 5883 1.1 mrg (__mmask8) __U, 5884 1.1 mrg _MM_FROUND_CUR_DIRECTION); 5885 1.1 mrg } 5886 1.1 mrg 5887 1.1 mrg 5888 1.1 mrg #ifdef __OPTIMIZE__ 5889 1.1 mrg extern __inline __m128h 5890 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5891 1.1 mrg _mm_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R) 5892 1.1 mrg { 5893 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, 5894 1.1 mrg (__v8hf) __A, 5895 1.1 mrg (__v8hf) __B, 5896 1.1 mrg (__mmask8) -1, 5897 1.1 mrg __R); 5898 1.1 mrg } 5899 1.1 mrg 5900 1.1 mrg extern __inline __m128h 5901 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5902 1.1 mrg _mm_mask_fmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B, 5903 1.1 mrg const int __R) 5904 1.1 mrg { 5905 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, 5906 1.1 mrg (__v8hf) __A, 5907 1.1 mrg (__v8hf) __B, 5908 1.1 mrg (__mmask8) __U, __R); 5909 1.1 mrg } 5910 1.1 mrg 5911 1.1 mrg extern __inline __m128h 5912 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5913 1.1 mrg _mm_mask3_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U, 5914 1.1 mrg const int __R) 5915 1.1 mrg { 5916 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W, 5917 1.1 mrg (__v8hf) __A, 5918 1.1 mrg (__v8hf) __B, 5919 1.1 mrg (__mmask8) __U, __R); 5920 1.1 mrg } 5921 1.1 mrg 5922 1.1 mrg extern __inline __m128h 5923 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5924 1.1 mrg _mm_maskz_fmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A, 5925 1.1 mrg __m128h __B, const int __R) 5926 1.1 mrg { 5927 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W, 5928 1.1 mrg (__v8hf) __A, 5929 1.1 mrg (__v8hf) __B, 5930 1.1 mrg (__mmask8) __U, __R); 
5931 1.1 mrg } 5932 1.1 mrg 5933 1.1 mrg #else 5934 1.1 mrg #define _mm_fmadd_round_sh(A, B, C, R) \ 5935 1.1 mrg ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (-1), (R))) 5936 1.1 mrg #define _mm_mask_fmadd_round_sh(A, U, B, C, R) \ 5937 1.1 mrg ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (U), (R))) 5938 1.1 mrg #define _mm_mask3_fmadd_round_sh(A, B, C, U, R) \ 5939 1.1 mrg ((__m128h) __builtin_ia32_vfmaddsh3_mask3 ((A), (B), (C), (U), (R))) 5940 1.1 mrg #define _mm_maskz_fmadd_round_sh(U, A, B, C, R) \ 5941 1.1 mrg ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), (C), (U), (R))) 5942 1.1 mrg 5943 1.1 mrg #endif /* __OPTIMIZE__ */ 5944 1.1 mrg 5945 1.1 mrg /* Intrinsics vfnmadd[132,213,231]sh. */ 5946 1.1 mrg extern __inline __m128h 5947 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5948 1.1 mrg _mm_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B) 5949 1.1 mrg { 5950 1.1 mrg return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W, 5951 1.1 mrg (__v8hf) __A, 5952 1.1 mrg (__v8hf) __B, 5953 1.1 mrg (__mmask8) -1, 5954 1.1 mrg _MM_FROUND_CUR_DIRECTION); 5955 1.1 mrg } 5956 1.1 mrg 5957 1.1 mrg extern __inline __m128h 5958 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5959 1.1 mrg _mm_mask_fnmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) 5960 1.1 mrg { 5961 1.1 mrg return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W, 5962 1.1 mrg (__v8hf) __A, 5963 1.1 mrg (__v8hf) __B, 5964 1.1 mrg (__mmask8) __U, 5965 1.1 mrg _MM_FROUND_CUR_DIRECTION); 5966 1.1 mrg } 5967 1.1 mrg 5968 1.1 mrg extern __inline __m128h 5969 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5970 1.1 mrg _mm_mask3_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U) 5971 1.1 mrg { 5972 1.1 mrg return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W, 5973 1.1 mrg (__v8hf) __A, 5974 1.1 mrg (__v8hf) __B, 5975 1.1 mrg (__mmask8) __U, 5976 1.1 mrg 
_MM_FROUND_CUR_DIRECTION); 5977 1.1 mrg } 5978 1.1 mrg 5979 1.1 mrg extern __inline __m128h 5980 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5981 1.1 mrg _mm_maskz_fnmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B) 5982 1.1 mrg { 5983 1.1 mrg return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W, 5984 1.1 mrg (__v8hf) __A, 5985 1.1 mrg (__v8hf) __B, 5986 1.1 mrg (__mmask8) __U, 5987 1.1 mrg _MM_FROUND_CUR_DIRECTION); 5988 1.1 mrg } 5989 1.1 mrg 5990 1.1 mrg 5991 1.1 mrg #ifdef __OPTIMIZE__ 5992 1.1 mrg extern __inline __m128h 5993 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5994 1.1 mrg _mm_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R) 5995 1.1 mrg { 5996 1.1 mrg return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W, 5997 1.1 mrg (__v8hf) __A, 5998 1.1 mrg (__v8hf) __B, 5999 1.1 mrg (__mmask8) -1, 6000 1.1 mrg __R); 6001 1.1 mrg } 6002 1.1 mrg 6003 1.1 mrg extern __inline __m128h 6004 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6005 1.1 mrg _mm_mask_fnmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B, 6006 1.1 mrg const int __R) 6007 1.1 mrg { 6008 1.1 mrg return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W, 6009 1.1 mrg (__v8hf) __A, 6010 1.1 mrg (__v8hf) __B, 6011 1.1 mrg (__mmask8) __U, __R); 6012 1.1 mrg } 6013 1.1 mrg 6014 1.1 mrg extern __inline __m128h 6015 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6016 1.1 mrg _mm_mask3_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U, 6017 1.1 mrg const int __R) 6018 1.1 mrg { 6019 1.1 mrg return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W, 6020 1.1 mrg (__v8hf) __A, 6021 1.1 mrg (__v8hf) __B, 6022 1.1 mrg (__mmask8) __U, __R); 6023 1.1 mrg } 6024 1.1 mrg 6025 1.1 mrg extern __inline __m128h 6026 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6027 1.1 mrg 
_mm_maskz_fnmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A, 6028 1.1 mrg __m128h __B, const int __R) 6029 1.1 mrg { 6030 1.1 mrg return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W, 6031 1.1 mrg (__v8hf) __A, 6032 1.1 mrg (__v8hf) __B, 6033 1.1 mrg (__mmask8) __U, __R); 6034 1.1 mrg } 6035 1.1 mrg 6036 1.1 mrg #else 6037 1.1 mrg #define _mm_fnmadd_round_sh(A, B, C, R) \ 6038 1.1 mrg ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (-1), (R))) 6039 1.1 mrg #define _mm_mask_fnmadd_round_sh(A, U, B, C, R) \ 6040 1.1 mrg ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (U), (R))) 6041 1.1 mrg #define _mm_mask3_fnmadd_round_sh(A, B, C, U, R) \ 6042 1.1 mrg ((__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((A), (B), (C), (U), (R))) 6043 1.1 mrg #define _mm_maskz_fnmadd_round_sh(U, A, B, C, R) \ 6044 1.1 mrg ((__m128h) __builtin_ia32_vfnmaddsh3_maskz ((A), (B), (C), (U), (R))) 6045 1.1 mrg 6046 1.1 mrg #endif /* __OPTIMIZE__ */ 6047 1.1 mrg 6048 1.1 mrg /* Intrinsics vfmsub[132,213,231]sh. 
*/ 6049 1.1 mrg extern __inline __m128h 6050 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6051 1.1 mrg _mm_fmsub_sh (__m128h __W, __m128h __A, __m128h __B) 6052 1.1 mrg { 6053 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, 6054 1.1 mrg (__v8hf) __A, 6055 1.1 mrg -(__v8hf) __B, 6056 1.1 mrg (__mmask8) -1, 6057 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6058 1.1 mrg } 6059 1.1 mrg 6060 1.1 mrg extern __inline __m128h 6061 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6062 1.1 mrg _mm_mask_fmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) 6063 1.1 mrg { 6064 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, 6065 1.1 mrg (__v8hf) __A, 6066 1.1 mrg -(__v8hf) __B, 6067 1.1 mrg (__mmask8) __U, 6068 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6069 1.1 mrg } 6070 1.1 mrg 6071 1.1 mrg extern __inline __m128h 6072 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6073 1.1 mrg _mm_mask3_fmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U) 6074 1.1 mrg { 6075 1.1 mrg return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W, 6076 1.1 mrg (__v8hf) __A, 6077 1.1 mrg (__v8hf) __B, 6078 1.1 mrg (__mmask8) __U, 6079 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6080 1.1 mrg } 6081 1.1 mrg 6082 1.1 mrg extern __inline __m128h 6083 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6084 1.1 mrg _mm_maskz_fmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B) 6085 1.1 mrg { 6086 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W, 6087 1.1 mrg (__v8hf) __A, 6088 1.1 mrg -(__v8hf) __B, 6089 1.1 mrg (__mmask8) __U, 6090 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6091 1.1 mrg } 6092 1.1 mrg 6093 1.1 mrg 6094 1.1 mrg #ifdef __OPTIMIZE__ 6095 1.1 mrg extern __inline __m128h 6096 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6097 1.1 mrg _mm_fmsub_round_sh (__m128h __W, __m128h __A, __m128h 
__B, const int __R) 6098 1.1 mrg { 6099 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, 6100 1.1 mrg (__v8hf) __A, 6101 1.1 mrg -(__v8hf) __B, 6102 1.1 mrg (__mmask8) -1, 6103 1.1 mrg __R); 6104 1.1 mrg } 6105 1.1 mrg 6106 1.1 mrg extern __inline __m128h 6107 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6108 1.1 mrg _mm_mask_fmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B, 6109 1.1 mrg const int __R) 6110 1.1 mrg { 6111 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, 6112 1.1 mrg (__v8hf) __A, 6113 1.1 mrg -(__v8hf) __B, 6114 1.1 mrg (__mmask8) __U, __R); 6115 1.1 mrg } 6116 1.1 mrg 6117 1.1 mrg extern __inline __m128h 6118 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6119 1.1 mrg _mm_mask3_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U, 6120 1.1 mrg const int __R) 6121 1.1 mrg { 6122 1.1 mrg return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W, 6123 1.1 mrg (__v8hf) __A, 6124 1.1 mrg (__v8hf) __B, 6125 1.1 mrg (__mmask8) __U, __R); 6126 1.1 mrg } 6127 1.1 mrg 6128 1.1 mrg extern __inline __m128h 6129 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6130 1.1 mrg _mm_maskz_fmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A, 6131 1.1 mrg __m128h __B, const int __R) 6132 1.1 mrg { 6133 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W, 6134 1.1 mrg (__v8hf) __A, 6135 1.1 mrg -(__v8hf) __B, 6136 1.1 mrg (__mmask8) __U, __R); 6137 1.1 mrg } 6138 1.1 mrg 6139 1.1 mrg #else 6140 1.1 mrg #define _mm_fmsub_round_sh(A, B, C, R) \ 6141 1.1 mrg ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (-1), (R))) 6142 1.1 mrg #define _mm_mask_fmsub_round_sh(A, U, B, C, R) \ 6143 1.1 mrg ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (U), (R))) 6144 1.1 mrg #define _mm_mask3_fmsub_round_sh(A, B, C, U, R) \ 6145 1.1 mrg ((__m128h) __builtin_ia32_vfmsubsh3_mask3 
((A), (B), (C), (U), (R))) 6146 1.1 mrg #define _mm_maskz_fmsub_round_sh(U, A, B, C, R) \ 6147 1.1 mrg ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), -(C), (U), (R))) 6148 1.1 mrg 6149 1.1 mrg #endif /* __OPTIMIZE__ */ 6150 1.1 mrg 6151 1.1 mrg /* Intrinsics vfnmsub[132,213,231]sh. */ 6152 1.1 mrg extern __inline __m128h 6153 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6154 1.1 mrg _mm_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B) 6155 1.1 mrg { 6156 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, 6157 1.1 mrg -(__v8hf) __A, 6158 1.1 mrg -(__v8hf) __B, 6159 1.1 mrg (__mmask8) -1, 6160 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6161 1.1 mrg } 6162 1.1 mrg 6163 1.1 mrg extern __inline __m128h 6164 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6165 1.1 mrg _mm_mask_fnmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) 6166 1.1 mrg { 6167 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, 6168 1.1 mrg -(__v8hf) __A, 6169 1.1 mrg -(__v8hf) __B, 6170 1.1 mrg (__mmask8) __U, 6171 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6172 1.1 mrg } 6173 1.1 mrg 6174 1.1 mrg extern __inline __m128h 6175 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6176 1.1 mrg _mm_mask3_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U) 6177 1.1 mrg { 6178 1.1 mrg return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W, 6179 1.1 mrg -(__v8hf) __A, 6180 1.1 mrg (__v8hf) __B, 6181 1.1 mrg (__mmask8) __U, 6182 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6183 1.1 mrg } 6184 1.1 mrg 6185 1.1 mrg extern __inline __m128h 6186 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6187 1.1 mrg _mm_maskz_fnmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B) 6188 1.1 mrg { 6189 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W, 6190 1.1 mrg -(__v8hf) __A, 6191 1.1 mrg -(__v8hf) __B, 6192 1.1 mrg (__mmask8) __U, 6193 
1.1 mrg _MM_FROUND_CUR_DIRECTION); 6194 1.1 mrg } 6195 1.1 mrg 6196 1.1 mrg 6197 1.1 mrg #ifdef __OPTIMIZE__ 6198 1.1 mrg extern __inline __m128h 6199 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6200 1.1 mrg _mm_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R) 6201 1.1 mrg { 6202 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, 6203 1.1 mrg -(__v8hf) __A, 6204 1.1 mrg -(__v8hf) __B, 6205 1.1 mrg (__mmask8) -1, 6206 1.1 mrg __R); 6207 1.1 mrg } 6208 1.1 mrg 6209 1.1 mrg extern __inline __m128h 6210 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6211 1.1 mrg _mm_mask_fnmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B, 6212 1.1 mrg const int __R) 6213 1.1 mrg { 6214 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, 6215 1.1 mrg -(__v8hf) __A, 6216 1.1 mrg -(__v8hf) __B, 6217 1.1 mrg (__mmask8) __U, __R); 6218 1.1 mrg } 6219 1.1 mrg 6220 1.1 mrg extern __inline __m128h 6221 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6222 1.1 mrg _mm_mask3_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U, 6223 1.1 mrg const int __R) 6224 1.1 mrg { 6225 1.1 mrg return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W, 6226 1.1 mrg -(__v8hf) __A, 6227 1.1 mrg (__v8hf) __B, 6228 1.1 mrg (__mmask8) __U, __R); 6229 1.1 mrg } 6230 1.1 mrg 6231 1.1 mrg extern __inline __m128h 6232 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6233 1.1 mrg _mm_maskz_fnmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A, 6234 1.1 mrg __m128h __B, const int __R) 6235 1.1 mrg { 6236 1.1 mrg return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W, 6237 1.1 mrg -(__v8hf) __A, 6238 1.1 mrg -(__v8hf) __B, 6239 1.1 mrg (__mmask8) __U, __R); 6240 1.1 mrg } 6241 1.1 mrg 6242 1.1 mrg #else 6243 1.1 mrg #define _mm_fnmsub_round_sh(A, B, C, R) \ 6244 1.1 mrg ((__m128h) 
__builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (-1), (R))) 6245 1.1 mrg #define _mm_mask_fnmsub_round_sh(A, U, B, C, R) \ 6246 1.1 mrg ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (U), (R))) 6247 1.1 mrg #define _mm_mask3_fnmsub_round_sh(A, B, C, U, R) \ 6248 1.1 mrg ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), -(B), (C), (U), (R))) 6249 1.1 mrg #define _mm_maskz_fnmsub_round_sh(U, A, B, C, R) \ 6250 1.1 mrg ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), -(B), -(C), (U), (R))) 6251 1.1 mrg 6252 1.1 mrg #endif /* __OPTIMIZE__ */ 6253 1.1 mrg 6254 1.1 mrg /* Intrinsics vf[,c]maddcph. */ 6255 1.1 mrg extern __inline __m512h 6256 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6257 1.1 mrg _mm512_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C) 6258 1.1 mrg { 6259 1.1 mrg return (__m512h) 6260 1.1 mrg __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A, 6261 1.1 mrg (__v32hf) __B, 6262 1.1 mrg (__v32hf) __C, 6263 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6264 1.1 mrg } 6265 1.1 mrg 6266 1.1 mrg extern __inline __m512h 6267 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6268 1.1 mrg _mm512_mask_fcmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D) 6269 1.1 mrg { 6270 1.1 mrg return (__m512h) 6271 1.1 mrg __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A, 6272 1.1 mrg (__v32hf) __C, 6273 1.1 mrg (__v32hf) __D, __B, 6274 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6275 1.1 mrg } 6276 1.1 mrg 6277 1.1 mrg extern __inline __m512h 6278 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6279 1.1 mrg _mm512_mask3_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D) 6280 1.1 mrg { 6281 1.1 mrg return (__m512h) 6282 1.1 mrg __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A, 6283 1.1 mrg (__v32hf) __B, 6284 1.1 mrg (__v32hf) __C, 6285 1.1 mrg __D, _MM_FROUND_CUR_DIRECTION); 6286 1.1 mrg } 6287 1.1 mrg 6288 1.1 mrg extern __inline __m512h 6289 1.1 mrg 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6290 1.1 mrg _mm512_maskz_fcmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D) 6291 1.1 mrg { 6292 1.1 mrg return (__m512h) 6293 1.1 mrg __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B, 6294 1.1 mrg (__v32hf) __C, 6295 1.1 mrg (__v32hf) __D, 6296 1.1 mrg __A, _MM_FROUND_CUR_DIRECTION); 6297 1.1 mrg } 6298 1.1 mrg 6299 1.1 mrg extern __inline __m512h 6300 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6301 1.1 mrg _mm512_fmadd_pch (__m512h __A, __m512h __B, __m512h __C) 6302 1.1 mrg { 6303 1.1 mrg return (__m512h) 6304 1.1 mrg __builtin_ia32_vfmaddcph512_round ((__v32hf) __A, 6305 1.1 mrg (__v32hf) __B, 6306 1.1 mrg (__v32hf) __C, 6307 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6308 1.1 mrg } 6309 1.1 mrg 6310 1.1 mrg extern __inline __m512h 6311 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6312 1.1 mrg _mm512_mask_fmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D) 6313 1.1 mrg { 6314 1.1 mrg return (__m512h) 6315 1.1 mrg __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A, 6316 1.1 mrg (__v32hf) __C, 6317 1.1 mrg (__v32hf) __D, __B, 6318 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6319 1.1 mrg } 6320 1.1 mrg 6321 1.1 mrg extern __inline __m512h 6322 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6323 1.1 mrg _mm512_mask3_fmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D) 6324 1.1 mrg { 6325 1.1 mrg return (__m512h) 6326 1.1 mrg __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A, 6327 1.1 mrg (__v32hf) __B, 6328 1.1 mrg (__v32hf) __C, 6329 1.1 mrg __D, _MM_FROUND_CUR_DIRECTION); 6330 1.1 mrg } 6331 1.1 mrg 6332 1.1 mrg extern __inline __m512h 6333 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6334 1.1 mrg _mm512_maskz_fmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D) 6335 1.1 mrg { 6336 1.1 mrg return (__m512h) 6337 1.1 mrg 
__builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B, 6338 1.1 mrg (__v32hf) __C, 6339 1.1 mrg (__v32hf) __D, 6340 1.1 mrg __A, _MM_FROUND_CUR_DIRECTION); 6341 1.1 mrg } 6342 1.1 mrg 6343 1.1 mrg #ifdef __OPTIMIZE__ 6344 1.1 mrg extern __inline __m512h 6345 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6346 1.1 mrg _mm512_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D) 6347 1.1 mrg { 6348 1.1 mrg return (__m512h) 6349 1.1 mrg __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A, 6350 1.1 mrg (__v32hf) __B, 6351 1.1 mrg (__v32hf) __C, 6352 1.1 mrg __D); 6353 1.1 mrg } 6354 1.1 mrg 6355 1.1 mrg extern __inline __m512h 6356 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6357 1.1 mrg _mm512_mask_fcmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C, 6358 1.1 mrg __m512h __D, const int __E) 6359 1.1 mrg { 6360 1.1 mrg return (__m512h) 6361 1.1 mrg __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A, 6362 1.1 mrg (__v32hf) __C, 6363 1.1 mrg (__v32hf) __D, __B, 6364 1.1 mrg __E); 6365 1.1 mrg } 6366 1.1 mrg 6367 1.1 mrg extern __inline __m512h 6368 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6369 1.1 mrg _mm512_mask3_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, 6370 1.1 mrg __mmask16 __D, const int __E) 6371 1.1 mrg { 6372 1.1 mrg return (__m512h) 6373 1.1 mrg __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A, 6374 1.1 mrg (__v32hf) __B, 6375 1.1 mrg (__v32hf) __C, 6376 1.1 mrg __D, __E); 6377 1.1 mrg } 6378 1.1 mrg 6379 1.1 mrg extern __inline __m512h 6380 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6381 1.1 mrg _mm512_maskz_fcmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C, 6382 1.1 mrg __m512h __D, const int __E) 6383 1.1 mrg { 6384 1.1 mrg return (__m512h) 6385 1.1 mrg __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B, 6386 1.1 mrg (__v32hf) __C, 6387 1.1 mrg (__v32hf) __D, 6388 1.1 mrg 
__A, __E); 6389 1.1 mrg } 6390 1.1 mrg 6391 1.1 mrg extern __inline __m512h 6392 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6393 1.1 mrg _mm512_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D) 6394 1.1 mrg { 6395 1.1 mrg return (__m512h) 6396 1.1 mrg __builtin_ia32_vfmaddcph512_round ((__v32hf) __A, 6397 1.1 mrg (__v32hf) __B, 6398 1.1 mrg (__v32hf) __C, 6399 1.1 mrg __D); 6400 1.1 mrg } 6401 1.1 mrg 6402 1.1 mrg extern __inline __m512h 6403 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6404 1.1 mrg _mm512_mask_fmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C, 6405 1.1 mrg __m512h __D, const int __E) 6406 1.1 mrg { 6407 1.1 mrg return (__m512h) 6408 1.1 mrg __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A, 6409 1.1 mrg (__v32hf) __C, 6410 1.1 mrg (__v32hf) __D, __B, 6411 1.1 mrg __E); 6412 1.1 mrg } 6413 1.1 mrg 6414 1.1 mrg extern __inline __m512h 6415 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6416 1.1 mrg _mm512_mask3_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, 6417 1.1 mrg __mmask16 __D, const int __E) 6418 1.1 mrg { 6419 1.1 mrg return (__m512h) 6420 1.1 mrg __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A, 6421 1.1 mrg (__v32hf) __B, 6422 1.1 mrg (__v32hf) __C, 6423 1.1 mrg __D, __E); 6424 1.1 mrg } 6425 1.1 mrg 6426 1.1 mrg extern __inline __m512h 6427 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6428 1.1 mrg _mm512_maskz_fmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C, 6429 1.1 mrg __m512h __D, const int __E) 6430 1.1 mrg { 6431 1.1 mrg return (__m512h) 6432 1.1 mrg __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B, 6433 1.1 mrg (__v32hf) __C, 6434 1.1 mrg (__v32hf) __D, 6435 1.1 mrg __A, __E); 6436 1.1 mrg } 6437 1.1 mrg 6438 1.1 mrg #else 6439 1.1 mrg #define _mm512_fcmadd_round_pch(A, B, C, D) \ 6440 1.1 mrg (__m512h) __builtin_ia32_vfcmaddcph512_round ((A), (B), 
(C), (D)) 6441 1.1 mrg 6442 1.1 mrg #define _mm512_mask_fcmadd_round_pch(A, B, C, D, E) \ 6443 1.1 mrg ((__m512h) \ 6444 1.1 mrg __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) (A), \ 6445 1.1 mrg (__v32hf) (C), \ 6446 1.1 mrg (__v32hf) (D), \ 6447 1.1 mrg (B), (E))) 6448 1.1 mrg 6449 1.1 mrg 6450 1.1 mrg #define _mm512_mask3_fcmadd_round_pch(A, B, C, D, E) \ 6451 1.1 mrg ((__m512h) \ 6452 1.1 mrg __builtin_ia32_vfcmaddcph512_mask3_round ((A), (B), (C), (D), (E))) 6453 1.1 mrg 6454 1.1 mrg #define _mm512_maskz_fcmadd_round_pch(A, B, C, D, E) \ 6455 1.1 mrg (__m512h) \ 6456 1.1 mrg __builtin_ia32_vfcmaddcph512_maskz_round ((B), (C), (D), (A), (E)) 6457 1.1 mrg 6458 1.1 mrg #define _mm512_fmadd_round_pch(A, B, C, D) \ 6459 1.1 mrg (__m512h) __builtin_ia32_vfmaddcph512_round ((A), (B), (C), (D)) 6460 1.1 mrg 6461 1.1 mrg #define _mm512_mask_fmadd_round_pch(A, B, C, D, E) \ 6462 1.1 mrg ((__m512h) \ 6463 1.1 mrg __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) (A), \ 6464 1.1 mrg (__v32hf) (C), \ 6465 1.1 mrg (__v32hf) (D), \ 6466 1.1 mrg (B), (E))) 6467 1.1 mrg 6468 1.1 mrg #define _mm512_mask3_fmadd_round_pch(A, B, C, D, E) \ 6469 1.1 mrg (__m512h) \ 6470 1.1 mrg __builtin_ia32_vfmaddcph512_mask3_round ((A), (B), (C), (D), (E)) 6471 1.1 mrg 6472 1.1 mrg #define _mm512_maskz_fmadd_round_pch(A, B, C, D, E) \ 6473 1.1 mrg (__m512h) \ 6474 1.1 mrg __builtin_ia32_vfmaddcph512_maskz_round ((B), (C), (D), (A), (E)) 6475 1.1 mrg 6476 1.1 mrg #endif /* __OPTIMIZE__ */ 6477 1.1 mrg 6478 1.1 mrg /* Intrinsics vf[,c]mulcph. 
*/ 6479 1.1 mrg extern __inline __m512h 6480 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6481 1.1 mrg _mm512_fcmul_pch (__m512h __A, __m512h __B) 6482 1.1 mrg { 6483 1.1 mrg return (__m512h) 6484 1.1 mrg __builtin_ia32_vfcmulcph512_round ((__v32hf) __A, 6485 1.1 mrg (__v32hf) __B, 6486 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6487 1.1 mrg } 6488 1.1 mrg 6489 1.1 mrg extern __inline __m512h 6490 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6491 1.1 mrg _mm512_mask_fcmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D) 6492 1.1 mrg { 6493 1.1 mrg return (__m512h) 6494 1.1 mrg __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C, 6495 1.1 mrg (__v32hf) __D, 6496 1.1 mrg (__v32hf) __A, 6497 1.1 mrg __B, _MM_FROUND_CUR_DIRECTION); 6498 1.1 mrg } 6499 1.1 mrg 6500 1.1 mrg extern __inline __m512h 6501 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6502 1.1 mrg _mm512_maskz_fcmul_pch (__mmask16 __A, __m512h __B, __m512h __C) 6503 1.1 mrg { 6504 1.1 mrg return (__m512h) 6505 1.1 mrg __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B, 6506 1.1 mrg (__v32hf) __C, 6507 1.1 mrg _mm512_setzero_ph (), 6508 1.1 mrg __A, _MM_FROUND_CUR_DIRECTION); 6509 1.1 mrg } 6510 1.1 mrg 6511 1.1 mrg extern __inline __m512h 6512 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6513 1.1 mrg _mm512_fmul_pch (__m512h __A, __m512h __B) 6514 1.1 mrg { 6515 1.1 mrg return (__m512h) 6516 1.1 mrg __builtin_ia32_vfmulcph512_round ((__v32hf) __A, 6517 1.1 mrg (__v32hf) __B, 6518 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6519 1.1 mrg } 6520 1.1 mrg 6521 1.1 mrg extern __inline __m512h 6522 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6523 1.1 mrg _mm512_mask_fmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D) 6524 1.1 mrg { 6525 1.1 mrg return (__m512h) 6526 1.1 mrg __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C, 6527 1.1 mrg (__v32hf) 
__D, 6528 1.1 mrg (__v32hf) __A, 6529 1.1 mrg __B, _MM_FROUND_CUR_DIRECTION); 6530 1.1 mrg } 6531 1.1 mrg 6532 1.1 mrg extern __inline __m512h 6533 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6534 1.1 mrg _mm512_maskz_fmul_pch (__mmask16 __A, __m512h __B, __m512h __C) 6535 1.1 mrg { 6536 1.1 mrg return (__m512h) 6537 1.1 mrg __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B, 6538 1.1 mrg (__v32hf) __C, 6539 1.1 mrg _mm512_setzero_ph (), 6540 1.1 mrg __A, _MM_FROUND_CUR_DIRECTION); 6541 1.1 mrg } 6542 1.1 mrg 6543 1.1 mrg #ifdef __OPTIMIZE__ 6544 1.1 mrg extern __inline __m512h 6545 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6546 1.1 mrg _mm512_fcmul_round_pch (__m512h __A, __m512h __B, const int __D) 6547 1.1 mrg { 6548 1.1 mrg return (__m512h) 6549 1.1 mrg __builtin_ia32_vfcmulcph512_round ((__v32hf) __A, 6550 1.1 mrg (__v32hf) __B, __D); 6551 1.1 mrg } 6552 1.1 mrg 6553 1.1 mrg extern __inline __m512h 6554 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6555 1.1 mrg _mm512_mask_fcmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C, 6556 1.1 mrg __m512h __D, const int __E) 6557 1.1 mrg { 6558 1.1 mrg return (__m512h) 6559 1.1 mrg __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C, 6560 1.1 mrg (__v32hf) __D, 6561 1.1 mrg (__v32hf) __A, 6562 1.1 mrg __B, __E); 6563 1.1 mrg } 6564 1.1 mrg 6565 1.1 mrg extern __inline __m512h 6566 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6567 1.1 mrg _mm512_maskz_fcmul_round_pch (__mmask16 __A, __m512h __B, 6568 1.1 mrg __m512h __C, const int __E) 6569 1.1 mrg { 6570 1.1 mrg return (__m512h) 6571 1.1 mrg __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B, 6572 1.1 mrg (__v32hf) __C, 6573 1.1 mrg _mm512_setzero_ph (), 6574 1.1 mrg __A, __E); 6575 1.1 mrg } 6576 1.1 mrg 6577 1.1 mrg extern __inline __m512h 6578 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6579 1.1 mrg 
_mm512_fmul_round_pch (__m512h __A, __m512h __B, const int __D) 6580 1.1 mrg { 6581 1.1 mrg return (__m512h) 6582 1.1 mrg __builtin_ia32_vfmulcph512_round ((__v32hf) __A, 6583 1.1 mrg (__v32hf) __B, 6584 1.1 mrg __D); 6585 1.1 mrg } 6586 1.1 mrg 6587 1.1 mrg extern __inline __m512h 6588 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6589 1.1 mrg _mm512_mask_fmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C, 6590 1.1 mrg __m512h __D, const int __E) 6591 1.1 mrg { 6592 1.1 mrg return (__m512h) 6593 1.1 mrg __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C, 6594 1.1 mrg (__v32hf) __D, 6595 1.1 mrg (__v32hf) __A, 6596 1.1 mrg __B, __E); 6597 1.1 mrg } 6598 1.1 mrg 6599 1.1 mrg extern __inline __m512h 6600 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6601 1.1 mrg _mm512_maskz_fmul_round_pch (__mmask16 __A, __m512h __B, 6602 1.1 mrg __m512h __C, const int __E) 6603 1.1 mrg { 6604 1.1 mrg return (__m512h) 6605 1.1 mrg __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B, 6606 1.1 mrg (__v32hf) __C, 6607 1.1 mrg _mm512_setzero_ph (), 6608 1.1 mrg __A, __E); 6609 1.1 mrg } 6610 1.1 mrg 6611 1.1 mrg #else 6612 1.1 mrg #define _mm512_fcmul_round_pch(A, B, D) \ 6613 1.1 mrg (__m512h) __builtin_ia32_vfcmulcph512_round ((A), (B), (D)) 6614 1.1 mrg 6615 1.1 mrg #define _mm512_mask_fcmul_round_pch(A, B, C, D, E) \ 6616 1.1 mrg (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((C), (D), (A), (B), (E)) 6617 1.1 mrg 6618 1.1 mrg #define _mm512_maskz_fcmul_round_pch(A, B, C, E) \ 6619 1.1 mrg (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((B), (C), \ 6620 1.1 mrg (__v32hf) \ 6621 1.1 mrg _mm512_setzero_ph (), \ 6622 1.1 mrg (A), (E)) 6623 1.1 mrg 6624 1.1 mrg #define _mm512_fmul_round_pch(A, B, D) \ 6625 1.1 mrg (__m512h) __builtin_ia32_vfmulcph512_round ((A), (B), (D)) 6626 1.1 mrg 6627 1.1 mrg #define _mm512_mask_fmul_round_pch(A, B, C, D, E) \ 6628 1.1 mrg (__m512h) __builtin_ia32_vfmulcph512_mask_round ((C), 
(D), (A), (B), (E)) 6629 1.1 mrg 6630 1.1 mrg #define _mm512_maskz_fmul_round_pch(A, B, C, E) \ 6631 1.1 mrg (__m512h) __builtin_ia32_vfmulcph512_mask_round ((B), (C), \ 6632 1.1 mrg (__v32hf) \ 6633 1.1 mrg _mm512_setzero_ph (), \ 6634 1.1 mrg (A), (E)) 6635 1.1 mrg 6636 1.1 mrg #endif /* __OPTIMIZE__ */ 6637 1.1 mrg 6638 1.1 mrg /* Intrinsics vf[,c]maddcsh. */ 6639 1.1 mrg extern __inline __m128h 6640 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6641 1.1 mrg _mm_mask_fcmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 6642 1.1 mrg { 6643 1.1 mrg return (__m128h) 6644 1.1 mrg __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A, 6645 1.1 mrg (__v8hf) __C, 6646 1.1 mrg (__v8hf) __D, __B, 6647 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6648 1.1 mrg } 6649 1.1 mrg 6650 1.1 mrg extern __inline __m128h 6651 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6652 1.1 mrg _mm_mask3_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D) 6653 1.1 mrg { 6654 1.1 mrg return (__m128h) 6655 1.1 mrg __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A, 6656 1.1 mrg (__v8hf) __B, 6657 1.1 mrg (__v8hf) __C, __D, 6658 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6659 1.1 mrg } 6660 1.1 mrg 6661 1.1 mrg extern __inline __m128h 6662 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6663 1.1 mrg _mm_maskz_fcmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D) 6664 1.1 mrg { 6665 1.1 mrg return (__m128h) 6666 1.1 mrg __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B, 6667 1.1 mrg (__v8hf) __C, 6668 1.1 mrg (__v8hf) __D, 6669 1.1 mrg __A, _MM_FROUND_CUR_DIRECTION); 6670 1.1 mrg } 6671 1.1 mrg 6672 1.1 mrg extern __inline __m128h 6673 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6674 1.1 mrg _mm_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C) 6675 1.1 mrg { 6676 1.1 mrg return (__m128h) 6677 1.1 mrg __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A, 6678 1.1 mrg 
(__v8hf) __B, 6679 1.1 mrg (__v8hf) __C, 6680 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6681 1.1 mrg } 6682 1.1 mrg 6683 1.1 mrg extern __inline __m128h 6684 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6685 1.1 mrg _mm_mask_fmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 6686 1.1 mrg { 6687 1.1 mrg return (__m128h) 6688 1.1 mrg __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A, 6689 1.1 mrg (__v8hf) __C, 6690 1.1 mrg (__v8hf) __D, __B, 6691 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6692 1.1 mrg } 6693 1.1 mrg 6694 1.1 mrg extern __inline __m128h 6695 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6696 1.1 mrg _mm_mask3_fmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D) 6697 1.1 mrg { 6698 1.1 mrg return (__m128h) 6699 1.1 mrg __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A, 6700 1.1 mrg (__v8hf) __B, 6701 1.1 mrg (__v8hf) __C, __D, 6702 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6703 1.1 mrg } 6704 1.1 mrg 6705 1.1 mrg extern __inline __m128h 6706 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6707 1.1 mrg _mm_maskz_fmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D) 6708 1.1 mrg { 6709 1.1 mrg return (__m128h) 6710 1.1 mrg __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B, 6711 1.1 mrg (__v8hf) __C, 6712 1.1 mrg (__v8hf) __D, 6713 1.1 mrg __A, _MM_FROUND_CUR_DIRECTION); 6714 1.1 mrg } 6715 1.1 mrg 6716 1.1 mrg extern __inline __m128h 6717 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6718 1.1 mrg _mm_fmadd_sch (__m128h __A, __m128h __B, __m128h __C) 6719 1.1 mrg { 6720 1.1 mrg return (__m128h) 6721 1.1 mrg __builtin_ia32_vfmaddcsh_round ((__v8hf) __A, 6722 1.1 mrg (__v8hf) __B, 6723 1.1 mrg (__v8hf) __C, 6724 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6725 1.1 mrg } 6726 1.1 mrg 6727 1.1 mrg #ifdef __OPTIMIZE__ 6728 1.1 mrg extern __inline __m128h 6729 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6730 
1.1 mrg _mm_mask_fcmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C, 6731 1.1 mrg __m128h __D, const int __E) 6732 1.1 mrg { 6733 1.1 mrg return (__m128h) 6734 1.1 mrg __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A, 6735 1.1 mrg (__v8hf) __C, 6736 1.1 mrg (__v8hf) __D, 6737 1.1 mrg __B, __E); 6738 1.1 mrg } 6739 1.1 mrg 6740 1.1 mrg extern __inline __m128h 6741 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6742 1.1 mrg _mm_mask3_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, 6743 1.1 mrg __mmask8 __D, const int __E) 6744 1.1 mrg { 6745 1.1 mrg return (__m128h) 6746 1.1 mrg __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A, 6747 1.1 mrg (__v8hf) __B, 6748 1.1 mrg (__v8hf) __C, 6749 1.1 mrg __D, __E); 6750 1.1 mrg } 6751 1.1 mrg 6752 1.1 mrg extern __inline __m128h 6753 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6754 1.1 mrg _mm_maskz_fcmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C, 6755 1.1 mrg __m128h __D, const int __E) 6756 1.1 mrg { 6757 1.1 mrg return (__m128h) 6758 1.1 mrg __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B, 6759 1.1 mrg (__v8hf) __C, 6760 1.1 mrg (__v8hf) __D, 6761 1.1 mrg __A, __E); 6762 1.1 mrg } 6763 1.1 mrg 6764 1.1 mrg extern __inline __m128h 6765 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6766 1.1 mrg _mm_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D) 6767 1.1 mrg { 6768 1.1 mrg return (__m128h) 6769 1.1 mrg __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A, 6770 1.1 mrg (__v8hf) __B, 6771 1.1 mrg (__v8hf) __C, 6772 1.1 mrg __D); 6773 1.1 mrg } 6774 1.1 mrg 6775 1.1 mrg extern __inline __m128h 6776 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6777 1.1 mrg _mm_mask_fmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C, 6778 1.1 mrg __m128h __D, const int __E) 6779 1.1 mrg { 6780 1.1 mrg return (__m128h) 6781 1.1 mrg __builtin_ia32_vfmaddcsh_mask_round 
((__v8hf) __A, 6782 1.1 mrg (__v8hf) __C, 6783 1.1 mrg (__v8hf) __D, 6784 1.1 mrg __B, __E); 6785 1.1 mrg } 6786 1.1 mrg 6787 1.1 mrg extern __inline __m128h 6788 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6789 1.1 mrg _mm_mask3_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, 6790 1.1 mrg __mmask8 __D, const int __E) 6791 1.1 mrg { 6792 1.1 mrg return (__m128h) 6793 1.1 mrg __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A, 6794 1.1 mrg (__v8hf) __B, 6795 1.1 mrg (__v8hf) __C, 6796 1.1 mrg __D, __E); 6797 1.1 mrg } 6798 1.1 mrg 6799 1.1 mrg extern __inline __m128h 6800 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6801 1.1 mrg _mm_maskz_fmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C, 6802 1.1 mrg __m128h __D, const int __E) 6803 1.1 mrg { 6804 1.1 mrg return (__m128h) 6805 1.1 mrg __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B, 6806 1.1 mrg (__v8hf) __C, 6807 1.1 mrg (__v8hf) __D, 6808 1.1 mrg __A, __E); 6809 1.1 mrg } 6810 1.1 mrg 6811 1.1 mrg extern __inline __m128h 6812 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6813 1.1 mrg _mm_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D) 6814 1.1 mrg { 6815 1.1 mrg return (__m128h) 6816 1.1 mrg __builtin_ia32_vfmaddcsh_round ((__v8hf) __A, 6817 1.1 mrg (__v8hf) __B, 6818 1.1 mrg (__v8hf) __C, 6819 1.1 mrg __D); 6820 1.1 mrg } 6821 1.1 mrg #else 6822 1.1 mrg #define _mm_mask_fcmadd_round_sch(A, B, C, D, E) \ 6823 1.1 mrg ((__m128h) \ 6824 1.1 mrg __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) (A), \ 6825 1.1 mrg (__v8hf) (C), \ 6826 1.1 mrg (__v8hf) (D), \ 6827 1.1 mrg (B), (E))) 6828 1.1 mrg 6829 1.1 mrg 6830 1.1 mrg #define _mm_mask3_fcmadd_round_sch(A, B, C, D, E) \ 6831 1.1 mrg ((__m128h) \ 6832 1.1 mrg __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) (A), \ 6833 1.1 mrg (__v8hf) (B), \ 6834 1.1 mrg (__v8hf) (C), \ 6835 1.1 mrg (D), (E))) 6836 1.1 mrg 6837 1.1 mrg #define 
_mm_maskz_fcmadd_round_sch(A, B, C, D, E) \ 6838 1.1 mrg __builtin_ia32_vfcmaddcsh_maskz_round ((B), (C), (D), (A), (E)) 6839 1.1 mrg 6840 1.1 mrg #define _mm_fcmadd_round_sch(A, B, C, D) \ 6841 1.1 mrg __builtin_ia32_vfcmaddcsh_round ((A), (B), (C), (D)) 6842 1.1 mrg 6843 1.1 mrg #define _mm_mask_fmadd_round_sch(A, B, C, D, E) \ 6844 1.1 mrg ((__m128h) \ 6845 1.1 mrg __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) (A), \ 6846 1.1 mrg (__v8hf) (C), \ 6847 1.1 mrg (__v8hf) (D), \ 6848 1.1 mrg (B), (E))) 6849 1.1 mrg 6850 1.1 mrg #define _mm_mask3_fmadd_round_sch(A, B, C, D, E) \ 6851 1.1 mrg ((__m128h) \ 6852 1.1 mrg __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) (A), \ 6853 1.1 mrg (__v8hf) (B), \ 6854 1.1 mrg (__v8hf) (C), \ 6855 1.1 mrg (D), (E))) 6856 1.1 mrg 6857 1.1 mrg #define _mm_maskz_fmadd_round_sch(A, B, C, D, E) \ 6858 1.1 mrg __builtin_ia32_vfmaddcsh_maskz_round ((B), (C), (D), (A), (E)) 6859 1.1 mrg 6860 1.1 mrg #define _mm_fmadd_round_sch(A, B, C, D) \ 6861 1.1 mrg __builtin_ia32_vfmaddcsh_round ((A), (B), (C), (D)) 6862 1.1 mrg 6863 1.1 mrg #endif /* __OPTIMIZE__ */ 6864 1.1 mrg 6865 1.1 mrg /* Intrinsics vf[,c]mulcsh. 
*/ 6866 1.1 mrg extern __inline __m128h 6867 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6868 1.1 mrg _mm_fcmul_sch (__m128h __A, __m128h __B) 6869 1.1 mrg { 6870 1.1 mrg return (__m128h) 6871 1.1 mrg __builtin_ia32_vfcmulcsh_round ((__v8hf) __A, 6872 1.1 mrg (__v8hf) __B, 6873 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6874 1.1 mrg } 6875 1.1 mrg 6876 1.1 mrg extern __inline __m128h 6877 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6878 1.1 mrg _mm_mask_fcmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 6879 1.1 mrg { 6880 1.1 mrg return (__m128h) 6881 1.1 mrg __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C, 6882 1.1 mrg (__v8hf) __D, 6883 1.1 mrg (__v8hf) __A, 6884 1.1 mrg __B, _MM_FROUND_CUR_DIRECTION); 6885 1.1 mrg } 6886 1.1 mrg 6887 1.1 mrg extern __inline __m128h 6888 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6889 1.1 mrg _mm_maskz_fcmul_sch (__mmask8 __A, __m128h __B, __m128h __C) 6890 1.1 mrg { 6891 1.1 mrg return (__m128h) 6892 1.1 mrg __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B, 6893 1.1 mrg (__v8hf) __C, 6894 1.1 mrg _mm_setzero_ph (), 6895 1.1 mrg __A, _MM_FROUND_CUR_DIRECTION); 6896 1.1 mrg } 6897 1.1 mrg 6898 1.1 mrg extern __inline __m128h 6899 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6900 1.1 mrg _mm_fmul_sch (__m128h __A, __m128h __B) 6901 1.1 mrg { 6902 1.1 mrg return (__m128h) 6903 1.1 mrg __builtin_ia32_vfmulcsh_round ((__v8hf) __A, 6904 1.1 mrg (__v8hf) __B, 6905 1.1 mrg _MM_FROUND_CUR_DIRECTION); 6906 1.1 mrg } 6907 1.1 mrg 6908 1.1 mrg extern __inline __m128h 6909 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6910 1.1 mrg _mm_mask_fmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 6911 1.1 mrg { 6912 1.1 mrg return (__m128h) 6913 1.1 mrg __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C, 6914 1.1 mrg (__v8hf) __D, 6915 1.1 mrg (__v8hf) __A, 6916 1.1 mrg __B, 
_MM_FROUND_CUR_DIRECTION); 6917 1.1 mrg } 6918 1.1 mrg 6919 1.1 mrg extern __inline __m128h 6920 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6921 1.1 mrg _mm_maskz_fmul_sch (__mmask8 __A, __m128h __B, __m128h __C) 6922 1.1 mrg { 6923 1.1 mrg return (__m128h) 6924 1.1 mrg __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B, 6925 1.1 mrg (__v8hf) __C, 6926 1.1 mrg _mm_setzero_ph (), 6927 1.1 mrg __A, _MM_FROUND_CUR_DIRECTION); 6928 1.1 mrg } 6929 1.1 mrg 6930 1.1 mrg #ifdef __OPTIMIZE__ 6931 1.1 mrg extern __inline __m128h 6932 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6933 1.1 mrg _mm_fcmul_round_sch (__m128h __A, __m128h __B, const int __D) 6934 1.1 mrg { 6935 1.1 mrg return (__m128h) 6936 1.1 mrg __builtin_ia32_vfcmulcsh_round ((__v8hf) __A, 6937 1.1 mrg (__v8hf) __B, 6938 1.1 mrg __D); 6939 1.1 mrg } 6940 1.1 mrg 6941 1.1 mrg extern __inline __m128h 6942 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6943 1.1 mrg _mm_mask_fcmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C, 6944 1.1 mrg __m128h __D, const int __E) 6945 1.1 mrg { 6946 1.1 mrg return (__m128h) 6947 1.1 mrg __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C, 6948 1.1 mrg (__v8hf) __D, 6949 1.1 mrg (__v8hf) __A, 6950 1.1 mrg __B, __E); 6951 1.1 mrg } 6952 1.1 mrg 6953 1.1 mrg extern __inline __m128h 6954 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6955 1.1 mrg _mm_maskz_fcmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, 6956 1.1 mrg const int __E) 6957 1.1 mrg { 6958 1.1 mrg return (__m128h) 6959 1.1 mrg __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B, 6960 1.1 mrg (__v8hf) __C, 6961 1.1 mrg _mm_setzero_ph (), 6962 1.1 mrg __A, __E); 6963 1.1 mrg } 6964 1.1 mrg 6965 1.1 mrg extern __inline __m128h 6966 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6967 1.1 mrg _mm_fmul_round_sch (__m128h __A, __m128h __B, const int __D) 6968 1.1 mrg { 
6969 1.1 mrg return (__m128h) 6970 1.1 mrg __builtin_ia32_vfmulcsh_round ((__v8hf) __A, 6971 1.1 mrg (__v8hf) __B, __D); 6972 1.1 mrg } 6973 1.1 mrg 6974 1.1 mrg extern __inline __m128h 6975 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6976 1.1 mrg _mm_mask_fmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C, 6977 1.1 mrg __m128h __D, const int __E) 6978 1.1 mrg { 6979 1.1 mrg return (__m128h) 6980 1.1 mrg __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C, 6981 1.1 mrg (__v8hf) __D, 6982 1.1 mrg (__v8hf) __A, 6983 1.1 mrg __B, __E); 6984 1.1 mrg } 6985 1.1 mrg 6986 1.1 mrg extern __inline __m128h 6987 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6988 1.1 mrg _mm_maskz_fmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, const int __E) 6989 1.1 mrg { 6990 1.1 mrg return (__m128h) 6991 1.1 mrg __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B, 6992 1.1 mrg (__v8hf) __C, 6993 1.1 mrg _mm_setzero_ph (), 6994 1.1 mrg __A, __E); 6995 1.1 mrg } 6996 1.1 mrg 6997 1.1 mrg #else 6998 1.1 mrg #define _mm_fcmul_round_sch(__A, __B, __D) \ 6999 1.1 mrg (__m128h) __builtin_ia32_vfcmulcsh_round ((__v8hf) __A, \ 7000 1.1 mrg (__v8hf) __B, __D) 7001 1.1 mrg 7002 1.1 mrg #define _mm_mask_fcmul_round_sch(__A, __B, __C, __D, __E) \ 7003 1.1 mrg (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C, \ 7004 1.1 mrg (__v8hf) __D, \ 7005 1.1 mrg (__v8hf) __A, \ 7006 1.1 mrg __B, __E) 7007 1.1 mrg 7008 1.1 mrg #define _mm_maskz_fcmul_round_sch(__A, __B, __C, __E) \ 7009 1.1 mrg (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B, \ 7010 1.1 mrg (__v8hf) __C, \ 7011 1.1 mrg _mm_setzero_ph (), \ 7012 1.1 mrg __A, __E) 7013 1.1 mrg 7014 1.1 mrg #define _mm_fmul_round_sch(__A, __B, __D) \ 7015 1.1 mrg (__m128h) __builtin_ia32_vfmulcsh_round ((__v8hf) __A, \ 7016 1.1 mrg (__v8hf) __B, __D) 7017 1.1 mrg 7018 1.1 mrg #define _mm_mask_fmul_round_sch(__A, __B, __C, __D, __E) \ 7019 1.1 mrg (__m128h) 
__builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C, \ 7020 1.1 mrg (__v8hf) __D, \ 7021 1.1 mrg (__v8hf) __A, \ 7022 1.1 mrg __B, __E) 7023 1.1 mrg 7024 1.1 mrg #define _mm_maskz_fmul_round_sch(__A, __B, __C, __E) \ 7025 1.1 mrg (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B, \ 7026 1.1 mrg (__v8hf) __C, \ 7027 1.1 mrg _mm_setzero_ph (), \ 7028 1.1 mrg __A, __E) 7029 1.1 mrg 7030 1.1 mrg #endif /* __OPTIMIZE__ */ 7031 1.1 mrg 7032 1.1 mrg #define _MM512_REDUCE_OP(op) \ 7033 1.1 mrg __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0); \ 7034 1.1 mrg __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1); \ 7035 1.1 mrg __m256h __T3 = (__T1 op __T2); \ 7036 1.1 mrg __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0); \ 7037 1.1 mrg __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1); \ 7038 1.1 mrg __m128h __T6 = (__T4 op __T5); \ 7039 1.1 mrg __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6, \ 7040 1.1 mrg (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 }); \ 7041 1.1 mrg __m128h __T8 = (__T6 op __T7); \ 7042 1.1 mrg __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8, \ 7043 1.1 mrg (__v8hi) { 2, 3, 0, 1, 4, 5, 6, 7 }); \ 7044 1.1 mrg __m128h __T10 = __T8 op __T9; \ 7045 1.1 mrg return __T10[0] op __T10[1] 7046 1.1 mrg 7047 1.1 mrg // TODO reduce 7048 1.1 mrg extern __inline _Float16 7049 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7050 1.1 mrg _mm512_reduce_add_ph (__m512h __A) 7051 1.1 mrg { 7052 1.1 mrg _MM512_REDUCE_OP (+); 7053 1.1 mrg } 7054 1.1 mrg 7055 1.1 mrg extern __inline _Float16 7056 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7057 1.1 mrg _mm512_reduce_mul_ph (__m512h __A) 7058 1.1 mrg { 7059 1.1 mrg _MM512_REDUCE_OP (*); 7060 1.1 mrg } 7061 1.1 mrg 7062 1.1 mrg #undef _MM512_REDUCE_OP 7063 1.1 mrg 7064 1.1 mrg #ifdef __AVX512VL__ 7065 1.1 mrg 7066 1.1 mrg #define _MM512_REDUCE_OP(op) \ 7067 1.1 mrg __m256h __T1 = 
(__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0); \ 7068 1.1 mrg __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1); \ 7069 1.1 mrg __m256h __T3 = __builtin_ia32_##op##ph256_mask (__T1, __T2, \ 7070 1.1 mrg _mm256_setzero_ph (), (__mmask16) -1); \ 7071 1.1 mrg __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0); \ 7072 1.1 mrg __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1); \ 7073 1.1 mrg __m128h __T6 = __builtin_ia32_##op##ph128_mask \ 7074 1.1 mrg (__T4, __T5, _mm_setzero_ph (),(__mmask8) -1); \ 7075 1.1 mrg __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6, \ 7076 1.1 mrg (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 }); \ 7077 1.1 mrg __m128h __T8 = (__m128h) __builtin_ia32_##op##ph128_mask \ 7078 1.1 mrg (__T6, __T7, _mm_setzero_ph (),(__mmask8) -1); \ 7079 1.1 mrg __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8, \ 7080 1.1 mrg (__v8hi) { 4, 5 }); \ 7081 1.1 mrg __m128h __T10 = __builtin_ia32_##op##ph128_mask \ 7082 1.1 mrg (__T8, __T9, _mm_setzero_ph (),(__mmask8) -1); \ 7083 1.1 mrg __m128h __T11 = (__m128h) __builtin_shuffle (__T10, \ 7084 1.1 mrg (__v8hi) { 1, 0 }); \ 7085 1.1 mrg __m128h __T12 = __builtin_ia32_##op##ph128_mask \ 7086 1.1 mrg (__T10, __T11, _mm_setzero_ph (),(__mmask8) -1); \ 7087 1.1 mrg return __T12[0] 7088 1.1 mrg 7089 1.1 mrg #else 7090 1.1 mrg 7091 1.1 mrg #define _MM512_REDUCE_OP(op) \ 7092 1.1 mrg __m512h __T1 = (__m512h) __builtin_shuffle ((__m512d) __A, \ 7093 1.1 mrg (__v8di) { 4, 5, 6, 7, 0, 0, 0, 0 }); \ 7094 1.1 mrg __m512h __T2 = _mm512_##op##_ph (__A, __T1); \ 7095 1.1 mrg __m512h __T3 = (__m512h) __builtin_shuffle ((__m512d) __T2, \ 7096 1.1 mrg (__v8di) { 2, 3, 0, 0, 0, 0, 0, 0 }); \ 7097 1.1 mrg __m512h __T4 = _mm512_##op##_ph (__T2, __T3); \ 7098 1.1 mrg __m512h __T5 = (__m512h) __builtin_shuffle ((__m512d) __T4, \ 7099 1.1 mrg (__v8di) { 1, 0, 0, 0, 0, 0, 0, 0 }); \ 7100 1.1 mrg __m512h __T6 = _mm512_##op##_ph (__T4, __T5); \ 7101 1.1 mrg __m512h __T7 = 
(__m512h) __builtin_shuffle ((__m512) __T6, \ 7102 1.1 mrg (__v16si) { 1, 0, 0, 0, 0, 0, 0, 0, \ 7103 1.1 mrg 0, 0, 0, 0, 0, 0, 0, 0 }); \ 7104 1.1 mrg __m512h __T8 = _mm512_##op##_ph (__T6, __T7); \ 7105 1.1 mrg __m512h __T9 = (__m512h) __builtin_shuffle (__T8, \ 7106 1.1 mrg (__v32hi) { 1, 0, 0, 0, 0, 0, 0, 0, \ 7107 1.1 mrg 0, 0, 0, 0, 0, 0, 0, 0, \ 7108 1.1 mrg 0, 0, 0, 0, 0, 0, 0, 0, \ 7109 1.1 mrg 0, 0, 0, 0, 0, 0, 0, 0 }); \ 7110 1.1 mrg __m512h __T10 = _mm512_##op##_ph (__T8, __T9); \ 7111 1.1 mrg return __T10[0] 7112 1.1 mrg #endif 7113 1.1 mrg 7114 1.1 mrg extern __inline _Float16 7115 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7116 1.1 mrg _mm512_reduce_min_ph (__m512h __A) 7117 1.1 mrg { 7118 1.1 mrg _MM512_REDUCE_OP (min); 7119 1.1 mrg } 7120 1.1 mrg 7121 1.1 mrg extern __inline _Float16 7122 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7123 1.1 mrg _mm512_reduce_max_ph (__m512h __A) 7124 1.1 mrg { 7125 1.1 mrg _MM512_REDUCE_OP (max); 7126 1.1 mrg } 7127 1.1 mrg 7128 1.1 mrg #undef _MM512_REDUCE_OP 7129 1.1 mrg 7130 1.1 mrg extern __inline __m512h 7131 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7132 1.1 mrg _mm512_mask_blend_ph (__mmask32 __U, __m512h __A, __m512h __W) 7133 1.1 mrg { 7134 1.1 mrg return (__m512h) __builtin_ia32_movdquhi512_mask ((__v32hi) __W, 7135 1.1 mrg (__v32hi) __A, 7136 1.1 mrg (__mmask32) __U); 7137 1.1 mrg 7138 1.1 mrg } 7139 1.1 mrg 7140 1.1 mrg extern __inline __m512h 7141 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7142 1.1 mrg _mm512_permutex2var_ph (__m512h __A, __m512i __I, __m512h __B) 7143 1.1 mrg { 7144 1.1 mrg return (__m512h) __builtin_ia32_vpermi2varhi512_mask ((__v32hi) __A, 7145 1.1 mrg (__v32hi) __I, 7146 1.1 mrg (__v32hi) __B, 7147 1.1 mrg (__mmask32)-1); 7148 1.1 mrg } 7149 1.1 mrg 7150 1.1 mrg extern __inline __m512h 7151 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 7152 1.1 mrg _mm512_permutexvar_ph (__m512i __A, __m512h __B) 7153 1.1 mrg { 7154 1.1 mrg return (__m512h) __builtin_ia32_permvarhi512_mask ((__v32hi) __B, 7155 1.1 mrg (__v32hi) __A, 7156 1.1 mrg (__v32hi) 7157 1.1 mrg (_mm512_setzero_ph ()), 7158 1.1 mrg (__mmask32)-1); 7159 1.1 mrg } 7160 1.1 mrg 7161 1.1 mrg extern __inline __m512h 7162 1.1 mrg __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7163 1.1 mrg _mm512_set1_pch (_Float16 _Complex __A) 7164 1.1 mrg { 7165 1.1 mrg union 7166 1.1 mrg { 7167 1.1 mrg _Float16 _Complex __a; 7168 1.1 mrg float __b; 7169 1.1 mrg } __u = { .__a = __A}; 7170 1.1 mrg 7171 1.1 mrg return (__m512h) _mm512_set1_ps (__u.__b); 7172 1.1 mrg } 7173 1.1 mrg 7174 1.1 mrg // intrinsics below are alias for f*mul_*ch 7175 1.1 mrg #define _mm512_mul_pch(A, B) _mm512_fmul_pch ((A), (B)) 7176 1.1 mrg #define _mm512_mask_mul_pch(W, U, A, B) \ 7177 1.1 mrg _mm512_mask_fmul_pch ((W), (U), (A), (B)) 7178 1.1 mrg #define _mm512_maskz_mul_pch(U, A, B) _mm512_maskz_fmul_pch ((U), (A), (B)) 7179 1.1 mrg #define _mm512_mul_round_pch(A, B, R) _mm512_fmul_round_pch ((A), (B), (R)) 7180 1.1 mrg #define _mm512_mask_mul_round_pch(W, U, A, B, R) \ 7181 1.1 mrg _mm512_mask_fmul_round_pch ((W), (U), (A), (B), (R)) 7182 1.1 mrg #define _mm512_maskz_mul_round_pch(U, A, B, R) \ 7183 1.1 mrg _mm512_maskz_fmul_round_pch ((U), (A), (B), (R)) 7184 1.1 mrg 7185 1.1 mrg #define _mm512_cmul_pch(A, B) _mm512_fcmul_pch ((A), (B)) 7186 1.1 mrg #define _mm512_mask_cmul_pch(W, U, A, B) \ 7187 1.1 mrg _mm512_mask_fcmul_pch ((W), (U), (A), (B)) 7188 1.1 mrg #define _mm512_maskz_cmul_pch(U, A, B) _mm512_maskz_fcmul_pch ((U), (A), (B)) 7189 1.1 mrg #define _mm512_cmul_round_pch(A, B, R) _mm512_fcmul_round_pch ((A), (B), (R)) 7190 1.1 mrg #define _mm512_mask_cmul_round_pch(W, U, A, B, R) \ 7191 1.1 mrg _mm512_mask_fcmul_round_pch ((W), (U), (A), (B), (R)) 7192 1.1 mrg #define _mm512_maskz_cmul_round_pch(U, A, B, R) \ 7193 1.1 mrg 
_mm512_maskz_fcmul_round_pch ((U), (A), (B), (R)) 7194 1.1 mrg 7195 1.1 mrg #define _mm_mul_sch(A, B) _mm_fmul_sch ((A), (B)) 7196 1.1 mrg #define _mm_mask_mul_sch(W, U, A, B) _mm_mask_fmul_sch ((W), (U), (A), (B)) 7197 1.1 mrg #define _mm_maskz_mul_sch(U, A, B) _mm_maskz_fmul_sch ((U), (A), (B)) 7198 1.1 mrg #define _mm_mul_round_sch(A, B, R) _mm_fmul_round_sch ((A), (B), (R)) 7199 1.1 mrg #define _mm_mask_mul_round_sch(W, U, A, B, R) \ 7200 1.1 mrg _mm_mask_fmul_round_sch ((W), (U), (A), (B), (R)) 7201 1.1 mrg #define _mm_maskz_mul_round_sch(U, A, B, R) \ 7202 1.1 mrg _mm_maskz_fmul_round_sch ((U), (A), (B), (R)) 7203 1.1 mrg 7204 1.1 mrg #define _mm_cmul_sch(A, B) _mm_fcmul_sch ((A), (B)) 7205 1.1 mrg #define _mm_mask_cmul_sch(W, U, A, B) _mm_mask_fcmul_sch ((W), (U), (A), (B)) 7206 1.1 mrg #define _mm_maskz_cmul_sch(U, A, B) _mm_maskz_fcmul_sch ((U), (A), (B)) 7207 1.1 mrg #define _mm_cmul_round_sch(A, B, R) _mm_fcmul_round_sch ((A), (B), (R)) 7208 1.1 mrg #define _mm_mask_cmul_round_sch(W, U, A, B, R) \ 7209 1.1 mrg _mm_mask_fcmul_round_sch ((W), (U), (A), (B), (R)) 7210 1.1 mrg #define _mm_maskz_cmul_round_sch(U, A, B, R) \ 7211 1.1 mrg _mm_maskz_fcmul_round_sch ((U), (A), (B), (R)) 7212 1.1 mrg 7213 1.1 mrg #ifdef __DISABLE_AVX512FP16__ 7214 1.1 mrg #undef __DISABLE_AVX512FP16__ 7215 1.1 mrg #pragma GCC pop_options 7216 1.1 mrg #endif /* __DISABLE_AVX512FP16__ */ 7217 1.1 mrg 7218 1.1 mrg #endif /* __AVX512FP16INTRIN_H_INCLUDED */ 7219