1 1.1 joerg /*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------=== 2 1.1 joerg * 3 1.1 joerg * 4 1.1 joerg * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 1.1 joerg * See https://llvm.org/LICENSE.txt for license information. 6 1.1 joerg * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 1.1 joerg * 8 1.1 joerg *===-----------------------------------------------------------------------=== 9 1.1 joerg */ 10 1.1 joerg #ifndef __IMMINTRIN_H 11 1.1 joerg #error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead." 12 1.1 joerg #endif 13 1.1 joerg 14 1.1 joerg #ifndef __AVX512BWINTRIN_H 15 1.1 joerg #define __AVX512BWINTRIN_H 16 1.1 joerg 17 1.1 joerg typedef unsigned int __mmask32; 18 1.1 joerg typedef unsigned long long __mmask64; 19 1.1 joerg 20 1.1 joerg /* Define the default attributes for the functions in this file. */ 21 1.1 joerg #define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512))) 22 1.1 joerg #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"))) 23 1.1 joerg 24 1.1 joerg static __inline __mmask32 __DEFAULT_FN_ATTRS 25 1.1 joerg _knot_mask32(__mmask32 __M) 26 1.1 joerg { 27 1.1 joerg return __builtin_ia32_knotsi(__M); 28 1.1 joerg } 29 1.1 joerg 30 1.1 joerg static __inline __mmask64 __DEFAULT_FN_ATTRS 31 1.1 joerg _knot_mask64(__mmask64 __M) 32 1.1 joerg { 33 1.1 joerg return __builtin_ia32_knotdi(__M); 34 1.1 joerg } 35 1.1 joerg 36 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS 37 1.1 joerg _kand_mask32(__mmask32 __A, __mmask32 __B) 38 1.1 joerg { 39 1.1 joerg return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B); 40 1.1 joerg } 41 1.1 joerg 42 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS 43 1.1 joerg _kand_mask64(__mmask64 __A, __mmask64 __B) 44 1.1 joerg { 45 1.1 joerg return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B); 46 1.1 joerg } 47 1.1 joerg 48 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS 49 1.1 joerg _kandn_mask32(__mmask32 __A, __mmask32 __B) 50 1.1 joerg { 51 1.1 joerg return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B); 52 1.1 joerg } 53 1.1 joerg 54 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS 55 1.1 joerg _kandn_mask64(__mmask64 __A, __mmask64 __B) 56 1.1 joerg { 57 1.1 joerg return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B); 58 1.1 joerg } 59 1.1 joerg 60 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS 61 1.1 joerg _kor_mask32(__mmask32 __A, __mmask32 __B) 62 1.1 joerg { 63 1.1 joerg return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B); 64 1.1 joerg } 65 1.1 joerg 66 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS 67 1.1 joerg _kor_mask64(__mmask64 __A, __mmask64 __B) 68 1.1 joerg { 69 1.1 joerg return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B); 70 1.1 joerg } 71 1.1 joerg 72 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS 73 1.1 joerg _kxnor_mask32(__mmask32 __A, __mmask32 __B) 74 1.1 joerg { 75 1.1 joerg return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B); 76 1.1 joerg } 77 1.1 joerg 78 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS 79 1.1 joerg _kxnor_mask64(__mmask64 __A, __mmask64 __B) 80 1.1 joerg { 81 1.1 joerg return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B); 82 1.1 joerg } 83 1.1 joerg 84 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS 85 1.1 joerg _kxor_mask32(__mmask32 __A, __mmask32 __B) 86 1.1 joerg { 87 1.1 joerg return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B); 88 1.1 joerg } 89 1.1 joerg 90 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS 91 1.1 joerg _kxor_mask64(__mmask64 __A, __mmask64 __B) 92 1.1 joerg { 93 1.1 joerg return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B); 94 1.1 joerg } 95 1.1 joerg 96 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS 97 1.1 joerg _kortestc_mask32_u8(__mmask32 __A, __mmask32 __B) 98 1.1 joerg { 99 1.1 joerg return (unsigned char)__builtin_ia32_kortestcsi(__A, __B); 100 1.1 joerg } 101 1.1 joerg 102 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS 103 1.1 joerg _kortestz_mask32_u8(__mmask32 __A, __mmask32 __B) 104 1.1 joerg { 105 1.1 joerg return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); 106 1.1 joerg } 107 1.1 joerg 108 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS 109 1.1 joerg _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { 110 1.1 joerg *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B); 111 1.1 joerg return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); 112 1.1 joerg } 113 1.1 joerg 114 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS 115 1.1 joerg _kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) 116 1.1 joerg { 117 1.1 joerg return (unsigned char)__builtin_ia32_kortestcdi(__A, __B); 118 1.1 joerg } 119 1.1 joerg 120 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS 121 1.1 joerg _kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) 122 1.1 joerg { 123 1.1 joerg return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); 124 1.1 joerg } 125 1.1 joerg 126 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS 127 1.1 joerg _kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { 128 1.1 joerg *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B); 129 1.1 joerg return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); 130 1.1 joerg } 131 1.1 joerg 132 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS 133 1.1 joerg _ktestc_mask32_u8(__mmask32 __A, __mmask32 __B) 134 1.1 joerg { 135 1.1 joerg return (unsigned char)__builtin_ia32_ktestcsi(__A, __B); 136 1.1 joerg } 137 1.1 joerg 138 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS 139 1.1 joerg _ktestz_mask32_u8(__mmask32 __A, __mmask32 __B) 140 1.1 joerg { 141 1.1 joerg return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); 142 1.1 joerg } 143 1.1 joerg 144 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS 145 1.1 joerg _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { 146 1.1 joerg *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B); 147 1.1 joerg return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); 148 1.1 joerg } 149 1.1 joerg 150 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS 151 1.1 joerg _ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) 152 1.1 joerg { 153 1.1 joerg return (unsigned char)__builtin_ia32_ktestcdi(__A, __B); 154 1.1 joerg } 155 1.1 joerg 156 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS 157 1.1 joerg _ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) 158 1.1 joerg { 159 1.1 joerg return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); 160 1.1 joerg } 161 1.1 joerg 162 1.1 joerg static __inline__ unsigned char __DEFAULT_FN_ATTRS 163 1.1 joerg _ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { 164 1.1 joerg *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B); 165 1.1 joerg return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); 166 1.1 joerg } 167 1.1 joerg 168 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS 169 1.1 joerg _kadd_mask32(__mmask32 __A, __mmask32 __B) 170 1.1 joerg { 171 1.1 joerg return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B); 172 1.1 joerg } 173 1.1 joerg 174 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS 175 1.1 joerg _kadd_mask64(__mmask64 __A, __mmask64 __B) 176 1.1 joerg { 177 1.1 joerg return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B); 178 1.1 joerg } 179 1.1 joerg 180 1.1 joerg #define _kshiftli_mask32(A, I) \ 181 1.1 joerg (__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I)) 182 1.1 joerg 183 1.1 joerg #define _kshiftri_mask32(A, I) \ 184 1.1 joerg (__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I)) 185 1.1 joerg 186 1.1 joerg #define _kshiftli_mask64(A, I) \ 187 1.1 joerg (__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I)) 188 1.1 joerg 189 1.1 joerg #define _kshiftri_mask64(A, I) \ 190 1.1 joerg (__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)) 191 1.1 joerg 192 1.1 joerg static __inline__ unsigned int __DEFAULT_FN_ATTRS 193 1.1 joerg _cvtmask32_u32(__mmask32 __A) { 194 1.1 joerg return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A); 195 1.1 joerg } 196 1.1 joerg 197 1.1 joerg static __inline__ unsigned long long __DEFAULT_FN_ATTRS 198 1.1 joerg _cvtmask64_u64(__mmask64 __A) { 199 1.1 joerg return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A); 200 1.1 joerg } 201 1.1 joerg 202 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS 203 1.1 joerg _cvtu32_mask32(unsigned int __A) { 204 1.1 joerg return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A); 205 1.1 joerg } 206 1.1 joerg 207 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS 208 1.1 joerg _cvtu64_mask64(unsigned long long __A) { 209 1.1 joerg return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A); 210 1.1 joerg } 211 1.1 joerg 212 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS 213 1.1 joerg _load_mask32(__mmask32 *__A) { 214 1.1 joerg return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A); 215 1.1 joerg } 216 1.1 joerg 217 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS 218 1.1 joerg _load_mask64(__mmask64 *__A) { 219 1.1 joerg return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A); 220 1.1 joerg } 221 1.1 joerg 222 1.1 joerg static __inline__ void __DEFAULT_FN_ATTRS 223 1.1 joerg _store_mask32(__mmask32 *__A, __mmask32 __B) { 224 1.1 joerg *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B); 225 1.1 joerg } 226 1.1 joerg 227 1.1 joerg static __inline__ void __DEFAULT_FN_ATTRS 228 1.1 joerg _store_mask64(__mmask64 *__A, __mmask64 __B) { 229 1.1 joerg *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B); 230 1.1 joerg } 231 1.1 joerg 232 1.1 joerg /* Integer compare */ 233 1.1 joerg 234 1.1 joerg #define _mm512_cmp_epi8_mask(a, b, p) \ 235 1.1 joerg (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 236 1.1 joerg (__v64qi)(__m512i)(b), (int)(p), \ 237 1.1 joerg (__mmask64)-1) 238 1.1 joerg 239 1.1 joerg #define _mm512_mask_cmp_epi8_mask(m, a, b, p) \ 240 1.1 joerg (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 241 1.1 joerg (__v64qi)(__m512i)(b), (int)(p), \ 242 1.1 joerg (__mmask64)(m)) 243 1.1 joerg 244 1.1 joerg #define _mm512_cmp_epu8_mask(a, b, p) \ 245 1.1 joerg (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 246 1.1 joerg (__v64qi)(__m512i)(b), (int)(p), \ 247 1.1 joerg (__mmask64)-1) 248 1.1 joerg 249 1.1 joerg #define _mm512_mask_cmp_epu8_mask(m, a, b, p) \ 250 1.1 joerg (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 251 1.1 joerg (__v64qi)(__m512i)(b), (int)(p), \ 252 1.1 joerg (__mmask64)(m)) 253 1.1 joerg 254 1.1 joerg #define _mm512_cmp_epi16_mask(a, b, p) \ 255 1.1 joerg (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 256 1.1 joerg (__v32hi)(__m512i)(b), (int)(p), \ 257 1.1 joerg (__mmask32)-1) 258 1.1 joerg 259 1.1 joerg #define _mm512_mask_cmp_epi16_mask(m, a, b, p) \ 260 1.1 joerg (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 261 1.1 joerg (__v32hi)(__m512i)(b), (int)(p), \ 262 1.1 joerg (__mmask32)(m)) 263 1.1 joerg 264 1.1 joerg #define _mm512_cmp_epu16_mask(a, b, p) \ 265 1.1 joerg (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 266 1.1 joerg (__v32hi)(__m512i)(b), (int)(p), \ 267 1.1 joerg (__mmask32)-1) 268 1.1 joerg 269 1.1 joerg #define _mm512_mask_cmp_epu16_mask(m, a, b, p) \ 270 1.1 joerg (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 271 1.1 joerg (__v32hi)(__m512i)(b), (int)(p), \ 272 1.1 joerg (__mmask32)(m)) 273 1.1 joerg 274 1.1 joerg #define _mm512_cmpeq_epi8_mask(A, B) \ 275 1.1 joerg _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 276 1.1 joerg #define _mm512_mask_cmpeq_epi8_mask(k, A, B) \ 277 1.1 joerg _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 278 1.1 joerg #define _mm512_cmpge_epi8_mask(A, B) \ 279 1.1 joerg _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 280 1.1 joerg #define _mm512_mask_cmpge_epi8_mask(k, A, B) \ 281 1.1 joerg _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 282 1.1 joerg #define _mm512_cmpgt_epi8_mask(A, B) \ 283 1.1 joerg _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 284 1.1 joerg #define _mm512_mask_cmpgt_epi8_mask(k, A, B) \ 285 1.1 joerg _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 286 1.1 joerg #define _mm512_cmple_epi8_mask(A, B) \ 287 1.1 joerg _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 288 1.1 joerg #define _mm512_mask_cmple_epi8_mask(k, A, B) \ 289 1.1 joerg _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 290 1.1 joerg #define _mm512_cmplt_epi8_mask(A, B) \ 291 1.1 joerg _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) 292 1.1 joerg #define _mm512_mask_cmplt_epi8_mask(k, A, B) \ 293 1.1 joerg _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 294 1.1 joerg #define _mm512_cmpneq_epi8_mask(A, B) \ 295 1.1 joerg _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 296 1.1 joerg #define _mm512_mask_cmpneq_epi8_mask(k, A, B) \ 297 1.1 joerg _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) 298 1.1 joerg 299 1.1 joerg #define _mm512_cmpeq_epu8_mask(A, B) \ 300 1.1 joerg _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 301 1.1 joerg #define _mm512_mask_cmpeq_epu8_mask(k, A, B) \ 302 1.1 joerg _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 303 1.1 joerg #define _mm512_cmpge_epu8_mask(A, B) \ 304 1.1 joerg _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 305 1.1 joerg #define _mm512_mask_cmpge_epu8_mask(k, A, B) \ 306 1.1 joerg _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 307 1.1 joerg #define _mm512_cmpgt_epu8_mask(A, B) \ 308 1.1 joerg _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 309 1.1 joerg #define _mm512_mask_cmpgt_epu8_mask(k, A, B) \ 310 1.1 joerg _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 311 1.1 joerg #define _mm512_cmple_epu8_mask(A, B) \ 312 1.1 joerg _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 313 1.1 joerg #define _mm512_mask_cmple_epu8_mask(k, A, B) \ 314 1.1 joerg _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) 315 1.1 joerg #define _mm512_cmplt_epu8_mask(A, B) \ 316 1.1 joerg _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 317 1.1 joerg #define _mm512_mask_cmplt_epu8_mask(k, A, B) \ 318 1.1 joerg _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 319 1.1 joerg #define _mm512_cmpneq_epu8_mask(A, B) \ 320 1.1 joerg _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 321 1.1 joerg #define _mm512_mask_cmpneq_epu8_mask(k, A, B) \ 322 1.1 joerg _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 323 1.1 joerg 324 1.1 joerg #define _mm512_cmpeq_epi16_mask(A, B) \ 325 1.1 joerg _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 326 1.1 joerg #define _mm512_mask_cmpeq_epi16_mask(k, A, B) \ 327 1.1 joerg _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 328 1.1 joerg #define _mm512_cmpge_epi16_mask(A, B) \ 329 1.1 joerg _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 330 1.1 joerg #define _mm512_mask_cmpge_epi16_mask(k, A, B) \ 331 1.1 joerg _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) 332 1.1 joerg #define _mm512_cmpgt_epi16_mask(A, B) \ 333 1.1 joerg _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 334 1.1 joerg #define _mm512_mask_cmpgt_epi16_mask(k, A, B) \ 335 1.1 joerg _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 336 1.1 joerg #define _mm512_cmple_epi16_mask(A, B) \ 337 1.1 joerg _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 338 1.1 joerg #define _mm512_mask_cmple_epi16_mask(k, A, B) \ 339 1.1 joerg _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 340 1.1 joerg #define _mm512_cmplt_epi16_mask(A, B) \ 341 1.1 joerg _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 342 1.1 joerg #define _mm512_mask_cmplt_epi16_mask(k, A, B) \ 343 1.1 joerg _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 344 1.1 joerg #define _mm512_cmpneq_epi16_mask(A, B) \ 345 1.1 joerg _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 346 1.1 joerg #define _mm512_mask_cmpneq_epi16_mask(k, A, B) \ 347 1.1 joerg _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 348 1.1 joerg 349 1.1 joerg #define _mm512_cmpeq_epu16_mask(A, B) \ 350 1.1 joerg _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 351 1.1 joerg #define _mm512_mask_cmpeq_epu16_mask(k, A, B) \ 352 1.1 joerg _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 353 1.1 joerg #define _mm512_cmpge_epu16_mask(A, B) \ 354 1.1 joerg _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 355 1.1 joerg #define _mm512_mask_cmpge_epu16_mask(k, A, B) \ 356 1.1 joerg _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 357 1.1 joerg #define _mm512_cmpgt_epu16_mask(A, B) \ 358 1.1 joerg _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 359 1.1 joerg #define _mm512_mask_cmpgt_epu16_mask(k, A, B) \ 360 1.1 joerg _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 361 1.1 joerg #define _mm512_cmple_epu16_mask(A, B) \ 362 1.1 joerg _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 363 1.1 joerg #define _mm512_mask_cmple_epu16_mask(k, A, B) \ 364 1.1 joerg _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 365 1.1 joerg #define _mm512_cmplt_epu16_mask(A, B) \ 366 1.1 joerg _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) 367 1.1 joerg #define _mm512_mask_cmplt_epu16_mask(k, A, B) \ 368 1.1 joerg _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 369 1.1 joerg #define _mm512_cmpneq_epu16_mask(A, B) \ 370 1.1 joerg _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 371 1.1 joerg #define _mm512_mask_cmpneq_epu16_mask(k, A, B) \ 372 1.1 joerg _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 373 1.1 joerg 374 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 375 1.1 joerg _mm512_add_epi8 (__m512i __A, __m512i __B) { 376 1.1 joerg return (__m512i) ((__v64qu) __A + (__v64qu) __B); 377 1.1 joerg } 378 1.1 joerg 379 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 380 1.1 joerg _mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 381 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 382 1.1 joerg (__v64qi)_mm512_add_epi8(__A, __B), 383 1.1 joerg (__v64qi)__W); 384 1.1 joerg } 385 1.1 joerg 386 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 387 1.1 joerg _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 388 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 389 1.1 joerg (__v64qi)_mm512_add_epi8(__A, __B), 390 1.1 joerg (__v64qi)_mm512_setzero_si512()); 391 1.1 joerg } 392 1.1 joerg 393 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 394 1.1 joerg _mm512_sub_epi8 (__m512i __A, __m512i __B) { 395 1.1 joerg return (__m512i) ((__v64qu) __A - (__v64qu) __B); 396 1.1 joerg } 397 1.1 joerg 398 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 399 1.1 joerg _mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 400 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 401 1.1 joerg (__v64qi)_mm512_sub_epi8(__A, __B), 402 1.1 joerg (__v64qi)__W); 403 1.1 joerg } 404 1.1 joerg 405 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 406 1.1 joerg _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 407 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 408 1.1 joerg (__v64qi)_mm512_sub_epi8(__A, __B), 409 1.1 joerg (__v64qi)_mm512_setzero_si512()); 410 1.1 joerg } 411 1.1 joerg 412 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 413 1.1 joerg _mm512_add_epi16 (__m512i __A, __m512i __B) { 414 1.1 joerg return (__m512i) ((__v32hu) __A + (__v32hu) __B); 415 1.1 joerg } 416 1.1 joerg 417 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 418 1.1 joerg _mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 419 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 420 1.1 joerg (__v32hi)_mm512_add_epi16(__A, __B), 421 1.1 joerg (__v32hi)__W); 422 1.1 joerg } 423 1.1 joerg 424 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 425 1.1 joerg _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 426 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 427 1.1 joerg (__v32hi)_mm512_add_epi16(__A, __B), 428 1.1 joerg (__v32hi)_mm512_setzero_si512()); 429 1.1 joerg } 430 1.1 joerg 431 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 432 1.1 joerg _mm512_sub_epi16 (__m512i __A, __m512i __B) { 433 1.1 joerg return (__m512i) ((__v32hu) __A - (__v32hu) __B); 434 1.1 joerg } 435 1.1 joerg 436 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 437 1.1 joerg _mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 438 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 439 1.1 joerg (__v32hi)_mm512_sub_epi16(__A, __B), 440 1.1 joerg (__v32hi)__W); 441 1.1 joerg } 442 1.1 joerg 443 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 444 1.1 joerg _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 445 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 446 1.1 joerg (__v32hi)_mm512_sub_epi16(__A, __B), 447 1.1 joerg (__v32hi)_mm512_setzero_si512()); 448 1.1 joerg } 449 1.1 joerg 450 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 451 1.1 joerg _mm512_mullo_epi16 (__m512i __A, __m512i __B) { 452 1.1 joerg return (__m512i) ((__v32hu) __A * (__v32hu) __B); 453 1.1 joerg } 454 1.1 joerg 455 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 456 1.1 joerg _mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 457 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 458 1.1 joerg (__v32hi)_mm512_mullo_epi16(__A, __B), 459 1.1 joerg (__v32hi)__W); 460 1.1 joerg } 461 1.1 joerg 462 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 463 1.1 joerg _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 464 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 465 1.1 joerg (__v32hi)_mm512_mullo_epi16(__A, __B), 466 1.1 joerg (__v32hi)_mm512_setzero_si512()); 467 1.1 joerg } 468 1.1 joerg 469 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 470 1.1 joerg _mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W) 471 1.1 joerg { 472 1.1 joerg return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 473 1.1 joerg (__v64qi) __W, 474 1.1 joerg (__v64qi) __A); 475 1.1 joerg } 476 1.1 joerg 477 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 478 1.1 joerg _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) 479 1.1 joerg { 480 1.1 joerg return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 481 1.1 joerg (__v32hi) __W, 482 1.1 joerg (__v32hi) __A); 483 1.1 joerg } 484 1.1 joerg 485 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 486 1.1 joerg _mm512_abs_epi8 (__m512i __A) 487 1.1 joerg { 488 1.1 joerg return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A); 489 1.1 joerg } 490 1.1 joerg 491 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 492 1.1 joerg _mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A) 493 1.1 joerg { 494 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 495 1.1 joerg (__v64qi)_mm512_abs_epi8(__A), 496 1.1 joerg (__v64qi)__W); 497 1.1 joerg } 498 1.1 joerg 499 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 500 1.1 joerg _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) 501 1.1 joerg { 502 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 503 1.1 joerg (__v64qi)_mm512_abs_epi8(__A), 504 1.1 joerg (__v64qi)_mm512_setzero_si512()); 505 1.1 joerg } 506 1.1 joerg 507 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 508 1.1 joerg _mm512_abs_epi16 (__m512i __A) 509 1.1 joerg { 510 1.1 joerg return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A); 511 1.1 joerg } 512 1.1 joerg 513 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 514 1.1 joerg _mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A) 515 1.1 joerg { 516 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 517 1.1 joerg (__v32hi)_mm512_abs_epi16(__A), 518 1.1 joerg (__v32hi)__W); 519 1.1 joerg } 520 1.1 joerg 521 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 522 1.1 joerg _mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A) 523 1.1 joerg { 524 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 525 1.1 joerg (__v32hi)_mm512_abs_epi16(__A), 526 1.1 joerg (__v32hi)_mm512_setzero_si512()); 527 1.1 joerg } 528 1.1 joerg 529 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 530 1.1 joerg _mm512_packs_epi32(__m512i __A, __m512i __B) 531 1.1 joerg { 532 1.1 joerg return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B); 533 1.1 joerg } 534 1.1 joerg 535 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 536 1.1 joerg _mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) 537 1.1 joerg { 538 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 539 1.1 joerg (__v32hi)_mm512_packs_epi32(__A, __B), 540 1.1 joerg (__v32hi)_mm512_setzero_si512()); 541 1.1 joerg } 542 1.1 joerg 543 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 544 1.1 joerg _mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 545 1.1 joerg { 546 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 547 1.1 joerg (__v32hi)_mm512_packs_epi32(__A, __B), 548 1.1 joerg (__v32hi)__W); 549 1.1 joerg } 550 1.1 joerg 551 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 552 1.1 joerg _mm512_packs_epi16(__m512i __A, __m512i __B) 553 1.1 joerg { 554 1.1 joerg return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B); 555 1.1 joerg } 556 1.1 joerg 557 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 558 1.1 joerg _mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 559 1.1 joerg { 560 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 561 1.1 joerg (__v64qi)_mm512_packs_epi16(__A, __B), 562 1.1 joerg (__v64qi)__W); 563 1.1 joerg } 564 1.1 joerg 565 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 566 1.1 joerg _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B) 567 1.1 joerg { 568 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 569 1.1 joerg (__v64qi)_mm512_packs_epi16(__A, __B), 570 1.1 joerg (__v64qi)_mm512_setzero_si512()); 571 1.1 joerg } 572 1.1 joerg 573 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 574 1.1 joerg _mm512_packus_epi32(__m512i __A, __m512i __B) 575 1.1 joerg { 576 1.1 joerg return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B); 577 1.1 joerg } 578 1.1 joerg 579 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 580 1.1 joerg _mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B) 581 1.1 joerg { 582 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 583 1.1 joerg (__v32hi)_mm512_packus_epi32(__A, __B), 584 1.1 joerg (__v32hi)_mm512_setzero_si512()); 585 1.1 joerg } 586 1.1 joerg 587 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 588 1.1 joerg _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 589 1.1 joerg { 590 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 591 1.1 joerg (__v32hi)_mm512_packus_epi32(__A, __B), 592 1.1 joerg (__v32hi)__W); 593 1.1 joerg } 594 1.1 joerg 595 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 596 1.1 joerg _mm512_packus_epi16(__m512i __A, __m512i __B) 597 1.1 joerg { 598 1.1 joerg return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B); 599 1.1 joerg } 600 1.1 joerg 601 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 602 1.1 joerg _mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 603 1.1 joerg { 604 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 605 1.1 joerg (__v64qi)_mm512_packus_epi16(__A, __B), 606 1.1 joerg (__v64qi)__W); 607 1.1 joerg } 608 1.1 joerg 609 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 610 1.1 joerg _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) 611 1.1 joerg { 612 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 613 1.1 joerg (__v64qi)_mm512_packus_epi16(__A, __B), 614 1.1 joerg (__v64qi)_mm512_setzero_si512()); 615 1.1 joerg } 616 1.1 joerg 617 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 618 1.1 joerg _mm512_adds_epi8 (__m512i __A, __m512i __B) 619 1.1 joerg { 620 1.1 joerg return (__m512i)__builtin_ia32_paddsb512((__v64qi)__A, (__v64qi)__B); 621 1.1 joerg } 622 1.1 joerg 623 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 624 1.1 joerg _mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 625 1.1 joerg { 626 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 627 1.1 joerg (__v64qi)_mm512_adds_epi8(__A, __B), 628 1.1 joerg (__v64qi)__W); 629 1.1 joerg } 630 1.1 joerg 631 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 632 1.1 joerg _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B) 633 1.1 joerg { 634 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 635 1.1 joerg (__v64qi)_mm512_adds_epi8(__A, __B), 636 1.1 joerg (__v64qi)_mm512_setzero_si512()); 637 1.1 joerg } 638 1.1 joerg 639 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 640 1.1 joerg _mm512_adds_epi16 (__m512i __A, __m512i __B) 641 1.1 joerg { 642 1.1 joerg return (__m512i)__builtin_ia32_paddsw512((__v32hi)__A, (__v32hi)__B); 643 1.1 joerg } 644 1.1 joerg 645 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 646 1.1 joerg _mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 647 1.1 joerg { 648 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 649 1.1 joerg (__v32hi)_mm512_adds_epi16(__A, __B), 650 1.1 joerg (__v32hi)__W); 651 1.1 joerg } 652 1.1 joerg 653 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 654 1.1 joerg _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B) 655 1.1 joerg { 656 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 657 1.1 joerg (__v32hi)_mm512_adds_epi16(__A, __B), 658 1.1 joerg (__v32hi)_mm512_setzero_si512()); 659 1.1 joerg } 660 1.1 joerg 661 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 662 1.1 joerg _mm512_adds_epu8 (__m512i __A, __m512i __B) 663 1.1 joerg { 664 1.1 joerg return (__m512i)__builtin_ia32_paddusb512((__v64qi) __A, (__v64qi) __B); 665 1.1 joerg } 666 1.1 joerg 667 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 668 1.1 joerg _mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 669 1.1 joerg { 670 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 671 1.1 joerg (__v64qi)_mm512_adds_epu8(__A, __B), 672 1.1 joerg (__v64qi)__W); 673 1.1 joerg } 674 1.1 joerg 675 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 676 1.1 joerg _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 677 1.1 joerg { 678 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 679 1.1 joerg (__v64qi)_mm512_adds_epu8(__A, __B), 680 1.1 joerg (__v64qi)_mm512_setzero_si512()); 681 1.1 joerg } 682 1.1 joerg 683 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 684 1.1 joerg _mm512_adds_epu16 (__m512i __A, __m512i __B) 685 1.1 joerg { 686 1.1 joerg return (__m512i)__builtin_ia32_paddusw512((__v32hi) __A, (__v32hi) __B); 687 1.1 joerg } 688 1.1 joerg 689 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 690 1.1 joerg _mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 691 1.1 joerg { 692 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 693 1.1 joerg (__v32hi)_mm512_adds_epu16(__A, __B), 694 1.1 joerg (__v32hi)__W); 695 1.1 joerg } 696 1.1 joerg 697 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 698 1.1 joerg _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 699 1.1 joerg { 700 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 701 1.1 joerg (__v32hi)_mm512_adds_epu16(__A, __B), 702 1.1 joerg (__v32hi)_mm512_setzero_si512()); 703 1.1 joerg } 704 1.1 joerg 705 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 706 1.1 joerg _mm512_avg_epu8 (__m512i __A, __m512i __B) 707 1.1 joerg { 708 1.1 joerg return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B); 709 1.1 joerg } 710 1.1 joerg 711 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 712 1.1 joerg _mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A, 713 1.1 joerg __m512i __B) 714 1.1 joerg { 715 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 716 1.1 joerg (__v64qi)_mm512_avg_epu8(__A, __B), 717 1.1 joerg (__v64qi)__W); 718 1.1 joerg } 719 1.1 joerg 720 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 721 1.1 joerg _mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 722 1.1 joerg { 723 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 724 1.1 joerg (__v64qi)_mm512_avg_epu8(__A, __B), 725 1.1 joerg (__v64qi)_mm512_setzero_si512()); 726 1.1 joerg } 727 1.1 joerg 728 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 729 1.1 joerg _mm512_avg_epu16 (__m512i __A, __m512i __B) 730 1.1 joerg { 731 1.1 joerg return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B); 732 1.1 joerg } 733 1.1 joerg 734 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 735 1.1 joerg _mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A, 736 1.1 joerg __m512i __B) 737 1.1 joerg { 738 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 739 1.1 joerg (__v32hi)_mm512_avg_epu16(__A, __B), 740 1.1 joerg (__v32hi)__W); 741 1.1 joerg } 742 1.1 joerg 743 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 744 1.1 joerg _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 745 1.1 joerg { 746 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 747 1.1 joerg (__v32hi)_mm512_avg_epu16(__A, __B), 748 1.1 joerg (__v32hi) _mm512_setzero_si512()); 749 1.1 joerg } 750 1.1 joerg 751 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 752 1.1 joerg _mm512_max_epi8 (__m512i __A, __m512i __B) 753 1.1 joerg { 754 1.1 joerg return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B); 755 1.1 joerg } 756 1.1 joerg 757 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 758 1.1 joerg _mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B) 759 1.1 joerg { 760 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 761 1.1 joerg (__v64qi)_mm512_max_epi8(__A, __B), 762 1.1 joerg (__v64qi)_mm512_setzero_si512()); 763 1.1 joerg } 764 1.1 joerg 765 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 766 1.1 joerg _mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 767 1.1 joerg { 768 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 769 1.1 joerg (__v64qi)_mm512_max_epi8(__A, __B), 770 1.1 joerg (__v64qi)__W); 771 1.1 joerg } 772 1.1 joerg 773 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 774 1.1 joerg _mm512_max_epi16 (__m512i __A, __m512i __B) 775 1.1 joerg { 776 1.1 joerg return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B); 777 1.1 joerg } 778 1.1 joerg 779 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 780 1.1 joerg _mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B) 781 1.1 joerg { 782 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 783 1.1 joerg (__v32hi)_mm512_max_epi16(__A, __B), 784 1.1 joerg (__v32hi)_mm512_setzero_si512()); 785 1.1 joerg } 786 1.1 joerg 787 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 788 1.1 joerg _mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A, 789 1.1 joerg __m512i __B) 790 1.1 joerg { 791 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 792 1.1 joerg (__v32hi)_mm512_max_epi16(__A, __B), 793 1.1 joerg (__v32hi)__W); 794 1.1 joerg } 795 1.1 joerg 796 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 797 1.1 joerg _mm512_max_epu8 (__m512i __A, __m512i __B) 798 1.1 joerg { 799 1.1 joerg return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B); 800 1.1 joerg } 801 1.1 joerg 802 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 803 1.1 joerg _mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B) 804 1.1 joerg { 805 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 806 1.1 joerg (__v64qi)_mm512_max_epu8(__A, __B), 807 1.1 joerg (__v64qi)_mm512_setzero_si512()); 808 1.1 joerg } 809 1.1 joerg 810 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 811 1.1 joerg _mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 812 1.1 joerg { 813 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 814 1.1 joerg (__v64qi)_mm512_max_epu8(__A, __B), 815 1.1 joerg (__v64qi)__W); 816 1.1 joerg } 817 1.1 joerg 818 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 819 1.1 joerg _mm512_max_epu16 (__m512i __A, __m512i __B) 820 1.1 joerg { 821 1.1 joerg return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B); 822 1.1 joerg } 823 1.1 joerg 824 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 825 1.1 joerg _mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B) 826 1.1 joerg { 827 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 828 1.1 joerg (__v32hi)_mm512_max_epu16(__A, __B), 829 1.1 joerg (__v32hi)_mm512_setzero_si512()); 830 1.1 joerg } 831 1.1 joerg 832 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 833 1.1 joerg _mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 834 1.1 joerg { 835 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 836 1.1 joerg (__v32hi)_mm512_max_epu16(__A, __B), 837 1.1 joerg (__v32hi)__W); 838 1.1 joerg } 839 1.1 joerg 840 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 841 1.1 joerg _mm512_min_epi8 (__m512i __A, __m512i __B) 842 1.1 joerg { 843 1.1 joerg return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B); 844 1.1 joerg } 845 1.1 joerg 846 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 847 1.1 joerg _mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B) 848 1.1 joerg { 849 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 850 1.1 joerg (__v64qi)_mm512_min_epi8(__A, __B), 851 1.1 joerg (__v64qi)_mm512_setzero_si512()); 852 1.1 joerg } 853 1.1 joerg 854 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 855 1.1 joerg _mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 856 1.1 joerg { 857 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 858 1.1 joerg (__v64qi)_mm512_min_epi8(__A, __B), 859 1.1 joerg (__v64qi)__W); 860 1.1 joerg } 861 1.1 joerg 862 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 863 1.1 joerg _mm512_min_epi16 (__m512i __A, __m512i __B) 864 1.1 joerg { 865 1.1 joerg return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B); 866 1.1 joerg } 867 1.1 joerg 868 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 869 1.1 joerg _mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B) 870 1.1 joerg { 871 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 872 1.1 joerg (__v32hi)_mm512_min_epi16(__A, __B), 873 1.1 joerg (__v32hi)_mm512_setzero_si512()); 874 1.1 joerg } 875 1.1 joerg 876 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 877 1.1 joerg _mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 878 1.1 joerg { 879 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 880 1.1 joerg (__v32hi)_mm512_min_epi16(__A, __B), 881 1.1 joerg (__v32hi)__W); 882 1.1 joerg } 883 1.1 joerg 884 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 885 1.1 joerg _mm512_min_epu8 (__m512i __A, __m512i __B) 886 1.1 joerg { 887 1.1 joerg return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B); 888 1.1 joerg } 889 1.1 joerg 890 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 891 1.1 joerg _mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B) 892 1.1 joerg { 893 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 894 1.1 joerg (__v64qi)_mm512_min_epu8(__A, __B), 895 1.1 joerg (__v64qi)_mm512_setzero_si512()); 896 1.1 joerg } 897 1.1 joerg 898 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 899 1.1 joerg _mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 900 1.1 joerg { 901 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 902 1.1 joerg (__v64qi)_mm512_min_epu8(__A, __B), 903 1.1 joerg (__v64qi)__W); 904 1.1 joerg } 905 1.1 joerg 906 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 907 1.1 joerg _mm512_min_epu16 (__m512i __A, __m512i __B) 908 1.1 joerg { 909 1.1 joerg return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B); 910 1.1 joerg } 911 1.1 joerg 912 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 913 1.1 joerg _mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B) 914 1.1 joerg { 915 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 916 1.1 joerg (__v32hi)_mm512_min_epu16(__A, __B), 917 1.1 joerg (__v32hi)_mm512_setzero_si512()); 918 1.1 joerg } 919 1.1 joerg 920 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 921 1.1 joerg _mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 922 1.1 joerg { 923 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 924 1.1 joerg (__v32hi)_mm512_min_epu16(__A, __B), 925 1.1 joerg (__v32hi)__W); 926 1.1 joerg } 927 1.1 joerg 928 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 929 1.1 joerg _mm512_shuffle_epi8(__m512i __A, __m512i __B) 930 1.1 joerg { 931 1.1 joerg return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B); 932 1.1 joerg } 933 1.1 joerg 934 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 935 1.1 joerg _mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 936 1.1 joerg { 937 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 938 1.1 joerg (__v64qi)_mm512_shuffle_epi8(__A, __B), 939 1.1 joerg (__v64qi)__W); 940 1.1 joerg } 941 1.1 joerg 942 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 943 1.1 joerg _mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) 944 1.1 joerg { 945 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 946 1.1 joerg (__v64qi)_mm512_shuffle_epi8(__A, __B), 947 1.1 joerg (__v64qi)_mm512_setzero_si512()); 948 1.1 joerg } 949 1.1 joerg 950 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 951 1.1 joerg _mm512_subs_epi8 (__m512i __A, __m512i __B) 952 1.1 joerg { 953 1.1 joerg return (__m512i)__builtin_ia32_psubsb512((__v64qi)__A, (__v64qi)__B); 954 1.1 joerg } 955 1.1 joerg 956 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 957 1.1 joerg _mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 958 1.1 joerg { 959 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 960 1.1 joerg (__v64qi)_mm512_subs_epi8(__A, __B), 961 1.1 joerg (__v64qi)__W); 962 1.1 joerg } 963 1.1 joerg 964 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 965 1.1 joerg _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B) 966 1.1 joerg { 967 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 968 1.1 joerg (__v64qi)_mm512_subs_epi8(__A, __B), 969 1.1 joerg (__v64qi)_mm512_setzero_si512()); 970 1.1 joerg } 971 1.1 joerg 972 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 973 1.1 joerg _mm512_subs_epi16 (__m512i __A, __m512i __B) 974 1.1 joerg { 975 1.1 joerg return (__m512i)__builtin_ia32_psubsw512((__v32hi)__A, (__v32hi)__B); 976 1.1 joerg } 977 1.1 joerg 978 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 979 1.1 joerg _mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 980 1.1 joerg { 981 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 982 1.1 joerg (__v32hi)_mm512_subs_epi16(__A, __B), 983 1.1 joerg (__v32hi)__W); 984 1.1 joerg } 985 1.1 joerg 986 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 987 1.1 joerg _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) 988 1.1 joerg { 989 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 990 1.1 joerg (__v32hi)_mm512_subs_epi16(__A, __B), 991 1.1 joerg (__v32hi)_mm512_setzero_si512()); 992 1.1 joerg } 993 1.1 joerg 994 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 995 1.1 joerg _mm512_subs_epu8 (__m512i __A, __m512i __B) 996 1.1 joerg { 997 1.1 joerg return (__m512i)__builtin_ia32_psubusb512((__v64qi) __A, (__v64qi) __B); 998 1.1 joerg } 999 1.1 joerg 1000 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1001 1.1 joerg _mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 1002 1.1 joerg { 1003 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1004 1.1 joerg (__v64qi)_mm512_subs_epu8(__A, __B), 1005 1.1 joerg (__v64qi)__W); 1006 1.1 joerg } 1007 1.1 joerg 1008 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1009 1.1 joerg _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 1010 1.1 joerg { 1011 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1012 1.1 joerg (__v64qi)_mm512_subs_epu8(__A, __B), 1013 1.1 joerg (__v64qi)_mm512_setzero_si512()); 1014 1.1 joerg } 1015 1.1 joerg 1016 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1017 1.1 joerg _mm512_subs_epu16 (__m512i __A, __m512i __B) 1018 1.1 joerg { 1019 1.1 joerg return (__m512i)__builtin_ia32_psubusw512((__v32hi) __A, (__v32hi) __B); 1020 1.1 joerg } 1021 1.1 joerg 1022 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1023 1.1 joerg _mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1024 1.1 joerg { 1025 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1026 1.1 joerg (__v32hi)_mm512_subs_epu16(__A, __B), 1027 1.1 joerg (__v32hi)__W); 1028 1.1 joerg } 1029 1.1 joerg 1030 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1031 1.1 joerg _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 1032 1.1 joerg { 1033 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1034 1.1 joerg (__v32hi)_mm512_subs_epu16(__A, __B), 1035 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1036 1.1 joerg } 1037 1.1 joerg 1038 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1039 1.1 joerg _mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B) 1040 1.1 joerg { 1041 1.1 joerg return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I, 1042 1.1 joerg (__v32hi)__B); 1043 1.1 joerg } 1044 1.1 joerg 1045 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1046 1.1 joerg _mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I, 1047 1.1 joerg __m512i __B) 1048 1.1 joerg { 1049 1.1 joerg return (__m512i)__builtin_ia32_selectw_512(__U, 1050 1.1 joerg (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 1051 1.1 joerg (__v32hi)__A); 1052 1.1 joerg } 1053 1.1 joerg 1054 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1055 1.1 joerg _mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U, 1056 1.1 joerg __m512i __B) 1057 1.1 joerg { 1058 1.1 joerg return (__m512i)__builtin_ia32_selectw_512(__U, 1059 1.1 joerg (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 1060 1.1 joerg (__v32hi)__I); 1061 1.1 joerg } 1062 1.1 joerg 1063 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1064 1.1 joerg _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, 1065 1.1 joerg __m512i __B) 1066 1.1 joerg { 1067 1.1 joerg return (__m512i)__builtin_ia32_selectw_512(__U, 1068 1.1 joerg (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 1069 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1070 1.1 joerg } 1071 1.1 joerg 1072 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1073 1.1 joerg _mm512_mulhrs_epi16(__m512i __A, __m512i __B) 1074 1.1 joerg { 1075 1.1 joerg return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B); 1076 1.1 joerg } 1077 1.1 joerg 1078 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1079 1.1 joerg _mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1080 1.1 joerg { 1081 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1082 1.1 joerg (__v32hi)_mm512_mulhrs_epi16(__A, __B), 1083 1.1 joerg (__v32hi)__W); 1084 1.1 joerg } 1085 1.1 joerg 1086 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1087 1.1 joerg _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1088 1.1 joerg { 1089 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1090 1.1 joerg (__v32hi)_mm512_mulhrs_epi16(__A, __B), 1091 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1092 1.1 joerg } 1093 1.1 joerg 1094 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1095 1.1 joerg _mm512_mulhi_epi16(__m512i __A, __m512i __B) 1096 1.1 joerg { 1097 1.1 joerg return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B); 1098 1.1 joerg } 1099 1.1 joerg 1100 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1101 1.1 joerg _mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, 1102 1.1 joerg __m512i __B) 1103 1.1 joerg { 1104 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1105 1.1 joerg (__v32hi)_mm512_mulhi_epi16(__A, __B), 1106 1.1 joerg (__v32hi)__W); 1107 1.1 joerg } 1108 1.1 joerg 1109 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1110 1.1 joerg _mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1111 1.1 joerg { 1112 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1113 1.1 joerg (__v32hi)_mm512_mulhi_epi16(__A, __B), 1114 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1115 1.1 joerg } 1116 1.1 joerg 1117 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1118 1.1 joerg _mm512_mulhi_epu16(__m512i __A, __m512i __B) 1119 1.1 joerg { 1120 1.1 joerg return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B); 1121 1.1 joerg } 1122 1.1 joerg 1123 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1124 1.1 joerg _mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1125 1.1 joerg { 1126 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1127 1.1 joerg (__v32hi)_mm512_mulhi_epu16(__A, __B), 1128 1.1 joerg (__v32hi)__W); 1129 1.1 joerg } 1130 1.1 joerg 1131 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1132 1.1 joerg _mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 1133 1.1 joerg { 1134 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1135 1.1 joerg (__v32hi)_mm512_mulhi_epu16(__A, __B), 1136 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1137 1.1 joerg } 1138 1.1 joerg 1139 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1140 1.1 joerg _mm512_maddubs_epi16(__m512i __X, __m512i __Y) { 1141 1.1 joerg return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y); 1142 1.1 joerg } 1143 1.1 joerg 1144 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1145 1.1 joerg _mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, 1146 1.1 joerg __m512i __Y) { 1147 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, 1148 1.1 joerg (__v32hi)_mm512_maddubs_epi16(__X, __Y), 1149 1.1 joerg (__v32hi)__W); 1150 1.1 joerg } 1151 1.1 joerg 1152 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1153 1.1 joerg _mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) { 1154 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, 1155 1.1 joerg (__v32hi)_mm512_maddubs_epi16(__X, __Y), 1156 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1157 1.1 joerg } 1158 1.1 joerg 1159 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1160 1.1 joerg _mm512_madd_epi16(__m512i __A, __m512i __B) { 1161 1.1 joerg return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B); 1162 1.1 joerg } 1163 1.1 joerg 1164 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1165 1.1 joerg _mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { 1166 1.1 joerg return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 1167 1.1 joerg (__v16si)_mm512_madd_epi16(__A, __B), 1168 1.1 joerg (__v16si)__W); 1169 1.1 joerg } 1170 1.1 joerg 1171 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1172 1.1 joerg _mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) { 1173 1.1 joerg return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 1174 1.1 joerg (__v16si)_mm512_madd_epi16(__A, __B), 1175 1.1 joerg (__v16si)_mm512_setzero_si512()); 1176 1.1 joerg } 1177 1.1 joerg 1178 1.1 joerg static __inline__ __m256i __DEFAULT_FN_ATTRS512 1179 1.1 joerg _mm512_cvtsepi16_epi8 (__m512i __A) { 1180 1.1 joerg return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1181 1.1 joerg (__v32qi)_mm256_setzero_si256(), 1182 1.1 joerg (__mmask32) -1); 1183 1.1 joerg } 1184 1.1 joerg 1185 1.1 joerg static __inline__ __m256i __DEFAULT_FN_ATTRS512 1186 1.1 joerg _mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1187 1.1 joerg return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1188 1.1 joerg (__v32qi)__O, 1189 1.1 joerg __M); 1190 1.1 joerg } 1191 1.1 joerg 1192 1.1 joerg static __inline__ __m256i __DEFAULT_FN_ATTRS512 1193 1.1 joerg _mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) { 1194 1.1 joerg return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1195 1.1 joerg (__v32qi) _mm256_setzero_si256(), 1196 1.1 joerg __M); 1197 1.1 joerg } 1198 1.1 joerg 1199 1.1 joerg static __inline__ __m256i __DEFAULT_FN_ATTRS512 1200 1.1 joerg _mm512_cvtusepi16_epi8 (__m512i __A) { 1201 1.1 joerg return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1202 1.1 joerg (__v32qi) _mm256_setzero_si256(), 1203 1.1 joerg (__mmask32) -1); 1204 1.1 joerg } 1205 1.1 joerg 1206 1.1 joerg static __inline__ __m256i __DEFAULT_FN_ATTRS512 1207 1.1 joerg _mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1208 1.1 joerg return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1209 1.1 joerg (__v32qi) __O, 1210 1.1 joerg __M); 1211 1.1 joerg } 1212 1.1 joerg 1213 1.1 joerg static __inline__ __m256i __DEFAULT_FN_ATTRS512 1214 1.1 joerg _mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) { 1215 1.1 joerg return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1216 1.1 joerg (__v32qi) _mm256_setzero_si256(), 1217 1.1 joerg __M); 1218 1.1 joerg } 1219 1.1 joerg 1220 1.1 joerg static __inline__ __m256i __DEFAULT_FN_ATTRS512 1221 1.1 joerg _mm512_cvtepi16_epi8 (__m512i __A) { 1222 1.1 joerg return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1223 1.1 joerg (__v32qi) _mm256_undefined_si256(), 1224 1.1 joerg (__mmask32) -1); 1225 1.1 joerg } 1226 1.1 joerg 1227 1.1 joerg static __inline__ __m256i __DEFAULT_FN_ATTRS512 1228 1.1 joerg _mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1229 1.1 joerg return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1230 1.1 joerg (__v32qi) __O, 1231 1.1 joerg __M); 1232 1.1 joerg } 1233 1.1 joerg 1234 1.1 joerg static __inline__ __m256i __DEFAULT_FN_ATTRS512 1235 1.1 joerg _mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) { 1236 1.1 joerg return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1237 1.1 joerg (__v32qi) _mm256_setzero_si256(), 1238 1.1 joerg __M); 1239 1.1 joerg } 1240 1.1 joerg 1241 1.1 joerg static __inline__ void __DEFAULT_FN_ATTRS512 1242 1.1 joerg _mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1243 1.1 joerg { 1244 1.1 joerg __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1245 1.1 joerg } 1246 1.1 joerg 1247 1.1 joerg static __inline__ void __DEFAULT_FN_ATTRS512 1248 1.1 joerg _mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1249 1.1 joerg { 1250 1.1 joerg __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1251 1.1 joerg } 1252 1.1 joerg 1253 1.1 joerg static __inline__ void __DEFAULT_FN_ATTRS512 1254 1.1 joerg _mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1255 1.1 joerg { 1256 1.1 joerg __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1257 1.1 joerg } 1258 1.1 joerg 1259 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1260 1.1 joerg _mm512_unpackhi_epi8(__m512i __A, __m512i __B) { 1261 1.1 joerg return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 1262 1.1 joerg 8, 64+8, 9, 64+9, 1263 1.1 joerg 10, 64+10, 11, 64+11, 1264 1.1 joerg 12, 64+12, 13, 64+13, 1265 1.1 joerg 14, 64+14, 15, 64+15, 1266 1.1 joerg 24, 64+24, 25, 64+25, 1267 1.1 joerg 26, 64+26, 27, 64+27, 1268 1.1 joerg 28, 64+28, 29, 64+29, 1269 1.1 joerg 30, 64+30, 31, 64+31, 1270 1.1 joerg 40, 64+40, 41, 64+41, 1271 1.1 joerg 42, 64+42, 43, 64+43, 1272 1.1 joerg 44, 64+44, 45, 64+45, 1273 1.1 joerg 46, 64+46, 47, 64+47, 1274 1.1 joerg 56, 64+56, 57, 64+57, 1275 1.1 joerg 58, 64+58, 59, 64+59, 1276 1.1 joerg 60, 64+60, 61, 64+61, 1277 1.1 joerg 62, 64+62, 63, 64+63); 1278 1.1 joerg } 1279 1.1 joerg 1280 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1281 1.1 joerg _mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 1282 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1283 1.1 joerg (__v64qi)_mm512_unpackhi_epi8(__A, __B), 1284 1.1 joerg (__v64qi)__W); 1285 1.1 joerg } 1286 1.1 joerg 1287 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1288 1.1 joerg _mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 1289 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1290 1.1 joerg (__v64qi)_mm512_unpackhi_epi8(__A, __B), 1291 1.1 joerg (__v64qi)_mm512_setzero_si512()); 1292 1.1 joerg } 1293 1.1 joerg 1294 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1295 1.1 joerg _mm512_unpackhi_epi16(__m512i __A, __m512i __B) { 1296 1.1 joerg return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 1297 1.1 joerg 4, 32+4, 5, 32+5, 1298 1.1 joerg 6, 32+6, 7, 32+7, 1299 1.1 joerg 12, 32+12, 13, 32+13, 1300 1.1 joerg 14, 32+14, 15, 32+15, 1301 1.1 joerg 20, 32+20, 21, 32+21, 1302 1.1 joerg 22, 32+22, 23, 32+23, 1303 1.1 joerg 28, 32+28, 29, 32+29, 1304 1.1 joerg 30, 32+30, 31, 32+31); 1305 1.1 joerg } 1306 1.1 joerg 1307 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1308 1.1 joerg _mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 1309 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1310 1.1 joerg (__v32hi)_mm512_unpackhi_epi16(__A, __B), 1311 1.1 joerg (__v32hi)__W); 1312 1.1 joerg } 1313 1.1 joerg 1314 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1315 1.1 joerg _mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 1316 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1317 1.1 joerg (__v32hi)_mm512_unpackhi_epi16(__A, __B), 1318 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1319 1.1 joerg } 1320 1.1 joerg 1321 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1322 1.1 joerg _mm512_unpacklo_epi8(__m512i __A, __m512i __B) { 1323 1.1 joerg return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 1324 1.1 joerg 0, 64+0, 1, 64+1, 1325 1.1 joerg 2, 64+2, 3, 64+3, 1326 1.1 joerg 4, 64+4, 5, 64+5, 1327 1.1 joerg 6, 64+6, 7, 64+7, 1328 1.1 joerg 16, 64+16, 17, 64+17, 1329 1.1 joerg 18, 64+18, 19, 64+19, 1330 1.1 joerg 20, 64+20, 21, 64+21, 1331 1.1 joerg 22, 64+22, 23, 64+23, 1332 1.1 joerg 32, 64+32, 33, 64+33, 1333 1.1 joerg 34, 64+34, 35, 64+35, 1334 1.1 joerg 36, 64+36, 37, 64+37, 1335 1.1 joerg 38, 64+38, 39, 64+39, 1336 1.1 joerg 48, 64+48, 49, 64+49, 1337 1.1 joerg 50, 64+50, 51, 64+51, 1338 1.1 joerg 52, 64+52, 53, 64+53, 1339 1.1 joerg 54, 64+54, 55, 64+55); 1340 1.1 joerg } 1341 1.1 joerg 1342 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1343 1.1 joerg _mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 1344 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1345 1.1 joerg (__v64qi)_mm512_unpacklo_epi8(__A, __B), 1346 1.1 joerg (__v64qi)__W); 1347 1.1 joerg } 1348 1.1 joerg 1349 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1350 1.1 joerg _mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 1351 1.1 joerg return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1352 1.1 joerg (__v64qi)_mm512_unpacklo_epi8(__A, __B), 1353 1.1 joerg (__v64qi)_mm512_setzero_si512()); 1354 1.1 joerg } 1355 1.1 joerg 1356 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1357 1.1 joerg _mm512_unpacklo_epi16(__m512i __A, __m512i __B) { 1358 1.1 joerg return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 1359 1.1 joerg 0, 32+0, 1, 32+1, 1360 1.1 joerg 2, 32+2, 3, 32+3, 1361 1.1 joerg 8, 32+8, 9, 32+9, 1362 1.1 joerg 10, 32+10, 11, 32+11, 1363 1.1 joerg 16, 32+16, 17, 32+17, 1364 1.1 joerg 18, 32+18, 19, 32+19, 1365 1.1 joerg 24, 32+24, 25, 32+25, 1366 1.1 joerg 26, 32+26, 27, 32+27); 1367 1.1 joerg } 1368 1.1 joerg 1369 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1370 1.1 joerg _mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 1371 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1372 1.1 joerg (__v32hi)_mm512_unpacklo_epi16(__A, __B), 1373 1.1 joerg (__v32hi)__W); 1374 1.1 joerg } 1375 1.1 joerg 1376 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1377 1.1 joerg _mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 1378 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1379 1.1 joerg (__v32hi)_mm512_unpacklo_epi16(__A, __B), 1380 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1381 1.1 joerg } 1382 1.1 joerg 1383 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1384 1.1 joerg _mm512_cvtepi8_epi16(__m256i __A) 1385 1.1 joerg { 1386 1.1 joerg /* This function always performs a signed extension, but __v32qi is a char 1387 1.1 joerg which may be signed or unsigned, so use __v32qs. */ 1388 1.1 joerg return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi); 1389 1.1 joerg } 1390 1.1 joerg 1391 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1392 1.1 joerg _mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A) 1393 1.1 joerg { 1394 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1395 1.1 joerg (__v32hi)_mm512_cvtepi8_epi16(__A), 1396 1.1 joerg (__v32hi)__W); 1397 1.1 joerg } 1398 1.1 joerg 1399 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1400 1.1 joerg _mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A) 1401 1.1 joerg { 1402 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1403 1.1 joerg (__v32hi)_mm512_cvtepi8_epi16(__A), 1404 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1405 1.1 joerg } 1406 1.1 joerg 1407 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1408 1.1 joerg _mm512_cvtepu8_epi16(__m256i __A) 1409 1.1 joerg { 1410 1.1 joerg return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi); 1411 1.1 joerg } 1412 1.1 joerg 1413 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1414 1.1 joerg _mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A) 1415 1.1 joerg { 1416 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1417 1.1 joerg (__v32hi)_mm512_cvtepu8_epi16(__A), 1418 1.1 joerg (__v32hi)__W); 1419 1.1 joerg } 1420 1.1 joerg 1421 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1422 1.1 joerg _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) 1423 1.1 joerg { 1424 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1425 1.1 joerg (__v32hi)_mm512_cvtepu8_epi16(__A), 1426 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1427 1.1 joerg } 1428 1.1 joerg 1429 1.1 joerg 1430 1.1 joerg #define _mm512_shufflehi_epi16(A, imm) \ 1431 1.1 joerg (__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm)) 1432 1.1 joerg 1433 1.1 joerg #define _mm512_mask_shufflehi_epi16(W, U, A, imm) \ 1434 1.1 joerg (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1435 1.1 joerg (__v32hi)_mm512_shufflehi_epi16((A), \ 1436 1.1 joerg (imm)), \ 1437 1.1 joerg (__v32hi)(__m512i)(W)) 1438 1.1 joerg 1439 1.1 joerg #define _mm512_maskz_shufflehi_epi16(U, A, imm) \ 1440 1.1 joerg (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1441 1.1 joerg (__v32hi)_mm512_shufflehi_epi16((A), \ 1442 1.1 joerg (imm)), \ 1443 1.1 joerg (__v32hi)_mm512_setzero_si512()) 1444 1.1 joerg 1445 1.1 joerg #define _mm512_shufflelo_epi16(A, imm) \ 1446 1.1 joerg (__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm)) 1447 1.1 joerg 1448 1.1 joerg 1449 1.1 joerg #define _mm512_mask_shufflelo_epi16(W, U, A, imm) \ 1450 1.1 joerg (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1451 1.1 joerg (__v32hi)_mm512_shufflelo_epi16((A), \ 1452 1.1 joerg (imm)), \ 1453 1.1 joerg (__v32hi)(__m512i)(W)) 1454 1.1 joerg 1455 1.1 joerg 1456 1.1 joerg #define _mm512_maskz_shufflelo_epi16(U, A, imm) \ 1457 1.1 joerg (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1458 1.1 joerg (__v32hi)_mm512_shufflelo_epi16((A), \ 1459 1.1 joerg (imm)), \ 1460 1.1 joerg (__v32hi)_mm512_setzero_si512()) 1461 1.1 joerg 1462 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1463 1.1 joerg _mm512_sllv_epi16(__m512i __A, __m512i __B) 1464 1.1 joerg { 1465 1.1 joerg return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B); 1466 1.1 joerg } 1467 1.1 joerg 1468 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1469 1.1 joerg _mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1470 1.1 joerg { 1471 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1472 1.1 joerg (__v32hi)_mm512_sllv_epi16(__A, __B), 1473 1.1 joerg (__v32hi)__W); 1474 1.1 joerg } 1475 1.1 joerg 1476 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1477 1.1 joerg _mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1478 1.1 joerg { 1479 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1480 1.1 joerg (__v32hi)_mm512_sllv_epi16(__A, __B), 1481 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1482 1.1 joerg } 1483 1.1 joerg 1484 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1485 1.1 joerg _mm512_sll_epi16(__m512i __A, __m128i __B) 1486 1.1 joerg { 1487 1.1 joerg return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B); 1488 1.1 joerg } 1489 1.1 joerg 1490 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1491 1.1 joerg _mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1492 1.1 joerg { 1493 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1494 1.1 joerg (__v32hi)_mm512_sll_epi16(__A, __B), 1495 1.1 joerg (__v32hi)__W); 1496 1.1 joerg } 1497 1.1 joerg 1498 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1499 1.1 joerg _mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1500 1.1 joerg { 1501 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1502 1.1 joerg (__v32hi)_mm512_sll_epi16(__A, __B), 1503 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1504 1.1 joerg } 1505 1.1 joerg 1506 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1507 1.1.1.2 joerg _mm512_slli_epi16(__m512i __A, unsigned int __B) 1508 1.1 joerg { 1509 1.1 joerg return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B); 1510 1.1 joerg } 1511 1.1 joerg 1512 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1513 1.1.1.2 joerg _mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, 1514 1.1.1.2 joerg unsigned int __B) 1515 1.1 joerg { 1516 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1517 1.1 joerg (__v32hi)_mm512_slli_epi16(__A, __B), 1518 1.1 joerg (__v32hi)__W); 1519 1.1 joerg } 1520 1.1 joerg 1521 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1522 1.1.1.2 joerg _mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B) 1523 1.1 joerg { 1524 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1525 1.1 joerg (__v32hi)_mm512_slli_epi16(__A, __B), 1526 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1527 1.1 joerg } 1528 1.1 joerg 1529 1.1 joerg #define _mm512_bslli_epi128(a, imm) \ 1530 1.1 joerg (__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)) 1531 1.1 joerg 1532 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1533 1.1 joerg _mm512_srlv_epi16(__m512i __A, __m512i __B) 1534 1.1 joerg { 1535 1.1 joerg return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B); 1536 1.1 joerg } 1537 1.1 joerg 1538 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1539 1.1 joerg _mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1540 1.1 joerg { 1541 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1542 1.1 joerg (__v32hi)_mm512_srlv_epi16(__A, __B), 1543 1.1 joerg (__v32hi)__W); 1544 1.1 joerg } 1545 1.1 joerg 1546 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1547 1.1 joerg _mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1548 1.1 joerg { 1549 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1550 1.1 joerg (__v32hi)_mm512_srlv_epi16(__A, __B), 1551 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1552 1.1 joerg } 1553 1.1 joerg 1554 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1555 1.1 joerg _mm512_srav_epi16(__m512i __A, __m512i __B) 1556 1.1 joerg { 1557 1.1 joerg return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B); 1558 1.1 joerg } 1559 1.1 joerg 1560 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1561 1.1 joerg _mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1562 1.1 joerg { 1563 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1564 1.1 joerg (__v32hi)_mm512_srav_epi16(__A, __B), 1565 1.1 joerg (__v32hi)__W); 1566 1.1 joerg } 1567 1.1 joerg 1568 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1569 1.1 joerg _mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1570 1.1 joerg { 1571 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1572 1.1 joerg (__v32hi)_mm512_srav_epi16(__A, __B), 1573 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1574 1.1 joerg } 1575 1.1 joerg 1576 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1577 1.1 joerg _mm512_sra_epi16(__m512i __A, __m128i __B) 1578 1.1 joerg { 1579 1.1 joerg return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B); 1580 1.1 joerg } 1581 1.1 joerg 1582 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1583 1.1 joerg _mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1584 1.1 joerg { 1585 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1586 1.1 joerg (__v32hi)_mm512_sra_epi16(__A, __B), 1587 1.1 joerg (__v32hi)__W); 1588 1.1 joerg } 1589 1.1 joerg 1590 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1591 1.1 joerg _mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1592 1.1 joerg { 1593 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1594 1.1 joerg (__v32hi)_mm512_sra_epi16(__A, __B), 1595 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1596 1.1 joerg } 1597 1.1 joerg 1598 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1599 1.1.1.2 joerg _mm512_srai_epi16(__m512i __A, unsigned int __B) 1600 1.1 joerg { 1601 1.1 joerg return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B); 1602 1.1 joerg } 1603 1.1 joerg 1604 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1605 1.1.1.2 joerg _mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, 1606 1.1.1.2 joerg unsigned int __B) 1607 1.1 joerg { 1608 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1609 1.1 joerg (__v32hi)_mm512_srai_epi16(__A, __B), 1610 1.1 joerg (__v32hi)__W); 1611 1.1 joerg } 1612 1.1 joerg 1613 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1614 1.1.1.2 joerg _mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, unsigned int __B) 1615 1.1 joerg { 1616 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1617 1.1 joerg (__v32hi)_mm512_srai_epi16(__A, __B), 1618 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1619 1.1 joerg } 1620 1.1 joerg 1621 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1622 1.1 joerg _mm512_srl_epi16(__m512i __A, __m128i __B) 1623 1.1 joerg { 1624 1.1 joerg return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B); 1625 1.1 joerg } 1626 1.1 joerg 1627 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1628 1.1 joerg _mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1629 1.1 joerg { 1630 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1631 1.1 joerg (__v32hi)_mm512_srl_epi16(__A, __B), 1632 1.1 joerg (__v32hi)__W); 1633 1.1 joerg } 1634 1.1 joerg 1635 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1636 1.1 joerg _mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1637 1.1 joerg { 1638 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1639 1.1 joerg (__v32hi)_mm512_srl_epi16(__A, __B), 1640 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1641 1.1 joerg } 1642 1.1 joerg 1643 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1644 1.1.1.2 joerg _mm512_srli_epi16(__m512i __A, unsigned int __B) 1645 1.1 joerg { 1646 1.1 joerg return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, __B); 1647 1.1 joerg } 1648 1.1 joerg 1649 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1650 1.1.1.2 joerg _mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, 1651 1.1.1.2 joerg unsigned int __B) 1652 1.1 joerg { 1653 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1654 1.1 joerg (__v32hi)_mm512_srli_epi16(__A, __B), 1655 1.1 joerg (__v32hi)__W); 1656 1.1 joerg } 1657 1.1 joerg 1658 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1659 1.1 joerg _mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) 1660 1.1 joerg { 1661 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1662 1.1 joerg (__v32hi)_mm512_srli_epi16(__A, __B), 1663 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1664 1.1 joerg } 1665 1.1 joerg 1666 1.1 joerg #define _mm512_bsrli_epi128(a, imm) \ 1667 1.1 joerg (__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)) 1668 1.1 joerg 1669 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1670 1.1 joerg _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A) 1671 1.1 joerg { 1672 1.1 joerg return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 1673 1.1 joerg (__v32hi) __A, 1674 1.1 joerg (__v32hi) __W); 1675 1.1 joerg } 1676 1.1 joerg 1677 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1678 1.1 joerg _mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A) 1679 1.1 joerg { 1680 1.1 joerg return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 1681 1.1 joerg (__v32hi) __A, 1682 1.1 joerg (__v32hi) _mm512_setzero_si512 ()); 1683 1.1 joerg } 1684 1.1 joerg 1685 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1686 1.1 joerg _mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A) 1687 1.1 joerg { 1688 1.1 joerg return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 1689 1.1 joerg (__v64qi) __A, 1690 1.1 joerg (__v64qi) __W); 1691 1.1 joerg } 1692 1.1 joerg 1693 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1694 1.1 joerg _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A) 1695 1.1 joerg { 1696 1.1 joerg return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 1697 1.1 joerg (__v64qi) __A, 1698 1.1 joerg (__v64qi) _mm512_setzero_si512 ()); 1699 1.1 joerg } 1700 1.1 joerg 1701 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1702 1.1 joerg _mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A) 1703 1.1 joerg { 1704 1.1 joerg return (__m512i) __builtin_ia32_selectb_512(__M, 1705 1.1 joerg (__v64qi)_mm512_set1_epi8(__A), 1706 1.1 joerg (__v64qi) __O); 1707 1.1 joerg } 1708 1.1 joerg 1709 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1710 1.1 joerg _mm512_maskz_set1_epi8 (__mmask64 __M, char __A) 1711 1.1 joerg { 1712 1.1 joerg return (__m512i) __builtin_ia32_selectb_512(__M, 1713 1.1 joerg (__v64qi) _mm512_set1_epi8(__A), 1714 1.1 joerg (__v64qi) _mm512_setzero_si512()); 1715 1.1 joerg } 1716 1.1 joerg 1717 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS 1718 1.1 joerg _mm512_kunpackd (__mmask64 __A, __mmask64 __B) 1719 1.1 joerg { 1720 1.1 joerg return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, 1721 1.1 joerg (__mmask64) __B); 1722 1.1 joerg } 1723 1.1 joerg 1724 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS 1725 1.1 joerg _mm512_kunpackw (__mmask32 __A, __mmask32 __B) 1726 1.1 joerg { 1727 1.1 joerg return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, 1728 1.1 joerg (__mmask32) __B); 1729 1.1 joerg } 1730 1.1 joerg 1731 1.1 joerg static __inline __m512i __DEFAULT_FN_ATTRS512 1732 1.1 joerg _mm512_loadu_epi16 (void const *__P) 1733 1.1 joerg { 1734 1.1 joerg struct __loadu_epi16 { 1735 1.1 joerg __m512i_u __v; 1736 1.1 joerg } __attribute__((__packed__, __may_alias__)); 1737 1.1.1.2 joerg return ((const struct __loadu_epi16*)__P)->__v; 1738 1.1 joerg } 1739 1.1 joerg 1740 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1741 1.1 joerg _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P) 1742 1.1 joerg { 1743 1.1.1.2 joerg return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P, 1744 1.1 joerg (__v32hi) __W, 1745 1.1 joerg (__mmask32) __U); 1746 1.1 joerg } 1747 1.1 joerg 1748 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1749 1.1 joerg _mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P) 1750 1.1 joerg { 1751 1.1.1.2 joerg return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P, 1752 1.1 joerg (__v32hi) 1753 1.1 joerg _mm512_setzero_si512 (), 1754 1.1 joerg (__mmask32) __U); 1755 1.1 joerg } 1756 1.1 joerg 1757 1.1 joerg static __inline __m512i __DEFAULT_FN_ATTRS512 1758 1.1 joerg _mm512_loadu_epi8 (void const *__P) 1759 1.1 joerg { 1760 1.1 joerg struct __loadu_epi8 { 1761 1.1 joerg __m512i_u __v; 1762 1.1 joerg } __attribute__((__packed__, __may_alias__)); 1763 1.1.1.2 joerg return ((const struct __loadu_epi8*)__P)->__v; 1764 1.1 joerg } 1765 1.1 joerg 1766 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1767 1.1 joerg _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P) 1768 1.1 joerg { 1769 1.1.1.2 joerg return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P, 1770 1.1 joerg (__v64qi) __W, 1771 1.1 joerg (__mmask64) __U); 1772 1.1 joerg } 1773 1.1 joerg 1774 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1775 1.1 joerg _mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P) 1776 1.1 joerg { 1777 1.1.1.2 joerg return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P, 1778 1.1 joerg (__v64qi) 1779 1.1 joerg _mm512_setzero_si512 (), 1780 1.1 joerg (__mmask64) __U); 1781 1.1 joerg } 1782 1.1 joerg 1783 1.1 joerg static __inline void __DEFAULT_FN_ATTRS512 1784 1.1 joerg _mm512_storeu_epi16 (void *__P, __m512i __A) 1785 1.1 joerg { 1786 1.1 joerg struct __storeu_epi16 { 1787 1.1 joerg __m512i_u __v; 1788 1.1 joerg } __attribute__((__packed__, __may_alias__)); 1789 1.1 joerg ((struct __storeu_epi16*)__P)->__v = __A; 1790 1.1 joerg } 1791 1.1 joerg 1792 1.1 joerg static __inline__ void __DEFAULT_FN_ATTRS512 1793 1.1 joerg _mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A) 1794 1.1 joerg { 1795 1.1 joerg __builtin_ia32_storedquhi512_mask ((__v32hi *) __P, 1796 1.1 joerg (__v32hi) __A, 1797 1.1 joerg (__mmask32) __U); 1798 1.1 joerg } 1799 1.1 joerg 1800 1.1 joerg static __inline void __DEFAULT_FN_ATTRS512 1801 1.1 joerg _mm512_storeu_epi8 (void *__P, __m512i __A) 1802 1.1 joerg { 1803 1.1 joerg struct __storeu_epi8 { 1804 1.1 joerg __m512i_u __v; 1805 1.1 joerg } __attribute__((__packed__, __may_alias__)); 1806 1.1 joerg ((struct __storeu_epi8*)__P)->__v = __A; 1807 1.1 joerg } 1808 1.1 joerg 1809 1.1 joerg static __inline__ void __DEFAULT_FN_ATTRS512 1810 1.1 joerg _mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A) 1811 1.1 joerg { 1812 1.1 joerg __builtin_ia32_storedquqi512_mask ((__v64qi *) __P, 1813 1.1 joerg (__v64qi) __A, 1814 1.1 joerg (__mmask64) __U); 1815 1.1 joerg } 1816 1.1 joerg 1817 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1818 1.1 joerg _mm512_test_epi8_mask (__m512i __A, __m512i __B) 1819 1.1 joerg { 1820 1.1 joerg return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B), 1821 1.1 joerg _mm512_setzero_si512()); 1822 1.1 joerg } 1823 1.1 joerg 1824 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1825 1.1 joerg _mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) 1826 1.1 joerg { 1827 1.1 joerg return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), 1828 1.1 joerg _mm512_setzero_si512()); 1829 1.1 joerg } 1830 1.1 joerg 1831 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1832 1.1 joerg _mm512_test_epi16_mask (__m512i __A, __m512i __B) 1833 1.1 joerg { 1834 1.1 joerg return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B), 1835 1.1 joerg _mm512_setzero_si512()); 1836 1.1 joerg } 1837 1.1 joerg 1838 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1839 1.1 joerg _mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) 1840 1.1 joerg { 1841 1.1 joerg return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), 1842 1.1 joerg _mm512_setzero_si512()); 1843 1.1 joerg } 1844 1.1 joerg 1845 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1846 1.1 joerg _mm512_testn_epi8_mask (__m512i __A, __m512i __B) 1847 1.1 joerg { 1848 1.1 joerg return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); 1849 1.1 joerg } 1850 1.1 joerg 1851 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1852 1.1 joerg _mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) 1853 1.1 joerg { 1854 1.1 joerg return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), 1855 1.1 joerg _mm512_setzero_si512()); 1856 1.1 joerg } 1857 1.1 joerg 1858 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1859 1.1 joerg _mm512_testn_epi16_mask (__m512i __A, __m512i __B) 1860 1.1 joerg { 1861 1.1 joerg return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B), 1862 1.1 joerg _mm512_setzero_si512()); 1863 1.1 joerg } 1864 1.1 joerg 1865 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1866 1.1 joerg _mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) 1867 1.1 joerg { 1868 1.1 joerg return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), 1869 1.1 joerg _mm512_setzero_si512()); 1870 1.1 joerg } 1871 1.1 joerg 1872 1.1 joerg static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1873 1.1 joerg _mm512_movepi8_mask (__m512i __A) 1874 1.1 joerg { 1875 1.1 joerg return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A); 1876 1.1 joerg } 1877 1.1 joerg 1878 1.1 joerg static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1879 1.1 joerg _mm512_movepi16_mask (__m512i __A) 1880 1.1 joerg { 1881 1.1 joerg return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A); 1882 1.1 joerg } 1883 1.1 joerg 1884 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1885 1.1 joerg _mm512_movm_epi8 (__mmask64 __A) 1886 1.1 joerg { 1887 1.1 joerg return (__m512i) __builtin_ia32_cvtmask2b512 (__A); 1888 1.1 joerg } 1889 1.1 joerg 1890 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1891 1.1 joerg _mm512_movm_epi16 (__mmask32 __A) 1892 1.1 joerg { 1893 1.1 joerg return (__m512i) __builtin_ia32_cvtmask2w512 (__A); 1894 1.1 joerg } 1895 1.1 joerg 1896 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1897 1.1 joerg _mm512_broadcastb_epi8 (__m128i __A) 1898 1.1 joerg { 1899 1.1 joerg return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A, 1900 1.1 joerg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1901 1.1 joerg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1902 1.1 joerg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1903 1.1 joerg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 1904 1.1 joerg } 1905 1.1 joerg 1906 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1907 1.1 joerg _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A) 1908 1.1 joerg { 1909 1.1 joerg return (__m512i)__builtin_ia32_selectb_512(__M, 1910 1.1 joerg (__v64qi) _mm512_broadcastb_epi8(__A), 1911 1.1 joerg (__v64qi) __O); 1912 1.1 joerg } 1913 1.1 joerg 1914 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1915 1.1 joerg _mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A) 1916 1.1 joerg { 1917 1.1 joerg return (__m512i)__builtin_ia32_selectb_512(__M, 1918 1.1 joerg (__v64qi) _mm512_broadcastb_epi8(__A), 1919 1.1 joerg (__v64qi) _mm512_setzero_si512()); 1920 1.1 joerg } 1921 1.1 joerg 1922 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1923 1.1 joerg _mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A) 1924 1.1 joerg { 1925 1.1 joerg return (__m512i) __builtin_ia32_selectw_512(__M, 1926 1.1 joerg (__v32hi) _mm512_set1_epi16(__A), 1927 1.1 joerg (__v32hi) __O); 1928 1.1 joerg } 1929 1.1 joerg 1930 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1931 1.1 joerg _mm512_maskz_set1_epi16 (__mmask32 __M, short __A) 1932 1.1 joerg { 1933 1.1 joerg return (__m512i) __builtin_ia32_selectw_512(__M, 1934 1.1 joerg (__v32hi) _mm512_set1_epi16(__A), 1935 1.1 joerg (__v32hi) _mm512_setzero_si512()); 1936 1.1 joerg } 1937 1.1 joerg 1938 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1939 1.1 joerg _mm512_broadcastw_epi16 (__m128i __A) 1940 1.1 joerg { 1941 1.1 joerg return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A, 1942 1.1 joerg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1943 1.1 joerg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 1944 1.1 joerg } 1945 1.1 joerg 1946 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1947 1.1 joerg _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A) 1948 1.1 joerg { 1949 1.1 joerg return (__m512i)__builtin_ia32_selectw_512(__M, 1950 1.1 joerg (__v32hi) _mm512_broadcastw_epi16(__A), 1951 1.1 joerg (__v32hi) __O); 1952 1.1 joerg } 1953 1.1 joerg 1954 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1955 1.1 joerg _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A) 1956 1.1 joerg { 1957 1.1 joerg return (__m512i)__builtin_ia32_selectw_512(__M, 1958 1.1 joerg (__v32hi) _mm512_broadcastw_epi16(__A), 1959 1.1 joerg (__v32hi) _mm512_setzero_si512()); 1960 1.1 joerg } 1961 1.1 joerg 1962 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1963 1.1 joerg _mm512_permutexvar_epi16 (__m512i __A, __m512i __B) 1964 1.1 joerg { 1965 1.1 joerg return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); 1966 1.1 joerg } 1967 1.1 joerg 1968 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1969 1.1 joerg _mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A, 1970 1.1 joerg __m512i __B) 1971 1.1 joerg { 1972 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 1973 1.1 joerg (__v32hi)_mm512_permutexvar_epi16(__A, __B), 1974 1.1 joerg (__v32hi)_mm512_setzero_si512()); 1975 1.1 joerg } 1976 1.1 joerg 1977 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 1978 1.1 joerg _mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, 1979 1.1 joerg __m512i __B) 1980 1.1 joerg { 1981 1.1 joerg return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 1982 1.1 joerg (__v32hi)_mm512_permutexvar_epi16(__A, __B), 1983 1.1 joerg (__v32hi)__W); 1984 1.1 joerg } 1985 1.1 joerg 1986 1.1 joerg #define _mm512_alignr_epi8(A, B, N) \ 1987 1.1 joerg (__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \ 1988 1.1 joerg (__v64qi)(__m512i)(B), (int)(N)) 1989 1.1 joerg 1990 1.1 joerg #define _mm512_mask_alignr_epi8(W, U, A, B, N) \ 1991 1.1 joerg (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ 1992 1.1 joerg (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ 1993 1.1 joerg (__v64qi)(__m512i)(W)) 1994 1.1 joerg 1995 1.1 joerg #define _mm512_maskz_alignr_epi8(U, A, B, N) \ 1996 1.1 joerg (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ 1997 1.1 joerg (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ 1998 1.1 joerg (__v64qi)(__m512i)_mm512_setzero_si512()) 1999 1.1 joerg 2000 1.1 joerg #define _mm512_dbsad_epu8(A, B, imm) \ 2001 1.1 joerg (__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \ 2002 1.1 joerg (__v64qi)(__m512i)(B), (int)(imm)) 2003 1.1 joerg 2004 1.1 joerg #define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \ 2005 1.1 joerg (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 2006 1.1 joerg (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ 2007 1.1 joerg (__v32hi)(__m512i)(W)) 2008 1.1 joerg 2009 1.1 joerg #define _mm512_maskz_dbsad_epu8(U, A, B, imm) \ 2010 1.1 joerg (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 2011 1.1 joerg (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ 2012 1.1 joerg (__v32hi)_mm512_setzero_si512()) 2013 1.1 joerg 2014 1.1 joerg static __inline__ __m512i __DEFAULT_FN_ATTRS512 2015 1.1 joerg _mm512_sad_epu8 (__m512i __A, __m512i __B) 2016 1.1 joerg { 2017 1.1 joerg return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A, 2018 1.1 joerg (__v64qi) __B); 2019 1.1 joerg } 2020 1.1 joerg 2021 1.1 joerg #undef __DEFAULT_FN_ATTRS512 2022 1.1 joerg #undef __DEFAULT_FN_ATTRS 2023 1.1 joerg 2024 1.1 joerg #endif 2025