1 // Simd SVE specific implementations -*- C++ -*- 2 3 // Copyright The GNU Toolchain Authors. 4 // 5 // This file is part of the GNU ISO C++ Library. This library is free 6 // software; you can redistribute it and/or modify it under the 7 // terms of the GNU General Public License as published by the 8 // Free Software Foundation; either version 3, or (at your option) 9 // any later version. 10 11 // This library is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU General Public License for more details. 15 16 // Under Section 7 of GPL version 3, you are granted additional 17 // permissions described in the GCC Runtime Library Exception, version 18 // 3.1, as published by the Free Software Foundation. 19 20 // You should have received a copy of the GNU General Public License and 21 // a copy of the GCC Runtime Library Exception along with this program; 22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 // <http://www.gnu.org/licenses/>. 

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_SVE_H_
#define _GLIBCXX_EXPERIMENTAL_SIMD_SVE_H_

#if __cplusplus >= 201703L

#if !_GLIBCXX_SIMD_HAVE_SVE
#error "simd_sve.h may only be included when SVE on ARM is available"
#endif

_GLIBCXX_SIMD_BEGIN_NAMESPACE

// Helper function mapping to sve supported types.
// Returns a value-initialized object whose *type* is the fixed-width
// integer/float type with the same size and signedness as _Tp (e.g.
// long -> int64_t on LP64, unsigned -> uint32_t).  Only the return type
// matters; see __get_sve_value_type_t below.  Types with no mapping
// (and sizes other than 1/2/4/8) fall back to _Tp itself.
template <typename _Tp>
  constexpr auto
  __get_sve_value_type()
  {
    if constexpr (is_integral_v<_Tp>)
      {
	if constexpr (is_signed_v<_Tp>)
	  {
	    if constexpr (sizeof(_Tp) == 1)
	      return int8_t{};
	    else if constexpr (sizeof(_Tp) == 2)
	      return int16_t{};
	    else if constexpr (sizeof(_Tp) == 4)
	      return int32_t{};
	    else if constexpr (sizeof(_Tp) == 8)
	      return int64_t{};
	    else
	      return _Tp{};
	  }
	else
	  {
	    if constexpr (sizeof(_Tp) == 1)
	      return uint8_t{};
	    else if constexpr (sizeof(_Tp) == 2)
	      return uint16_t{};
	    else if constexpr (sizeof(_Tp) == 4)
	      return uint32_t{};
	    else if constexpr (sizeof(_Tp) == 8)
	      return uint64_t{};
	    else
	      return _Tp{};
	  }
      }
    else
      {
	if constexpr (is_floating_point_v<_Tp>)
	  {
	    if constexpr (sizeof(_Tp) == 4)
	      return float32_t{};
	    else if constexpr (sizeof(_Tp) == 8)
	      return float64_t{};
	    else
	      return _Tp{};
	  }
      }
  }

// The SVE-native element type corresponding to _Tp.
template <typename _Tp>
  using __get_sve_value_type_t = decltype(__get_sve_value_type<_Tp>());

// Fixed-length (VLST) predicate type.  The arm_sve_vector_bits attribute
// turns the sizeless svbool_t into a sized type usable as a data member.
typedef svbool_t __sve_bool_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

// Fixed-length SVE vector type for _Np elements of type _Tp, together
// with the element-width-dependent operations:
//  - __sve_broadcast: splat a scalar into every lane (svdup).
//  - __sve_active_mask: predicate with the first _Np lanes active
//    (svwhilelt), used to ignore the tail of a partially used register.
// Specialized for each supported element type below; the primary
// template (defined after the specializations) maps other element types
// through __get_sve_value_type_t.
template <typename _Tp, size_t _Np>
  struct __sve_vector_type;

template <typename _Tp, size_t _Np>
  using __sve_vector_type_t = typename __sve_vector_type<_Tp, _Np>::type;

template <size_t _Np>
  struct __sve_vector_type<int8_t, _Np>
  {
    typedef svint8_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    inline static __sve_vlst_type
    __sve_broadcast(int8_t __dup)
    { return svdup_s8(__dup); }

    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b8(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };

template <size_t _Np>
  struct __sve_vector_type<uint8_t, _Np>
  {
    typedef svuint8_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    inline static __sve_vlst_type
    __sve_broadcast(uint8_t __dup)
    { return svdup_u8(__dup); }

    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b8(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };

template <size_t _Np>
  struct __sve_vector_type<int16_t, _Np>
  {
    typedef svint16_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    inline static __sve_vlst_type
    __sve_broadcast(int16_t __dup)
    { return svdup_s16(__dup); }

    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b16(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };

template <size_t _Np>
  struct __sve_vector_type<uint16_t, _Np>
  {
    typedef svuint16_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    inline static __sve_vlst_type
    __sve_broadcast(uint16_t __dup)
    { return svdup_u16(__dup); }

    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b16(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };

template <size_t _Np>
  struct __sve_vector_type<int32_t, _Np>
  {
    typedef svint32_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    inline static __sve_vlst_type
    __sve_broadcast(int32_t __dup)
    { return svdup_s32(__dup); }

    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b32(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };

template <size_t _Np>
  struct __sve_vector_type<uint32_t, _Np>
  {
    typedef svuint32_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    inline static __sve_vlst_type
    __sve_broadcast(uint32_t __dup)
    { return svdup_u32(__dup); }

    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b32(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };

template <size_t _Np>
  struct __sve_vector_type<int64_t, _Np>
  {
    typedef svint64_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    inline static __sve_vlst_type
    __sve_broadcast(int64_t __dup)
    { return svdup_s64(__dup); }

    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b64(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };

template <size_t _Np>
  struct __sve_vector_type<uint64_t, _Np>
  {
    typedef svuint64_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    inline static __sve_vlst_type
    __sve_broadcast(uint64_t __dup)
    { return svdup_u64(__dup); }

    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b64(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };

template <size_t _Np>
  struct __sve_vector_type<float, _Np>
  {
    typedef svfloat32_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    inline static __sve_vlst_type
    __sve_broadcast(float __dup)
    { return svdup_f32(__dup); }

    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b32(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };

template <size_t _Np>
  struct __sve_vector_type<double, _Np>
  {
    typedef svfloat64_t __sve_vlst_type __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    inline static __sve_vlst_type
    __sve_broadcast(double __dup)
    { return svdup_f64(__dup); }

    inline static __sve_bool_type
    __sve_active_mask()
    { return svwhilelt_b64(size_t(0), _Np); };

    using type = __sve_vlst_type;
  };

// Primary template: map non-canonical element types (long, char, ...) to
// their fixed-width SVE equivalent and inherit that specialization.
template <typename _Tp, size_t _Np>
  struct __sve_vector_type
  : __sve_vector_type<__get_sve_value_type_t<_Tp>, _Np>
  {};

// Traits for an svbool_t predicate interpreted at a given element width
// (_Size in bytes).  The primary template handles only non-power-of-2
// sizes by forwarding to the next power of 2; 1/2/4/8 are specialized.
template <size_t _Size>
  struct __sve_mask_type
  {
    static_assert((_Size & (_Size - 1)) != 0, "This trait may only be used for non-power-of-2 "
					      "sizes. Power-of-2 sizes must be specialized.");

    using type = typename __sve_mask_type<std::__bit_ceil(_Size)>::type;
  };

template <size_t _Size>
  using __sve_mask_type_t = typename __sve_mask_type<_Size>::type;

// Each specialization provides:
//  - __sve_mask_active_count: number of lanes set in __pred within
//    __active_mask (svcntp).
//  - __sve_mask_first_true: predicate with only the first lane set.
//  - __sve_mask_next_true: predicate selecting the next true lane after
//    __pred (svpnext).
//  - __sve_mask_get: read lane __i by materializing the predicate as a
//    0/1 integer vector and indexing it.
//  - __index0123: the constant vector {0, 1, 2, ...}, used for lane
//    index comparisons.
template <>
  struct __sve_mask_type<1>
  {
    using type = __sve_bool_type;

    using __sve_mask_uint_type = uint8_t;

    typedef svuint8_t __sve_mask_vector_type
      __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    inline static auto
    __sve_mask_active_count(type __active_mask, type __pred)
    { return svcntp_b8(__active_mask, __pred); }

    inline static type
    __sve_mask_first_true()
    { return svptrue_pat_b8(SV_VL1); }

    inline static type
    __sve_mask_next_true(type __active_mask, type __pred)
    { return svpnext_b8(__active_mask, __pred); }

    inline static bool
    __sve_mask_get(type __active_mask, size_t __i)
    { return __sve_mask_vector_type(svdup_u8_z(__active_mask, 1))[__i] != 0;}

    inline static const __sve_mask_vector_type __index0123 = svindex_u8(0, 1);
  };

template <>
  struct __sve_mask_type<2>
  {
    using type = __sve_bool_type;

    using __sve_mask_uint_type = uint16_t;

    typedef svuint16_t __sve_mask_vector_type
      __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    inline static auto
    __sve_mask_active_count(type __active_mask, type __pred)
    { return svcntp_b16(__active_mask, __pred); }

    inline static type
    __sve_mask_first_true()
    { return svptrue_pat_b16(SV_VL1); }

    inline static type
    __sve_mask_next_true(type __active_mask, type __pred)
    { return svpnext_b16(__active_mask, __pred); }

    inline static bool
    __sve_mask_get(type __active_mask, size_t __i)
    { return __sve_mask_vector_type(svdup_u16_z(__active_mask, 1))[__i] != 0;}

    inline static const __sve_mask_vector_type __index0123 = svindex_u16(0, 1);
  };

template <>
  struct __sve_mask_type<4>
  {
    using type = __sve_bool_type;

    using __sve_mask_uint_type = uint32_t;

    typedef svuint32_t __sve_mask_vector_type
      __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    inline static auto
    __sve_mask_active_count(type __active_mask, type __pred)
    { return svcntp_b32(__active_mask, __pred); }

    inline static type
    __sve_mask_first_true()
    { return svptrue_pat_b32(SV_VL1); }

    inline static type
    __sve_mask_next_true(type __active_mask, type __pred)
    { return svpnext_b32(__active_mask, __pred); }

    inline static bool
    __sve_mask_get(type __active_mask, size_t __i)
    { return __sve_mask_vector_type(svdup_u32_z(__active_mask, 1))[__i] != 0;}

    inline static const __sve_mask_vector_type __index0123 = svindex_u32(0, 1);
  };

template <>
  struct __sve_mask_type<8>
  {
    using type = __sve_bool_type;

    using __sve_mask_uint_type = uint64_t;

    typedef svuint64_t __sve_mask_vector_type
      __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));

    inline static auto
    __sve_mask_active_count(type __active_mask, type __pred)
    { return svcntp_b64(__active_mask, __pred); }

    inline static type
    __sve_mask_first_true()
    { return svptrue_pat_b64(SV_VL1); }

    inline static type
    __sve_mask_next_true(type __active_mask, type __pred)
    { return svpnext_b64(__active_mask, __pred); }

    inline static bool
    __sve_mask_get(type __active_mask, size_t __i)
    { return __sve_mask_vector_type(svdup_u64_z(__active_mask, 1))[__i] != 0;}

    inline static const __sve_mask_vector_type __index0123 = svindex_u64(0, 1);
  };

// Reinterpret the bits of an SVE vector as another element type.
// Only the combinations currently needed (int32/int64/float32/float64)
// are implemented.
template <typename _To, typename _From>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __sve_reinterpret_cast(_From __v)
  {
    if constexpr (std::is_same_v<_To, int32_t>)
      return svreinterpret_s32(__v);
    else if constexpr (std::is_same_v<_To, int64_t>)
      return svreinterpret_s64(__v);
    else if constexpr (std::is_same_v<_To, float32_t>)
      return svreinterpret_f32(__v);
    else if constexpr (std::is_same_v<_To, float64_t>)
      return svreinterpret_f64(__v);
    else
      __assert_unreachable<_To>(); // add more cases if needed.
  }

// Wrapper around a fixed-length SVE vector holding _Width elements of
// _Tp.  _Width may be smaller than the register's capacity
// (_S_is_partial); operations then use __sve_active_mask to restrict
// themselves to the first _Width lanes.
template <typename _Tp, size_t _Width>
  struct _SveSimdWrapper
  {
    static_assert(__is_vectorizable_v<_Tp>);

    static_assert(_Width >= 2); // 1 doesn't make sense, use _Tp directly then

    using _BuiltinType = __sve_vector_type_t<_Tp, _Width>;

    using value_type = _Tp;

    // Number of elements the full register can hold.
    static inline constexpr size_t _S_full_size = sizeof(_BuiltinType) / sizeof(value_type);

    static inline constexpr int _S_size = _Width;

    static inline constexpr bool _S_is_partial = _S_full_size != _S_size;

    _BuiltinType _M_data;

    _GLIBCXX_SIMD_INTRINSIC constexpr _SveSimdWrapper<_Tp, _S_full_size>
    __as_full_vector() const
    { return _M_data; }

    // Element-wise initialization from an initializer_list; reads the
    // first _Width elements (no bounds checking beyond that).
    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SveSimdWrapper(initializer_list<_Tp> __init)
    : _M_data(__generate_from_n_evaluations<_Width, _BuiltinType>(
		[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		  return __init.begin()[__i.value];
		}))
    {}

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SveSimdWrapper() = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SveSimdWrapper(const _SveSimdWrapper&) = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SveSimdWrapper(_SveSimdWrapper&&) = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr _SveSimdWrapper&
    operator=(const _SveSimdWrapper&) = default;
    _GLIBCXX_SIMD_INTRINSIC constexpr _SveSimdWrapper&
    operator=(_SveSimdWrapper&&) = default;

    // Implicit construction from the raw SVE vector type.
    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SveSimdWrapper(__sve_vector_type_t<_Tp, _Width> __x)
    : _M_data(__x)
    {}

    // Decompose into a _SimdTuple of scalar ABI entries (one per lane).
    template <typename... _As, typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
							 && sizeof...(_As) <= _Width)>>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      operator _SimdTuple<_Tp, _As...>() const
      {
	return __generate_from_n_evaluations<sizeof...(_As), _SimdTuple<_Tp, _As...>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		   return _M_data[int(__i)];
		 });
      }

    _GLIBCXX_SIMD_INTRINSIC constexpr
    operator const _BuiltinType&() const
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC constexpr
    operator _BuiltinType&()
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
    operator[](size_t __i) const
    { return _M_data[__i]; }

    template <size_t __i>
      _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
      operator[](_SizeConstant<__i>) const
      { return _M_data[__i]; }

    _GLIBCXX_SIMD_INTRINSIC constexpr void
    _M_set(size_t __i, _Tp __x)
    {
      _M_data[__i] = __x;
    }

    // SVE registers are opaque to the constant propagator, so these
    // always report "not a constant" (cf. the builtin-vector backends).
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop() const
    { return false; }

    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop_none_of() const
    { return false; }

    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop_all_of() const
    { return false; }
  };

// Wrapper around an SVE predicate (svbool_t) representing a mask of
// _Width elements whose element size is _Bits bytes.
template <size_t _Bits, size_t _Width>
  struct _SveMaskWrapper
  {
    using _BuiltinSveMaskType = __sve_mask_type<_Bits>;

    using _BuiltinSveVectorType = __sve_vector_type<__int_with_sizeof_t<_Bits>, _Width>;

    using _BuiltinType = typename _BuiltinSveMaskType::type;

    using value_type = bool;

    static constexpr size_t _S_full_size = sizeof(_BuiltinType);

    _GLIBCXX_SIMD_INTRINSIC constexpr _SveMaskWrapper<_Bits, _S_full_size>
    __as_full_vector() const
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SveMaskWrapper() = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SveMaskWrapper(_BuiltinType __k)
    : _M_data(__k)
    {};

    _GLIBCXX_SIMD_INTRINSIC
    operator const _BuiltinType&() const
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC
    operator _BuiltinType&()
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC _BuiltinType
    __intrin() const
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC constexpr value_type
    operator[](size_t __i) const
    {
      return _BuiltinSveMaskType::__sve_mask_get(_M_data, __i);
    }

    template <size_t __i>
      _GLIBCXX_SIMD_INTRINSIC constexpr value_type
      operator[](_SizeConstant<__i>) const
      {
	return _BuiltinSveMaskType::__sve_mask_get(_M_data, __i);
      }

    // Set lane __i: build a one-lane predicate by comparing the index
    // vector {0,1,2,...} against __i, then OR it in (set) or clear it
    // (svbic) depending on __x.
    _GLIBCXX_SIMD_INTRINSIC constexpr void
    _M_set(size_t __i, value_type __x)
    {
      _BuiltinType __index
	= svcmpeq(_BuiltinSveVectorType::__sve_active_mask(), _BuiltinSveMaskType::__index0123,
		  typename _BuiltinSveMaskType::__sve_mask_uint_type(__i));

      if (__x)
	_M_data = svorr_z(_BuiltinSveVectorType::__sve_active_mask(), _M_data, __index);
      else
	_M_data = svbic_z(_BuiltinSveVectorType::__sve_active_mask(), _M_data, __index);
    }

    // Predicates are opaque to the constant propagator (see
    // _SveSimdWrapper for the same convention).
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop() const
    { return false; }

    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop_none_of() const
    { return false; }

    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop_all_of() const
    { return false; }

    _BuiltinType _M_data;
  };

struct _CommonImplSve;

template <typename _Abi, typename = __detail::__odr_helper>
  struct _SimdImplSve;

template <typename _Abi, typename = __detail::__odr_helper>
  struct _MaskImplSve;

// ABI tag for fixed-size SVE vectors occupying _UsedBytes bytes.
template <int _UsedBytes, int>
  struct simd_abi::_SveAbi
  {
    template <typename _Tp>
    static constexpr size_t _S_size = _UsedBytes / sizeof(_Tp);

    struct _IsValidAbiTag
    : __bool_constant<(_UsedBytes > 1)>
    {};

    // Valid when more than one element fits, the size divides evenly,
    // and the register capacity is not exceeded.
    template <typename _Tp>
      struct _IsValidSizeFor
      : __bool_constant<(_UsedBytes / sizeof(_Tp) > 1 && _UsedBytes % sizeof(_Tp) == 0
			   && _UsedBytes <= __sve_vectorized_size_bytes)>
      {};

    template <typename _Tp>
      struct _IsValid
      : conjunction<_IsValidAbiTag, __bool_constant<__have_sve>,
		    __bool_constant<(__vectorized_sizeof<_Tp>() > sizeof(_Tp))>,
		    _IsValidSizeFor<_Tp>>
      {};

    template <typename _Tp>
      static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value;

    using _CommonImpl = _CommonImplSve;

    using _SimdImpl = _SimdImplSve<_SveAbi<_UsedBytes>>;

    using _MaskImpl = _MaskImplSve<_SveAbi<_UsedBytes>>;

    template <typename _Tp>
      using _MaskMember = _SveMaskWrapper<sizeof(_Tp), _S_size<_Tp>>;

    // ABI traits consumed by the generic simd/simd_mask front end.
    template <typename _Tp, bool = _S_is_valid_v<_Tp>>
      struct __traits : _InvalidTraits
      {};

    template <typename _Tp>
      struct __traits<_Tp, true>
      {
	using _IsValid = true_type;
	using _SimdImpl = _SimdImplSve<_SveAbi<_UsedBytes>>;
	using _MaskImpl = _MaskImplSve<_SveAbi<_UsedBytes>>;

	using _SimdMember = _SveSimdWrapper<_Tp, _S_size<_Tp>>;          // sve vector type
	using _MaskMember = _SveMaskWrapper<sizeof(_Tp), _S_size<_Tp>>;  // sve mask type

	static constexpr size_t _S_simd_align = alignof(_SimdMember);
	static constexpr size_t _S_mask_align = alignof(_MaskMember);

	static constexpr size_t _S_full_size = _SimdMember::_S_full_size;
	static constexpr bool _S_is_partial = _SimdMember::_S_is_partial;

	// Base class of simd<_Tp, _SveAbi>: explicit conversion to the
	// raw SVE vector type.
	struct _SimdBase
	{
	  _GLIBCXX_SIMD_ALWAYS_INLINE explicit
	  operator __sve_vector_type_t<_Tp, _S_size<_Tp>>() const
	  { return __data(*static_cast<const simd<_Tp, _SveAbi<_UsedBytes>>*>(this)); }
	};

	// Implicit-conversion helper: raw SVE vector -> _SimdMember.
	class _SimdCastType
	{
	  using _Ap = __sve_vector_type_t<_Tp, _S_size<_Tp>>;

	  _SimdMember _M_data;

	public:
	  _GLIBCXX_SIMD_ALWAYS_INLINE constexpr
	  _SimdCastType(_Ap __a)
	  : _M_data(__a)
	  {}

	  _GLIBCXX_SIMD_ALWAYS_INLINE constexpr
	  operator _SimdMember() const
	  { return _M_data; }
	};

	// Base class of simd_mask<_Tp, _SveAbi>: explicit conversion to
	// the raw predicate type.
	struct _MaskBase
	{
	  _GLIBCXX_SIMD_ALWAYS_INLINE explicit
	  operator __sve_mask_type_t<sizeof(_Tp)>() const
	  {
	    return __data(*static_cast<const simd_mask<_Tp, _SveAbi<_UsedBytes>>*>(this));
	  }
	};

	// Implicit-conversion helper: raw predicate -> _MaskMember.
	class _MaskCastType
	{
	  using _Ap = __sve_mask_type_t<sizeof(_Tp)>;

	  _Ap _M_data;

	public:
	  _GLIBCXX_SIMD_ALWAYS_INLINE constexpr
	  _MaskCastType(_Ap __a)
	  : _M_data(__a)
	  {}

	  _GLIBCXX_SIMD_ALWAYS_INLINE constexpr
	  operator _MaskMember() const
	  { return _M_data; }
	};
      };

    template <typename _Tp>
      static constexpr size_t _S_full_size = __traits<_Tp>::_S_full_size;

    template <typename _Tp>
      static constexpr bool _S_is_partial = __traits<_Tp>::_S_is_partial;
  };

template <typename _Tp, size_t _Np>
  using __sve_mask = __sve_mask_type<sizeof(_Tp)>;

// Load/store/blend primitives shared by _SimdImplSve and _MaskImplSve.
struct _CommonImplSve
{
  // _S_converts_via_decomposition
  // This lists all cases where a __vector_convert needs to fall back to
  // conversion of individual scalars (i.e. decompose the input vector into
  // scalars, convert, compose output vector). In those cases, _S_masked_load &
  // _S_masked_store prefer to use the _S_bit_iteration implementation.
  template <typename _From, typename _To, size_t _ToSize>
    static inline constexpr bool __converts_via_decomposition_v = sizeof(_From) != sizeof(_To);

  // Masked load of _Np elements of _Up from __p, converted to _Tp.
  // Uses a plain svld1 for same-type loads and the widening
  // svld1s*/svld1u* forms for integral widening loads; anything else
  // falls back to a scalar per-lane loop.
  template <typename _Tp, typename _Up, size_t _Np>
    _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
    _S_load(const _Up* __p, _SveMaskWrapper<sizeof(_Tp), _Np> __k)
    {
      using _STp = __get_sve_value_type_t<_Tp>;
      using _SUp = __get_sve_value_type_t<_Up>;
      using _V = __sve_vector_type_t<_Tp, _Np>;
      const _SUp* __up = reinterpret_cast<const _SUp*>(__p);

      if constexpr (std::is_same_v<_Tp, _Up>)
	return _V(svld1(__k._M_data, __up));
      if constexpr (std::is_integral_v<_Tp> && std::is_integral_v<_Up>
		      && (sizeof(_Tp) > sizeof(_Up)))
	{
	  if constexpr (std::is_same_v<_SUp, int8_t>)
	    {
	      if constexpr (std::is_same_v<_STp, int16_t>)
		return _V(svld1sb_s16(__k._M_data, __up));
	      if constexpr (std::is_same_v<_STp, uint16_t>)
		return _V(svld1sb_u16(__k._M_data, __up));
	      if constexpr (std::is_same_v<_STp, int32_t>)
		return _V(svld1sb_s32(__k._M_data, __up));
	      if constexpr (std::is_same_v<_STp, uint32_t>)
		return _V(svld1sb_u32(__k._M_data, __up));
	      if constexpr (std::is_same_v<_STp, int64_t>)
		return _V(svld1sb_s64(__k._M_data, __up));
	      if constexpr (std::is_same_v<_STp, uint64_t>)
		return _V(svld1sb_u64(__k._M_data, __up));
	    }
	  if constexpr (std::is_same_v<_SUp, uint8_t>)
	    {
	      if constexpr (std::is_same_v<_STp, int16_t>)
		return _V(svld1ub_s16(__k._M_data, __up));
	      if constexpr (std::is_same_v<_STp, uint16_t>)
		return _V(svld1ub_u16(__k._M_data, __up));
	      if constexpr (std::is_same_v<_STp, int32_t>)
		return _V(svld1ub_s32(__k._M_data, __up));
	      if constexpr (std::is_same_v<_STp, uint32_t>)
		return _V(svld1ub_u32(__k._M_data, __up));
	      if constexpr (std::is_same_v<_STp, int64_t>)
		return _V(svld1ub_s64(__k._M_data, __up));
	      if constexpr (std::is_same_v<_STp, uint64_t>)
		return _V(svld1ub_u64(__k._M_data, __up));
	    }
	  if constexpr (std::is_same_v<_SUp, int16_t>)
	    {
	      if constexpr (std::is_same_v<_STp, int32_t>)
		return _V(svld1sh_s32(__k._M_data, __up));
	      if constexpr (std::is_same_v<_STp, uint32_t>)
		return _V(svld1sh_u32(__k._M_data, __up));
	      if constexpr (std::is_same_v<_STp, int64_t>)
		return _V(svld1sh_s64(__k._M_data, __up));
	      if constexpr (std::is_same_v<_STp, uint64_t>)
		return _V(svld1sh_u64(__k._M_data, __up));
	    }
	  if constexpr (std::is_same_v<_SUp, uint16_t>)
	    {
	      if constexpr (std::is_same_v<_STp, int32_t>)
		return _V(svld1uh_s32(__k._M_data, __up));
	      if constexpr (std::is_same_v<_STp, uint32_t>)
		return _V(svld1uh_u32(__k._M_data, __up));
	      if constexpr (std::is_same_v<_STp, int64_t>)
		return _V(svld1uh_s64(__k._M_data, __up));
	      if constexpr (std::is_same_v<_STp, uint64_t>)
		return _V(svld1uh_u64(__k._M_data, __up));
	    }
	  if constexpr (std::is_same_v<_SUp, int32_t>)
	    {
	      if constexpr (std::is_same_v<_STp, int64_t>)
		return _V(svld1sw_s64(__k._M_data, __up));
	      if constexpr (std::is_same_v<_STp, uint64_t>)
		return _V(svld1sw_u64(__k._M_data, __up));
	    }
	  if constexpr (std::is_same_v<_SUp, uint32_t>)
	    {
	      if constexpr (std::is_same_v<_STp, int64_t>)
		return _V(svld1uw_s64(__k._M_data, __up));
	      if constexpr (std::is_same_v<_STp, uint64_t>)
		return _V(svld1uw_u64(__k._M_data, __up));
	    }
	}
      // Fallback: element-wise conversion; inactive lanes become _Tp{}.
      return __generate_from_n_evaluations<_Np, __sve_vector_type_t<_Tp, _Np>>(
	       [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		 return __k[__i] ? static_cast<_Tp>(__p[__i]) : _Tp{};
	       });
    }

  // Masked store of __x to __p, converting _Tp -> _Up.  Same-type
  // stores use svst1, integral narrowing stores use svst1b/h/w;
  // anything else is stored scalarly per active lane.
  template <typename _Tp, typename _Up, size_t _Np>
    _GLIBCXX_SIMD_INTRINSIC static constexpr void
    _S_store(_Up* __p, _SveSimdWrapper<_Tp, _Np> __x, _SveMaskWrapper<sizeof(_Tp), _Np> __k)
    {
      using _SUp = __get_sve_value_type_t<_Up>;
      using _STp = __get_sve_value_type_t<_Tp>;

      _SUp* __up = reinterpret_cast<_SUp*>(__p);

      if constexpr (std::is_same_v<_Tp, _Up>)
	return svst1(__k._M_data, __up, __x);
      if constexpr (std::is_integral_v<_Tp> && std::is_integral_v<_Up>
		      && (sizeof(_Tp) > sizeof(_Up)))
	{
	  if constexpr (std::is_same_v<_SUp, int8_t> && std::is_signed_v<_STp>)
	    return svst1b(__k._M_data, __up, __x);
	  if constexpr (std::is_same_v<_SUp, uint8_t> && std::is_unsigned_v<_STp>)
	    return svst1b(__k._M_data, __up, __x);
	  if constexpr (std::is_same_v<_SUp, int16_t> && std::is_signed_v<_STp>)
	    return svst1h(__k._M_data, __up, __x);
	  if constexpr (std::is_same_v<_SUp, uint16_t> && std::is_unsigned_v<_STp>)
	    return svst1h(__k._M_data, __up, __x);
	  if constexpr (std::is_same_v<_SUp, int32_t> && std::is_signed_v<_STp>)
	    return svst1w(__k._M_data, __up, __x);
	  if constexpr (std::is_same_v<_SUp, uint32_t> && std::is_unsigned_v<_STp>)
	    return svst1w(__k._M_data, __up, __x);
	}

      __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
	if (__k[__i])
	  __p[__i] = static_cast<_Up>(__x[__i]);
      });
    }

  // Lane select: __at1 where __k is set, __at0 elsewhere.
  template <typename _Tp, size_t _Np>
    _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
    _S_blend(_SveMaskWrapper<sizeof(_Tp), _Np> __k, _SveSimdWrapper<_Tp, _Np> __at0,
	     _SveSimdWrapper<_Tp, _Np> __at1)
    { return svsel(__k._M_data, __at1._M_data, __at0._M_data); }

  // Spill a bitmask into an array of bool, one byte per element.
  template <size_t _Np, bool _Sanitized>
    _GLIBCXX_SIMD_INTRINSIC static constexpr void
    _S_store_bool_array(_BitMask<_Np, _Sanitized> __x, bool* __mem)
    {
      __execute_n_times<_Np>([&](auto __i)
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
	__mem[__i] = __x[__i];
      });
    }
};

// simd (value) operations for the SVE ABI.
template <typename _Abi, typename>
  struct _SimdImplSve
  {
    template <typename _Tp>
      using _MaskMember = typename _Abi::template _MaskMember<_Tp>;

    template <typename _Tp>
      using _SimdMember = typename _Abi::template __traits<_Tp>::_SimdMember;

    using _CommonImpl = typename _Abi::_CommonImpl;
    using _SuperImpl = typename _Abi::_SimdImpl;
    using _MaskImpl = typename _Abi::_MaskImpl;

    template <typename _Tp>
      static constexpr size_t _S_full_size = _Abi::template _S_full_size<_Tp>;

    template <typename _Tp>
      static constexpr size_t _S_size = _Abi::template _S_size<_Tp>;

    template <typename _Tp>
      using _TypeTag = _Tp*;

    using abi_type = _Abi;

    // Splat __x into a full-width register.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr auto
      _S_broadcast(_Tp __x) noexcept
      {
	return __sve_vector_type<_Tp, __sve_vectorized_size_bytes / sizeof(_Tp)>
		 ::__sve_broadcast(__x);
      }

    // Fill a vector by evaluating __gen(__i) for each lane index.
    template <typename _Fp, typename _Tp>
      inline static constexpr _SimdMember<_Tp>
      _S_generator(_Fp&& __gen, _TypeTag<_Tp>)
      {
	constexpr size_t _Np = _S_size<_Tp>;
	_SveSimdWrapper<_Tp, _Np> __ret;
	__execute_n_times<_S_size<_Tp>>(
	  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __ret._M_set(__i, __gen(__i)); });
	return __ret;
      }

    // Unmasked load: delegate to _CommonImplSve::_S_load with the
    // all-active (first _Np lanes) mask.
    template <typename _Tp, typename _Up>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp>
      _S_load(const _Up* __mem, _TypeTag<_Tp>) noexcept
      {
	constexpr size_t _Np = _S_size<_Tp>;
	_SimdMember<_Tp> __ret = _CommonImpl::template _S_load<_Tp, _Up, _Np>(
				   __mem, _SveMaskWrapper<sizeof(_Tp), _Np>{
					    __sve_vector_type<_Tp, _Np>::__sve_active_mask()});
	return __ret;
      }

    // Masked load: loaded lanes replace the corresponding lanes of
    // __merge; inactive lanes keep their __merge value.
    template <typename _Tp, size_t _Np, typename _Up>
      static constexpr inline _SveSimdWrapper<_Tp, _Np>
      _S_masked_load(_SveSimdWrapper<_Tp, _Np> __merge, _MaskMember<_Tp> __k, const _Up* __mem)
      noexcept
      {
	__sve_vector_type_t<_Tp, _Np> __v
	  = _CommonImpl::template _S_load<_Tp, _Up, _Np>(__mem, __k);
	__sve_vector_type_t<_Tp, _Np> __ret = svsel(__k._M_data, __v, __merge._M_data);
	return __ret;
      }

    template <typename _Tp, typename _Up>
      _GLIBCXX_SIMD_INTRINSIC static constexpr void
      _S_store(_SimdMember<_Tp> __v, _Up* __mem, _TypeTag<_Tp>) noexcept
      {
	constexpr size_t _Np = _S_size<_Tp>;
	_CommonImpl::template _S_store<_Tp, _Up, _Np>(
	  __mem, __v, __sve_vector_type<_Tp, _Np>::__sve_active_mask());
      }

    template <typename _Tp, typename _Up, size_t _Np>
      static constexpr inline void
      _S_masked_store(const _SveSimdWrapper<_Tp, _Np> __v, _Up* __mem,
		      const _SveMaskWrapper<sizeof(_Tp), _Np> __k) noexcept
      { _CommonImpl::template _S_store<_Tp, _Up, _Np>(__mem, __v, __k); }

    // Logical negation: mask of lanes comparing equal to zero.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
      _S_negate(_SveSimdWrapper<_Tp, _Np> __x) noexcept
      {
	return svcmpeq(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data,
		       __sve_vector_type<_Tp, _Np>::__sve_broadcast(_Tp{}));
      }

    // Generic reduction: while the element count is even, split into
    // even lanes (svuzp1) and odd lanes (svuzp2) and combine pairwise
    // with __binary_op, halving the count each step; any remaining odd
    // count is folded scalarly.
    template <typename _Tp, typename _BinaryOperation>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, _BinaryOperation&& __binary_op)
      {
	auto __x_data = __x._M_data;
	constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
	using __sve_vec_t = __sve_vector_type_t<_Tp, _Np>;
	std::size_t __i = __x.size();
	for (; (__i % 2) != 1; __i /= 2)
	  {
	    __x_data = __binary_op(simd<_Tp, _Abi>(
				     __private_init, _SveSimdWrapper<_Tp, _Np>(
						       __sve_vec_t(svuzp1(__x_data, __x_data)))),
				   simd<_Tp, _Abi>(
				     __private_init, _SveSimdWrapper<_Tp, _Np>(
						       __sve_vec_t(svuzp2(__x_data, __x_data))))
				  )._M_data;
	  }
	_Tp __res = __x_data[0];
	for (size_t __ri = 1; __ri != __i; __ri++)
	  __res = __binary_op(__x_data[__ri], __res);
	return __res;
      }

    // Reductions with known operations map to single SVE horizontal
    // reduction intrinsics over the active lanes.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, plus<>)
      {
	return svaddv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
      }

    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, bit_and<>)
      {
	return svandv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
      }

    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, bit_or<>)
      {
	return svorv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
      }

    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, bit_xor<>)
      {
	return sveorv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
      }

    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, __detail::_Maximum())
      {
	return svmaxv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
      }

    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, __detail::_Minimum())
      {
	return svminv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
      }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
      __sve_vector_type_t<_Tp, _Np>
      _S_min(_SveSimdWrapper<_Tp, _Np> __a, _SveSimdWrapper<_Tp, _Np> __b)
      {
	return svmin_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __a._M_data, __b._M_data);
      }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
      __sve_vector_type_t<_Tp, _Np>
      _S_max(_SveSimdWrapper<_Tp, _Np> __a, _SveSimdWrapper<_Tp, _Np> __b)
      {
	return svmax_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __a._M_data, __b._M_data);
      }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
      pair<_SveSimdWrapper<_Tp, _Np>, _SveSimdWrapper<_Tp, _Np>>
      _S_minmax(_SveSimdWrapper<_Tp, _Np> __a, _SveSimdWrapper<_Tp, _Np> __b)
      {
	return {
	  svmin_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __a._M_data, __b._M_data),
	  svmax_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __a._M_data, __b._M_data)
	};
      }

    // Bitwise complement.  Floating-point values are complemented via
    // their same-size integer representation (svnot has no FP form).
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_complement(_SveSimdWrapper<_Tp, _Np> __x) noexcept
      {
	if constexpr (is_floating_point_v<_Tp>)
	  {
	    using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>;
	    return __sve_reinterpret_cast<_Tp>(
		     svnot_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
			     __sve_reinterpret_cast<_Ip>(__x)));
	  }
	else
	  return svnot_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data);
      }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SveSimdWrapper<_Tp, _Np>
      _S_unary_minus(_SveSimdWrapper<_Tp, _Np> __x) noexcept
      {
	return svmul_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data,
		       static_cast<_Tp>(-1));
      }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_plus(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      { return __x._M_data + __y._M_data; }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_minus(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      { return __x._M_data - __y._M_data; }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_multiplies(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      { return __x._M_data * __y._M_data; }

    // Division/modulus: replace the divisor's inactive tail lanes with
    // 1 so the unused part of the register cannot fault or trap.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_divides(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      {
	__sve_vector_type_t<_Tp, _Np> __y_padded
	  = svsel(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
		  __y._M_data, __sve_vector_type<_Tp, _Np>::__sve_broadcast(1));
	return __x._M_data / __y_padded;
      }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_modulus(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      {
	__sve_vector_type_t<_Tp, _Np> __y_padded
	  = svsel(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
		  __y._M_data, __sve_vector_type<_Tp, _Np>::__sve_broadcast(1));
	return __x._M_data % __y_padded;
      }

    // Bitwise AND/OR: floating-point operands go through their integer
    // representation, as with _S_complement above.
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_bit_and(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      {
	if constexpr (is_floating_point_v<_Tp>)
	  {
	    using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>;
	    return __sve_reinterpret_cast<_Tp>(
		     svand_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
			     __sve_reinterpret_cast<_Ip>(__x), __sve_reinterpret_cast<_Ip>(__y)));
	  }
	else
	  return svand_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
			 __x._M_data, __y._M_data);
      }

    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_bit_or(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      {
	if constexpr (is_floating_point_v<_Tp>)
	  {
	    using _Ip =
__get_sve_value_type_t<__int_for_sizeof_t<_Tp>>; 1114 return __sve_reinterpret_cast<_Tp>( 1115 svorr_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), 1116 __sve_reinterpret_cast<_Ip>(__x), __sve_reinterpret_cast<_Ip>(__y))); 1117 } 1118 else 1119 return svorr_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), 1120 __x._M_data, __y._M_data); 1121 } 1122 1123 template <typename _Tp, size_t _Np> 1124 _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np> 1125 _S_bit_xor(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) 1126 { 1127 if constexpr (is_floating_point_v<_Tp>) 1128 { 1129 using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>; 1130 return __sve_reinterpret_cast<_Tp>( 1131 sveor_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), 1132 __sve_reinterpret_cast<_Ip>(__x), __sve_reinterpret_cast<_Ip>(__y))); 1133 } 1134 else 1135 return sveor_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), 1136 __x._M_data, __y._M_data); 1137 } 1138 1139 template <typename _Tp, size_t _Np> 1140 _GLIBCXX_SIMD_INTRINSIC static __sve_vector_type_t<_Tp, _Np> 1141 _S_bit_shift_left(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) 1142 { return __x._M_data << __y._M_data; } 1143 1144 template <typename _Tp, size_t _Np> 1145 _GLIBCXX_SIMD_INTRINSIC static __sve_vector_type_t<_Tp, _Np> 1146 _S_bit_shift_right(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) 1147 { return __x._M_data >> __y._M_data; } 1148 1149 template <typename _Tp, size_t _Np> 1150 _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np> 1151 _S_bit_shift_left(_SveSimdWrapper<_Tp, _Np> __x, int __y) 1152 { return __x._M_data << __y; } 1153 1154 template <typename _Tp, size_t _Np> 1155 _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np> 1156 _S_bit_shift_right(_SveSimdWrapper<_Tp, _Np> __x, int __y) 1157 { return __x._M_data >> __y; } 1158 1159 template <typename _Tp, size_t _Np> 1160 _GLIBCXX_SIMD_INTRINSIC static 
constexpr void 1161 _S_increment(_SveSimdWrapper<_Tp, _Np>& __x) 1162 { __x = __x._M_data + 1; } 1163 1164 template <typename _Tp, size_t _Np> 1165 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1166 _S_decrement(_SveSimdWrapper<_Tp, _Np>& __x) 1167 { __x = __x._M_data - 1; } 1168 1169 template <typename _Tp, size_t _Np> 1170 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp> 1171 _S_equal_to(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) 1172 { 1173 return svcmpeq(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data); 1174 } 1175 1176 template <typename _Tp, size_t _Np> 1177 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp> 1178 _S_not_equal_to(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) 1179 { 1180 return svcmpne(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data); 1181 } 1182 1183 template <typename _Tp, size_t _Np> 1184 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp> 1185 _S_less(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) 1186 { 1187 return svcmplt(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data); 1188 } 1189 1190 template <typename _Tp, size_t _Np> 1191 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp> 1192 _S_less_equal(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) 1193 { 1194 return svcmple(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data); 1195 } 1196 1197 // simd.math 1198 #define _GLIBCXX_SIMD_MATH_FALLBACK(__name) \ 1199 template <typename _Tp, size_t _Np, typename... _More> \ 1200 static _SveSimdWrapper<_Tp, _Np> _S_##__name(const _SveSimdWrapper<_Tp, _Np>& __x, \ 1201 const _More&... 
__more) \ 1202 { \ 1203 _SveSimdWrapper<_Tp, _Np> __r; \ 1204 __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \ 1205 __r._M_set(__i, __name(__x[__i], __more[__i]...)); \ 1206 }); \ 1207 return __r; \ 1208 } 1209 1210 #define _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(_RetTp, __name) \ 1211 template <typename _Tp, typename... _More> \ 1212 static auto _S_##__name(const _Tp& __x, const _More&... __more) \ 1213 { \ 1214 return __fixed_size_storage_t<_RetTp, _Tp::_S_size>::_S_generate( \ 1215 [&](auto __meta) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \ 1216 return __meta._S_generator( \ 1217 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \ 1218 return __name(__x[__meta._S_offset + __i], \ 1219 __more[__meta._S_offset + __i]...); \ 1220 }, static_cast<_RetTp*>(nullptr)); \ 1221 }); \ 1222 } 1223 1224 _GLIBCXX_SIMD_MATH_FALLBACK(acos) 1225 _GLIBCXX_SIMD_MATH_FALLBACK(asin) 1226 _GLIBCXX_SIMD_MATH_FALLBACK(atan) 1227 _GLIBCXX_SIMD_MATH_FALLBACK(atan2) 1228 _GLIBCXX_SIMD_MATH_FALLBACK(cos) 1229 _GLIBCXX_SIMD_MATH_FALLBACK(sin) 1230 _GLIBCXX_SIMD_MATH_FALLBACK(tan) 1231 _GLIBCXX_SIMD_MATH_FALLBACK(acosh) 1232 _GLIBCXX_SIMD_MATH_FALLBACK(asinh) 1233 _GLIBCXX_SIMD_MATH_FALLBACK(atanh) 1234 _GLIBCXX_SIMD_MATH_FALLBACK(cosh) 1235 _GLIBCXX_SIMD_MATH_FALLBACK(sinh) 1236 _GLIBCXX_SIMD_MATH_FALLBACK(tanh) 1237 _GLIBCXX_SIMD_MATH_FALLBACK(exp) 1238 _GLIBCXX_SIMD_MATH_FALLBACK(exp2) 1239 _GLIBCXX_SIMD_MATH_FALLBACK(expm1) 1240 _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(int, ilogb) 1241 _GLIBCXX_SIMD_MATH_FALLBACK(log) 1242 _GLIBCXX_SIMD_MATH_FALLBACK(log10) 1243 _GLIBCXX_SIMD_MATH_FALLBACK(log1p) 1244 _GLIBCXX_SIMD_MATH_FALLBACK(log2) 1245 _GLIBCXX_SIMD_MATH_FALLBACK(logb) 1246 1247 // modf implemented in simd_math.h 1248 _GLIBCXX_SIMD_MATH_FALLBACK(scalbn) 1249 _GLIBCXX_SIMD_MATH_FALLBACK(scalbln) 1250 _GLIBCXX_SIMD_MATH_FALLBACK(cbrt) 1251 _GLIBCXX_SIMD_MATH_FALLBACK(pow) 1252 _GLIBCXX_SIMD_MATH_FALLBACK(erf) 1253 _GLIBCXX_SIMD_MATH_FALLBACK(erfc) 1254 
_GLIBCXX_SIMD_MATH_FALLBACK(lgamma) 1255 _GLIBCXX_SIMD_MATH_FALLBACK(tgamma) 1256 1257 _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long, lrint) 1258 _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long long, llrint) 1259 1260 _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long, lround) 1261 _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long long, llround) 1262 1263 _GLIBCXX_SIMD_MATH_FALLBACK(fmod) 1264 _GLIBCXX_SIMD_MATH_FALLBACK(remainder) 1265 1266 template <typename _Tp, size_t _Np> 1267 static _SveSimdWrapper<_Tp, _Np> 1268 _S_remquo(const _SveSimdWrapper<_Tp, _Np> __x, const _SveSimdWrapper<_Tp, _Np> __y, 1269 __fixed_size_storage_t<int, _Np>* __z) 1270 { 1271 _SveSimdWrapper<_Tp, _Np> __r{}; 1272 __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1273 int __tmp; 1274 __r._M_set(__i, remquo(__x[__i], __y[__i], &__tmp)); 1275 __z->_M_set(__i, __tmp); 1276 }); 1277 return __r; 1278 } 1279 1280 template <typename _Tp, size_t _Np> 1281 _GLIBCXX_SIMD_INTRINSIC static __fixed_size_storage_t<int, _Np> 1282 _S_fpclassify(_SveSimdWrapper<_Tp, _Np> __x) 1283 { 1284 __fixed_size_storage_t<int, _Np> __r{}; 1285 __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1286 __r._M_set(__i, std::fpclassify(__x[__i])); 1287 }); 1288 return __r; 1289 } 1290 1291 // copysign in simd_math.h 1292 _GLIBCXX_SIMD_MATH_FALLBACK(nextafter) 1293 _GLIBCXX_SIMD_MATH_FALLBACK(fdim) 1294 1295 #undef _GLIBCXX_SIMD_MATH_FALLBACK 1296 #undef _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET 1297 1298 template <typename _Tp, size_t _Np, typename _Op> 1299 static constexpr _MaskMember<_Tp> 1300 __fp_cmp(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y, _Op __op) 1301 { 1302 using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>; 1303 using _VI = __sve_vector_type_t<_Ip, _Np>; 1304 using _WI = _SveSimdWrapper<_Ip, _Np>; 1305 const _WI __fmv = __sve_vector_type<_Ip, _Np>::__sve_broadcast(__finite_max_v<_Ip>); 1306 const _WI __zerov = __sve_vector_type<_Ip, _Np>::__sve_broadcast(0); 1307 
const _WI __xn = _VI(__sve_reinterpret_cast<_Ip>(__x)); 1308 const _WI __yn = _VI(__sve_reinterpret_cast<_Ip>(__y)); 1309 1310 const _WI __xp 1311 = svsel(_S_less(__xn, __zerov), _S_unary_minus(_WI(_S_bit_and(__xn, __fmv))), __xn); 1312 const _WI __yp 1313 = svsel(_S_less(__yn, __zerov), _S_unary_minus(_WI(_S_bit_and(__yn, __fmv))), __yn); 1314 return svbic_z(__sve_vector_type<_Ip, _Np>::__sve_active_mask(), __op(__xp, __yp)._M_data, 1315 _SuperImpl::_S_isunordered(__x, __y)._M_data); 1316 } 1317 1318 template <typename _Tp, size_t _Np> 1319 static constexpr _MaskMember<_Tp> 1320 _S_isgreater(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) noexcept 1321 { return __fp_cmp(__x, __y, [](auto __xp, auto __yp) { return _S_less(__yp, __xp); }); } 1322 1323 template <typename _Tp, size_t _Np> 1324 static constexpr _MaskMember<_Tp> 1325 _S_isgreaterequal(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) noexcept 1326 { return __fp_cmp(__x, __y, [](auto __xp, auto __yp) { return _S_less_equal(__yp, __xp); }); } 1327 1328 template <typename _Tp, size_t _Np> 1329 static constexpr _MaskMember<_Tp> 1330 _S_isless(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) noexcept 1331 { return __fp_cmp(__x, __y, [](auto __xp, auto __yp) { return _S_less(__xp, __yp); }); } 1332 1333 template <typename _Tp, size_t _Np> 1334 static constexpr _MaskMember<_Tp> 1335 _S_islessequal(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) noexcept 1336 { return __fp_cmp(__x, __y, [](auto __xp, auto __yp) { return _S_less_equal(__xp, __yp); }); } 1337 1338 template <typename _Tp, size_t _Np> 1339 static constexpr _MaskMember<_Tp> 1340 _S_islessgreater(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) noexcept 1341 { 1342 return svbic_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), 1343 _SuperImpl::_S_not_equal_to(__x, __y)._M_data, 1344 _SuperImpl::_S_isunordered(__x, __y)._M_data); 1345 } 1346 1347 template <typename _Tp, size_t _Np> 
1348 _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> 1349 _S_abs(_SveSimdWrapper<_Tp, _Np> __x) noexcept 1350 { return svabs_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data); } 1351 1352 template <typename _Tp, size_t _Np> 1353 _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> 1354 _S_fabs(_SveSimdWrapper<_Tp, _Np> __x) noexcept 1355 { return svabs_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data); } 1356 1357 template <typename _Tp, size_t _Np> 1358 _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> 1359 _S_sqrt(_SveSimdWrapper<_Tp, _Np> __x) noexcept 1360 { return svsqrt_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data); } 1361 1362 template <typename _Tp, size_t _Np> 1363 _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> 1364 _S_ldexp(_SveSimdWrapper<_Tp, _Np> __x, __fixed_size_storage_t<int, _Np> __y) noexcept 1365 { 1366 auto __sve_register = __y.first; 1367 if constexpr (std::is_same_v<_Tp, float>) 1368 return svscale_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, 1369 __sve_register._M_data); 1370 else 1371 { 1372 __sve_vector_type_t<int64_t, _Np> __sve_d_register = svunpklo(__sve_register); 1373 return svscale_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, 1374 __sve_d_register); 1375 } 1376 } 1377 1378 template <typename _Tp, size_t _Np> 1379 _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> 1380 _S_fma(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y, 1381 _SveSimdWrapper<_Tp, _Np> __z) 1382 { 1383 return svmad_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data, 1384 __z._M_data); 1385 } 1386 1387 template <typename _Tp, size_t _Np> 1388 _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> 1389 _S_fmax(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) 1390 { 1391 return svmaxnm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data); 1392 } 1393 1394 template 
<typename _Tp, size_t _Np> 1395 _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> 1396 _S_fmin(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) 1397 { 1398 return svminnm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data); 1399 } 1400 1401 template <typename _Tp, size_t _Np> 1402 _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> 1403 _S_isfinite([[maybe_unused]] _SveSimdWrapper<_Tp, _Np> __x) 1404 { 1405 #if __FINITE_MATH_ONLY__ 1406 return __sve_vector_type_t<_Tp, _Np>::__sve_all_true_mask(); 1407 #else 1408 // if all exponent bits are set, __x is either inf or NaN 1409 1410 using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>; 1411 const __sve_vector_type_t<_Ip, _Np> __absn = __sve_reinterpret_cast<_Ip>(_S_abs(__x)); 1412 const __sve_vector_type_t<_Ip, _Np> __maxn 1413 = __sve_reinterpret_cast<_Ip>( 1414 __sve_vector_type<_Tp, _Np>::__sve_broadcast(__finite_max_v<_Tp>)); 1415 1416 return _S_less_equal(_SveSimdWrapper<_Ip, _Np>{__absn}, _SveSimdWrapper<_Ip, _Np>{__maxn}); 1417 #endif 1418 } 1419 1420 template <typename _Tp, size_t _Np> 1421 _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> 1422 _S_isinf([[maybe_unused]] _SveSimdWrapper<_Tp, _Np> __x) 1423 { 1424 #if __FINITE_MATH_ONLY__ 1425 return {}; // false 1426 #else 1427 return _S_equal_to<_Tp, _Np>(_S_abs(__x), _S_broadcast(__infinity_v<_Tp>)); 1428 #endif 1429 } 1430 1431 template <typename _Tp, size_t _Np> 1432 _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> 1433 _S_isnan([[maybe_unused]] _SveSimdWrapper<_Tp, _Np> __x) 1434 { 1435 #if __FINITE_MATH_ONLY__ 1436 return {}; // false 1437 #else 1438 return svcmpuo(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __x._M_data); 1439 #endif 1440 } 1441 1442 template <typename _Tp, size_t _Np> 1443 _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> 1444 _S_isnormal(_SveSimdWrapper<_Tp, _Np> __x) 1445 { 1446 using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>; 1447 using _V = 
__sve_vector_type_t<_Ip, _Np>; 1448 using _VW = _SveSimdWrapper<_Ip, _Np>; 1449 1450 const _V __absn = __sve_reinterpret_cast<_Ip>(_S_abs(__x)); 1451 const _V __minn = __sve_reinterpret_cast<_Ip>( 1452 __sve_vector_type<_Tp, _Np>::__sve_broadcast(__norm_min_v<_Tp>)); 1453 #if __FINITE_MATH_ONLY__ 1454 return _S_greater_equal(_VW{__absn}, _VW{__minn}); 1455 #else 1456 const _V __maxn = __sve_reinterpret_cast<_Ip>( 1457 __sve_vector_type<_Tp, _Np>::__sve_broadcast(__finite_max_v<_Tp>)); 1458 return _MaskImpl::_S_bit_and(_S_less_equal(_VW{__minn}, _VW{__absn}), 1459 _S_less_equal(_VW{__absn}, _VW{__maxn})); 1460 #endif 1461 } 1462 1463 template <typename _Tp, size_t _Np> 1464 _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> 1465 _S_signbit(_SveSimdWrapper<_Tp, _Np> __x) 1466 { 1467 using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>; 1468 using _V = __sve_vector_type_t<_Ip, _Np>; 1469 using _VW = _SveSimdWrapper<_Ip, _Np>; 1470 1471 const _V __xn = __sve_reinterpret_cast<_Ip>(__x); 1472 const _V __zeron = __sve_vector_type<_Ip, _Np>::__sve_broadcast(0); 1473 return _S_less(_VW{__xn}, _VW{__zeron}); 1474 } 1475 1476 template <typename _Tp, size_t _Np> 1477 _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> 1478 _S_isunordered(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) 1479 { 1480 return svcmpuo(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data); 1481 } 1482 1483 template <typename _Tp, size_t _Np> 1484 _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> 1485 _S_nearbyint(_SveSimdWrapper<_Tp, _Np> __x) noexcept 1486 { return svrinti_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data); } 1487 1488 template <typename _Tp, size_t _Np> 1489 _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> 1490 _S_rint(_SveSimdWrapper<_Tp, _Np> __x) noexcept 1491 { return _SuperImpl::_S_nearbyint(__x); } 1492 1493 template <typename _Tp, size_t _Np> 1494 _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> 1495 
_S_trunc(_SveSimdWrapper<_Tp, _Np> __x) noexcept 1496 { return svrintz_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data); } 1497 1498 template <typename _Tp, size_t _Np> 1499 _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> 1500 _S_round(_SveSimdWrapper<_Tp, _Np> __x) noexcept 1501 { return svrinta_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data); } 1502 1503 template <typename _Tp, size_t _Np> 1504 _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> 1505 _S_floor(_SveSimdWrapper<_Tp, _Np> __x) noexcept 1506 { return svrintm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data); } 1507 1508 template <typename _Tp, size_t _Np> 1509 _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> 1510 _S_ceil(_SveSimdWrapper<_Tp, _Np> __x) noexcept 1511 { return svrintp_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data); } 1512 1513 template <typename _Tp, size_t _Bits, size_t _Np> 1514 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1515 _S_masked_assign(_SveMaskWrapper<_Bits, _Np> __k, _SveSimdWrapper<_Tp, _Np>& __lhs, 1516 __type_identity_t<_SveSimdWrapper<_Tp, _Np>> __rhs) 1517 { __lhs = _CommonImpl::_S_blend(__k, __lhs, __rhs); } 1518 1519 template <typename _Tp, size_t _Bits, size_t _Np> 1520 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1521 _S_masked_assign(_SveMaskWrapper<_Bits, _Np> __k, _SveSimdWrapper<_Tp, _Np>& __lhs, 1522 __type_identity_t<_Tp> __rhs) 1523 { __lhs = _CommonImpl::_S_blend(__k, __lhs, __data(simd<_Tp, _Abi>(__rhs))); } 1524 1525 template <typename _Op, typename _Tp, size_t _Bits, size_t _Np> 1526 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1527 _S_masked_cassign(const _SveMaskWrapper<_Bits, _Np> __k, _SveSimdWrapper<_Tp, _Np>& __lhs, 1528 const __type_identity_t<_SveSimdWrapper<_Tp, _Np>> __rhs, _Op __op) 1529 { 1530 __lhs = _CommonImpl::_S_blend(__k, __lhs, 1531 _SveSimdWrapper<_Tp, _Np>(__op(_SuperImpl{}, __lhs, __rhs))); 1532 } 1533 1534 template <typename _Op, typename _Tp, 
size_t _Bits, size_t _Np> 1535 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1536 _S_masked_cassign(const _SveMaskWrapper<_Bits, _Np> __k, _SveSimdWrapper<_Tp, _Np>& __lhs, 1537 const __type_identity_t<_Tp> __rhs, _Op __op) 1538 { _S_masked_cassign(__k, __lhs, _S_broadcast(__rhs), __op); } 1539 1540 template <typename _Tp, size_t _Np, typename _Up> 1541 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1542 _S_set(_SveSimdWrapper<_Tp, _Np>& __v, int __i, _Up&& __x) noexcept 1543 { __v._M_set(__i, static_cast<_Up&&>(__x)); } 1544 1545 template <template <typename> class _Op, typename _Tp, size_t _Bits, size_t _Np> 1546 _GLIBCXX_SIMD_INTRINSIC static constexpr _SveSimdWrapper<_Tp, _Np> 1547 _S_masked_unary(const _SveMaskWrapper<_Bits, _Np> __k, const _SveSimdWrapper<_Tp, _Np> __v) 1548 { 1549 auto __vv = simd<_Tp, _Abi>{__private_init, __v}; 1550 _Op<decltype(__vv)> __op; 1551 return _CommonImpl::_S_blend(__k, __v, __data(__op(__vv))); 1552 } 1553 }; 1554 1555 template <typename _Abi, typename> 1556 struct _MaskImplSve 1557 { 1558 template <typename _Tp> 1559 using _MaskMember = typename _Abi::template _MaskMember<_Tp>; 1560 1561 template <typename _Tp> 1562 using _TypeTag = _Tp*; 1563 1564 template <typename _Tp> 1565 static constexpr size_t _S_size = simd_size_v<_Tp, _Abi>; 1566 1567 template <typename _Tp> 1568 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp> 1569 _S_broadcast(bool __x) 1570 { 1571 constexpr size_t _Np = simd_size_v<_Tp, _Abi>; 1572 __sve_bool_type __tr = __sve_vector_type<_Tp, _Np>::__sve_active_mask(); 1573 __sve_bool_type __fl = svpfalse_b(); 1574 return __x ? 
__tr : __fl; 1575 } 1576 1577 template <typename _Tp> 1578 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp> 1579 _S_load(const bool* __mem) 1580 { 1581 constexpr size_t _Np = simd_size_v<_Tp, _Abi>; 1582 const uint8_t* __p = reinterpret_cast<const uint8_t*>(__mem); 1583 __sve_bool_type __u8_active_mask = __sve_vector_type<uint8_t, _Np>::__sve_active_mask(); 1584 __sve_vector_type_t<uint8_t, _Np> __u8_vec_mask_load = svld1(__u8_active_mask, __p); 1585 __sve_bool_type __u8_mask = svcmpne(__u8_active_mask, __u8_vec_mask_load, 0); 1586 1587 __sve_bool_type __tp_mask = __u8_mask; 1588 for (size_t __up_size = 1; __up_size != sizeof(_Tp); __up_size *= 2) 1589 { 1590 __tp_mask = svunpklo(__tp_mask); 1591 } 1592 1593 _SveMaskWrapper<sizeof(_Tp), simd_size_v<_Tp, _Abi>> __r{__tp_mask}; 1594 return __r; 1595 } 1596 1597 template <size_t _Bits, size_t _Np> 1598 static inline _SveMaskWrapper<_Bits, _Np> 1599 _S_masked_load(_SveMaskWrapper<_Bits, _Np> __merge, _SveMaskWrapper<_Bits, _Np> __mask, 1600 const bool* __mem) noexcept 1601 { 1602 _SveMaskWrapper<_Bits, _Np> __r; 1603 1604 __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1605 if (__mask[__i]) 1606 __r._M_set(__i, __mem[__i]); 1607 else 1608 __r._M_set(__i, __merge[__i]); 1609 }); 1610 1611 return __r; 1612 } 1613 1614 template <size_t _Bits, size_t _Np> 1615 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1616 _S_store(_SveMaskWrapper<_Bits, _Np> __v, bool* __mem) noexcept 1617 { 1618 __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1619 __mem[__i] = __v[__i]; 1620 }); 1621 } 1622 1623 template <size_t _Bits, size_t _Np> 1624 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1625 _S_masked_store(const _SveMaskWrapper<_Bits, _Np> __v, bool* __mem, 1626 const _SveMaskWrapper<_Bits, _Np> __k) noexcept 1627 { 1628 __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1629 if (__k[__i]) 1630 __mem[__i] = __v[__i]; 1631 }); 1632 } 1633 1634 template 
<size_t _Bits, size_t _Np> 1635 _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np> 1636 _S_to_bits(_SveMaskWrapper<_Bits, _Np> __x) 1637 { 1638 _ULLong __r = 0; 1639 __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1640 __r |= _ULLong(__x[__i]) << __i; 1641 }); 1642 return __r; 1643 } 1644 1645 template <size_t _Np, typename _Tp> 1646 _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> 1647 _S_from_bitmask(_SanitizedBitMask<_Np> __bits, _TypeTag<_Tp>) 1648 { 1649 _SveMaskWrapper<sizeof(_Tp), _Np> __r; 1650 __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1651 __r._M_set(__i, __bits[__i]); 1652 }); 1653 return __r; 1654 } 1655 1656 template <typename _Tp, typename _Up, typename _UAbi> 1657 _GLIBCXX_SIMD_INTRINSIC static constexpr auto 1658 _S_convert(simd_mask<_Up, _UAbi> __x) 1659 { 1660 using _R = _SveMaskWrapper<sizeof(_Tp), simd_size_v<_Tp, _Abi>>; 1661 if constexpr (__is_scalar_abi<_UAbi>()) 1662 { 1663 _R __r{__sve_bool_type(svpfalse())}; 1664 __r._M_set(0, __data(__x)); 1665 return __r; 1666 } 1667 if constexpr (__is_sve_abi<_UAbi>()) 1668 { 1669 if constexpr (sizeof(_Up) == sizeof(_Tp)) 1670 return __data(__x); 1671 if constexpr (sizeof(_Up) < sizeof(_Tp)) 1672 { 1673 __sve_bool_type __xmdata = __data(__x)._M_data; 1674 __sve_bool_type __r = __xmdata; 1675 for (size_t __up_size = sizeof(_Up); __up_size != sizeof(_Tp); __up_size *= 2) 1676 { 1677 __r = svunpklo(__r); 1678 } 1679 return _R{__r}; 1680 } 1681 else 1682 { 1683 _R __r{__sve_bool_type(svpfalse())}; 1684 constexpr size_t __min_size 1685 = std::min(simd_size_v<_Tp, _Abi>, simd_mask<_Up, _UAbi>::size()); 1686 __execute_n_times<__min_size>( 1687 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __r._M_set(__i, __x[__i]); }); 1688 return __r; 1689 } 1690 } 1691 if constexpr (__is_neon_abi<_UAbi>()) 1692 { 1693 _R __r{__sve_bool_type(svpfalse())}; 1694 constexpr size_t __min_size 1695 = std::min(simd_size_v<_Tp, _Abi>, simd_mask<_Up, 
_UAbi>::size()); 1696 __execute_n_times<__min_size>( 1697 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __r._M_set(__i, __x[__i]); }); 1698 return __r; 1699 } 1700 if constexpr (__is_fixed_size_abi<_UAbi>()) 1701 { 1702 return _S_convert<_Tp>(__data(__x)); 1703 } 1704 return _R{}; 1705 } 1706 1707 template <typename _Tp, size_t _Np, bool _Sanitized> 1708 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp> 1709 _S_convert(_BitMask<_Np, _Sanitized> __x) 1710 { 1711 _MaskMember<_Tp> __r{}; 1712 __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1713 __r._M_set(__i, __x[__i]); 1714 }); 1715 return __r; 1716 } 1717 1718 template <size_t _Bits, size_t _Np> 1719 _GLIBCXX_SIMD_INTRINSIC static constexpr _SveMaskWrapper<_Bits, _Np> 1720 _S_logical_and(const _SveMaskWrapper<_Bits, _Np>& __x, const _SveMaskWrapper<_Bits, _Np>& __y) 1721 { 1722 return svand_z(_SveMaskWrapper<_Bits, _Np>::_BuiltinSveVectorType::__sve_active_mask(), 1723 __x._M_data, __y._M_data); 1724 } 1725 1726 template <size_t _Bits, size_t _Np> 1727 _GLIBCXX_SIMD_INTRINSIC static constexpr _SveMaskWrapper<_Bits, _Np> 1728 _S_logical_or(const _SveMaskWrapper<_Bits, _Np>& __x, const _SveMaskWrapper<_Bits, _Np>& __y) 1729 { 1730 return svorr_z(_SveMaskWrapper<_Bits, _Np>::_BuiltinSveVectorType::__sve_active_mask(), 1731 __x._M_data, __y._M_data); 1732 } 1733 1734 template <size_t _Bits, size_t _Np> 1735 _GLIBCXX_SIMD_INTRINSIC static constexpr _SveMaskWrapper<_Bits, _Np> 1736 _S_bit_not(const _SveMaskWrapper<_Bits, _Np>& __x) 1737 { 1738 return svnot_z(_SveMaskWrapper<_Bits, _Np>::_BuiltinSveVectorType::__sve_active_mask(), 1739 __x._M_data); 1740 } 1741 1742 template <size_t _Bits, size_t _Np> 1743 _GLIBCXX_SIMD_INTRINSIC static constexpr _SveMaskWrapper<_Bits, _Np> 1744 _S_bit_and(const _SveMaskWrapper<_Bits, _Np>& __x, const _SveMaskWrapper<_Bits, _Np>& __y) 1745 { 1746 return svand_z(_SveMaskWrapper<_Bits, _Np>::_BuiltinSveVectorType::__sve_active_mask(), 1747 __x._M_data, 
__y._M_data); 1748 } 1749 1750 template <size_t _Bits, size_t _Np> 1751 _GLIBCXX_SIMD_INTRINSIC static constexpr _SveMaskWrapper<_Bits, _Np> 1752 _S_bit_or(const _SveMaskWrapper<_Bits, _Np>& __x, const _SveMaskWrapper<_Bits, _Np>& __y) 1753 { 1754 return svorr_z(_SveMaskWrapper<_Bits, _Np>::_BuiltinSveVectorType::__sve_active_mask(), 1755 __x._M_data, __y._M_data); 1756 } 1757 1758 template <size_t _Bits, size_t _Np> 1759 _GLIBCXX_SIMD_INTRINSIC static constexpr _SveMaskWrapper<_Bits, _Np> 1760 _S_bit_xor(const _SveMaskWrapper<_Bits, _Np>& __x, const _SveMaskWrapper<_Bits, _Np>& __y) 1761 { 1762 return sveor_z(_SveMaskWrapper<_Bits, _Np>::_BuiltinSveVectorType::__sve_active_mask(), 1763 __x._M_data, __y._M_data); 1764 } 1765 1766 template <size_t _Bits, size_t _Np> 1767 static constexpr void 1768 _S_set(_SveMaskWrapper<_Bits, _Np>& __k, int __i, bool __x) noexcept 1769 { 1770 auto __index = svcmpeq(_SveMaskWrapper<_Bits, _Np>::_BuiltinSveVectorType::__sve_active_mask(), 1771 __sve_mask_type<_Bits>::__index0123, 1772 typename __sve_mask_type<_Bits>::__sve_mask_uint_type(__i)); 1773 if (__x) 1774 __k._M_data = svorr_z(_SveMaskWrapper<_Bits, _Np>::_BuiltinSveVectorType::__sve_active_mask(), 1775 __k._M_data, __index); 1776 else 1777 __k._M_data = svbic_z(_SveMaskWrapper<_Bits, _Np>::_BuiltinSveVectorType::__sve_active_mask(), 1778 __k._M_data, __index); 1779 } 1780 1781 template <size_t _Bits, size_t _Np> 1782 _GLIBCXX_SIMD_INTRINSIC static void 1783 _S_masked_assign(_SveMaskWrapper<_Bits, _Np> __k, _SveMaskWrapper<_Bits, _Np>& __lhs, 1784 _SveMaskWrapper<_Bits, _Np> __rhs) 1785 { __lhs._M_data = svsel(__k._M_data, __rhs._M_data, __lhs._M_data); } 1786 1787 template <size_t _Bits, size_t _Np> 1788 _GLIBCXX_SIMD_INTRINSIC static void 1789 _S_masked_assign(_SveMaskWrapper<_Bits, _Np> __k, _SveMaskWrapper<_Bits, _Np>& __lhs, 1790 bool __rhs) 1791 { 1792 __lhs._M_data 1793 = svsel(__k._M_data, _S_broadcast<__int_with_sizeof_t<_Bits>>(__rhs), __lhs._M_data); 1794 } 1795 
1796 template <typename _Tp> 1797 _GLIBCXX_SIMD_INTRINSIC static int 1798 _S_popcount(simd_mask<_Tp, _Abi> __k) 1799 { 1800 constexpr size_t _Np = simd_size_v<_Tp, _Abi>; 1801 1802 return __sve_mask_type<sizeof(_Tp)>::__sve_mask_active_count( 1803 __sve_vector_type<_Tp, _Np>::__sve_active_mask(), __k._M_data); 1804 } 1805 1806 template <typename _Tp> 1807 _GLIBCXX_SIMD_INTRINSIC static bool 1808 _S_all_of(simd_mask<_Tp, _Abi> __k) 1809 { return _S_popcount(__k) == simd_size_v<_Tp, _Abi>; } 1810 1811 template <typename _Tp> 1812 _GLIBCXX_SIMD_INTRINSIC static bool 1813 _S_any_of(simd_mask<_Tp, _Abi> __k) 1814 { 1815 return svptest_any(__sve_vector_type<_Tp, simd_size_v<_Tp, _Abi>>::__sve_active_mask(), 1816 __k._M_data); 1817 } 1818 1819 template <typename _Tp> 1820 _GLIBCXX_SIMD_INTRINSIC static bool 1821 _S_none_of(simd_mask<_Tp, _Abi> __k) 1822 { 1823 return !svptest_any(__sve_vector_type<_Tp, simd_size_v<_Tp, _Abi>>::__sve_active_mask(), 1824 __k._M_data); 1825 } 1826 1827 template <typename _Tp> 1828 _GLIBCXX_SIMD_INTRINSIC static bool 1829 _S_some_of(simd_mask<_Tp, _Abi> __k) 1830 { 1831 int __msk_count = _S_popcount(__k); 1832 return (__msk_count > 0) && (__msk_count < (int) simd_size_v<_Tp, _Abi>); 1833 } 1834 1835 template <typename _Tp> 1836 _GLIBCXX_SIMD_INTRINSIC static int 1837 _S_find_first_set(simd_mask<_Tp, _Abi> __k) 1838 { 1839 return svclastb(svpfirst(__k._M_data, svpfalse()), 1840 -1, __sve_mask_type<sizeof(_Tp)>::__index0123); 1841 } 1842 1843 template <typename _Tp> 1844 _GLIBCXX_SIMD_INTRINSIC static int 1845 _S_find_last_set(simd_mask<_Tp, _Abi> __k) 1846 { return svclastb(__k._M_data, -1, __sve_mask_type<sizeof(_Tp)>::__index0123); } 1847 }; 1848 1849 _GLIBCXX_SIMD_END_NAMESPACE 1850 #endif // __cplusplus >= 201703L 1851 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_SVE_H_ 1852 // vim: sw=2 noet ts=8 sts=2 tw=100 1853