1 // -*- C++ -*- 2 //===-- numeric_impl.h ----------------------------------------------------===// 3 // 4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 // See https://llvm.org/LICENSE.txt for license information. 6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef _PSTL_NUMERIC_IMPL_H 11 #define _PSTL_NUMERIC_IMPL_H 12 13 #include <iterator> 14 #include <type_traits> 15 #include <numeric> 16 17 #include "parallel_backend.h" 18 #include "pstl_config.h" 19 #include "execution_impl.h" 20 #include "unseq_backend_simd.h" 21 #include "algorithm_fwd.h" 22 23 namespace __pstl 24 { 25 namespace __internal 26 { 27 28 //------------------------------------------------------------------------ 29 // transform_reduce (version with two binary functions, according to draft N4659) 30 //------------------------------------------------------------------------ 31 32 template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2> 33 _Tp 34 __brick_transform_reduce(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init, 35 _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2, 36 /*is_vector=*/std::false_type) noexcept 37 { 38 return std::inner_product(__first1, __last1, __first2, __init, __binary_op1, __binary_op2); 39 } 40 41 template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _Tp, class _BinaryOperation1, 42 class _BinaryOperation2> 43 _Tp 44 __brick_transform_reduce(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, 45 _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, 46 _BinaryOperation2 __binary_op2, 47 /*is_vector=*/std::true_type) noexcept 48 { 49 typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; 50 return __unseq_backend::__simd_transform_reduce( 51 __last1 - __first1, __init, __binary_op1, 52 [=, &__binary_op2](_DifferenceType __i) { return __binary_op2(__first1[__i], __first2[__i]); }); 53 } 54 55 template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp, 56 class _BinaryOperation1, class _BinaryOperation2> 57 _Tp 58 __pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, 59 _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, 60 _BinaryOperation2 __binary_op2) noexcept 61 { 62 return __brick_transform_reduce(__first1, __last1, __first2, __init, __binary_op1, __binary_op2, 63 typename _Tag::__is_vector{}); 64 } 65 66 template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2, 67 class _Tp, class _BinaryOperation1, class _BinaryOperation2> 68 _Tp 69 __pattern_transform_reduce(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, 70 _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Tp __init, 71 _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) 72 { 73 using __backend_tag = typename decltype(__tag)::__backend_tag; 74 75 return __internal::__except_handler( 76 [&]() 77 { 78 return __par_backend::__parallel_transform_reduce( 79 __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, 80 [__first1, __first2, __binary_op2](_RandomAccessIterator1 __i) mutable 81 { return __binary_op2(*__i, *(__first2 + (__i - __first1))); }, 82 __init, 83 __binary_op1, // Combine 84 [__first1, __first2, __binary_op1, __binary_op2](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j, 85 _Tp __init) -> _Tp 86 { 87 return __internal::__brick_transform_reduce(__i, __j, __first2 + (__i - __first1), __init, 88 __binary_op1, __binary_op2, _IsVector{}); 89 }); 90 }); 91 } 92 93 //------------------------------------------------------------------------ 94 // transform_reduce (version with unary and binary functions) 95 //------------------------------------------------------------------------ 96 97 template <class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation> 98 _Tp 99 __brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, 100 _UnaryOperation __unary_op, /*is_vector=*/std::false_type) noexcept 101 { 102 return std::transform_reduce(__first, __last, __init, __binary_op, __unary_op); 103 } 104 105 template <class _RandomAccessIterator, class _Tp, class _UnaryOperation, class _BinaryOperation> 106 _Tp 107 __brick_transform_reduce(_RandomAccessIterator __first, _RandomAccessIterator __last, _Tp __init, 108 _BinaryOperation __binary_op, _UnaryOperation __unary_op, 109 /*is_vector=*/std::true_type) noexcept 110 { 111 typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; 112 return __unseq_backend::__simd_transform_reduce( 113 __last - __first, __init, __binary_op, 114 [=, &__unary_op](_DifferenceType __i) { return __unary_op(__first[__i]); }); 115 } 116 117 template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, 118 class _UnaryOperation> 119 _Tp 120 __pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, 121 _BinaryOperation __binary_op, _UnaryOperation __unary_op) noexcept 122 { 123 return __internal::__brick_transform_reduce(__first, __last, __init, __binary_op, __unary_op, 124 typename _Tag::__is_vector{}); 125 } 126 127 template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Tp, class _BinaryOperation, 128 class _UnaryOperation> 129 _Tp 130 __pattern_transform_reduce(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, 131 _RandomAccessIterator __last, _Tp __init, _BinaryOperation __binary_op, 132 _UnaryOperation __unary_op) 133 { 134 using __backend_tag = typename decltype(__tag)::__backend_tag; 135 136 return __internal::__except_handler( 137 [&]() 138 { 139 return __par_backend::__parallel_transform_reduce( 140 __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, 141 [__unary_op](_RandomAccessIterator __i) mutable { return __unary_op(*__i); }, __init, __binary_op, 142 [__unary_op, __binary_op](_RandomAccessIterator __i, _RandomAccessIterator __j, _Tp __init) { 143 return __internal::__brick_transform_reduce(__i, __j, __init, __binary_op, __unary_op, _IsVector{}); 144 }); 145 }); 146 } 147 148 //------------------------------------------------------------------------ 149 // transform_exclusive_scan 150 // 151 // walk3 evaluates f(x,y,z) for (x,y,z) drawn from [first1,last1), [first2,...), [first3,...) 152 //------------------------------------------------------------------------ 153 154 // Exclusive form 155 template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation> 156 std::pair<_OutputIterator, _Tp> 157 __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, 158 _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, 159 /*Inclusive*/ std::false_type, /*is_vector=*/std::false_type) noexcept 160 { 161 for (; __first != __last; ++__first, ++__result) 162 { 163 _Tp __v = std::move(__init); 164 _PSTL_PRAGMA_FORCEINLINE 165 __init = __binary_op(__v, __unary_op(*__first)); 166 *__result = std::move(__v); 167 } 168 return std::make_pair(__result, std::move(__init)); 169 } 170 171 // Inclusive form 172 template <class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation> 173 std::pair<_OutputIterator, _Tp> 174 __brick_transform_scan(_RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result, 175 _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, 176 /*Inclusive*/ std::true_type, /*is_vector=*/std::false_type) noexcept 177 { 178 for (; __first != __last; ++__first, ++__result) 179 { 180 _PSTL_PRAGMA_FORCEINLINE 181 __init = __binary_op(__init, __unary_op(*__first)); 182 *__result = __init; 183 } 184 return std::make_pair(__result, __init); 185 } 186 187 // type is arithmetic and binary operation is a user defined operation. 188 template <typename _Tp, typename _BinaryOperation> 189 using is_arithmetic_udop = std::integral_constant<bool, std::is_arithmetic<_Tp>::value && 190 !std::is_same<_BinaryOperation, std::plus<_Tp>>::value>; 191 192 // [restriction] - T shall be DefaultConstructible. 193 // [violation] - default ctor of T shall set the identity value for binary_op. 194 template <class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation, 195 class _Inclusive> 196 typename std::enable_if<!is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type 197 __brick_transform_scan(_RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result, 198 _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive, 199 /*is_vector=*/std::true_type) noexcept 200 { 201 #if defined(_PSTL_UDS_PRESENT) 202 return __unseq_backend::__simd_scan(__first, __last - __first, __result, __unary_op, __init, __binary_op, 203 _Inclusive()); 204 #else 205 // We need to call serial brick here to call function for inclusive and exclusive scan that depends on _Inclusive() value 206 return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), 207 /*is_vector=*/std::false_type()); 208 #endif 209 } 210 211 template <class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation, 212 class _Inclusive> 213 typename std::enable_if<is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type 214 __brick_transform_scan(_RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result, 215 _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive, 216 /*is_vector=*/std::true_type) noexcept 217 { 218 return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), 219 /*is_vector=*/std::false_type()); 220 } 221 222 template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _UnaryOperation, 223 class _Tp, class _BinaryOperation, class _Inclusive> 224 _OutputIterator 225 __pattern_transform_scan(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, 226 _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, 227 _Inclusive) noexcept 228 { 229 return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, std::move(__init), __binary_op, _Inclusive(), 230 typename _Tag::__is_vector{}) 231 .first; 232 } 233 234 template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator, 235 class _UnaryOperation, class _Tp, class _BinaryOperation, class _Inclusive> 236 typename std::enable_if<!std::is_floating_point<_Tp>::value, _OutputIterator>::type 237 __pattern_transform_scan(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, 238 _RandomAccessIterator __last, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, 239 _BinaryOperation __binary_op, _Inclusive) 240 { 241 using __backend_tag = typename decltype(__tag)::__backend_tag; 242 243 typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; 244 245 return __internal::__except_handler( 246 [&]() 247 { 248 __par_backend::__parallel_transform_scan( 249 __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __last - __first, 250 [__first, __unary_op](_DifferenceType __i) mutable { return __unary_op(__first[__i]); }, __init, 251 __binary_op, 252 [__first, __unary_op, __binary_op](_DifferenceType __i, _DifferenceType __j, _Tp __init) 253 { 254 // Execute serial __brick_transform_reduce, due to the explicit SIMD vectorization (reduction) requires a commutative operation for the guarantee of correct scan. 255 return __internal::__brick_transform_reduce(__first + __i, __first + __j, __init, __binary_op, 256 __unary_op, 257 /*__is_vector*/ std::false_type()); 258 }, 259 [__first, __unary_op, __binary_op, __result](_DifferenceType __i, _DifferenceType __j, _Tp __init) 260 { 261 return __internal::__brick_transform_scan(__first + __i, __first + __j, __result + __i, __unary_op, 262 __init, __binary_op, _Inclusive(), _IsVector{}) 263 .second; 264 }); 265 return __result + (__last - __first); 266 }); 267 } 268 269 template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator, 270 class _UnaryOperation, class _Tp, class _BinaryOperation, class _Inclusive> 271 typename std::enable_if<std::is_floating_point<_Tp>::value, _OutputIterator>::type 272 __pattern_transform_scan(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, 273 _RandomAccessIterator __last, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, 274 _BinaryOperation __binary_op, _Inclusive) 275 { 276 using __backend_tag = typename decltype(__tag)::__backend_tag; 277 278 typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; 279 _DifferenceType __n = __last - __first; 280 281 if (__n <= 0) 282 { 283 return __result; 284 } 285 return __internal::__except_handler( 286 [&]() 287 { 288 __par_backend::__parallel_strict_scan( 289 __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n, __init, 290 [__first, __unary_op, __binary_op, __result](_DifferenceType __i, _DifferenceType __len) 291 { 292 return __internal::__brick_transform_scan(__first + __i, __first + (__i + __len), __result + __i, 293 __unary_op, _Tp{}, __binary_op, _Inclusive(), _IsVector{}) 294 .second; 295 }, 296 __binary_op, 297 [__result, &__binary_op](_DifferenceType __i, _DifferenceType __len, _Tp __initial) 298 { 299 return *(std::transform(__result + __i, __result + __i + __len, __result + __i, 300 [&__initial, &__binary_op](const _Tp& __x) 301 { 302 _PSTL_PRAGMA_FORCEINLINE 303 return __binary_op(__initial, __x); 304 }) - 305 1); 306 }, 307 [](_Tp) {}); 308 return __result + (__last - __first); 309 }); 310 } 311 312 //------------------------------------------------------------------------ 313 // adjacent_difference 314 //------------------------------------------------------------------------ 315 316 template <class _ForwardIterator, class _OutputIterator, class _BinaryOperation> 317 _OutputIterator 318 __brick_adjacent_difference(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __d_first, 319 _BinaryOperation __op, /*is_vector*/ std::false_type) noexcept 320 { 321 return std::adjacent_difference(__first, __last, __d_first, __op); 322 } 323 324 template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryOperation> 325 _RandomAccessIterator2 326 __brick_adjacent_difference(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, 327 _RandomAccessIterator2 __d_first, _BinaryOperation __op, 328 /*is_vector=*/std::true_type) noexcept 329 { 330 _PSTL_ASSERT(__first != __last); 331 332 typedef typename std::iterator_traits<_RandomAccessIterator1>::reference _ReferenceType1; 333 typedef typename std::iterator_traits<_RandomAccessIterator2>::reference _ReferenceType2; 334 335 auto __n = __last - __first; 336 *__d_first = *__first; 337 return __unseq_backend::__simd_walk_3( 338 __first + 1, __n - 1, __first, __d_first + 1, 339 [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__x, __y); }); 340 } 341 342 template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _BinaryOperation> 343 _OutputIterator 344 __pattern_adjacent_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, 345 _OutputIterator __d_first, _BinaryOperation __op) noexcept 346 { 347 return __internal::__brick_adjacent_difference(__first, __last, __d_first, __op, typename _Tag::__is_vector{}); 348 } 349 350 template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2, 351 class _BinaryOperation> 352 _RandomAccessIterator2 353 __pattern_adjacent_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, 354 _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, 355 _RandomAccessIterator2 __d_first, _BinaryOperation __op) 356 { 357 _PSTL_ASSERT(__first != __last); 358 typedef typename std::iterator_traits<_RandomAccessIterator1>::reference _ReferenceType1; 359 typedef typename std::iterator_traits<_RandomAccessIterator2>::reference _ReferenceType2; 360 361 using __backend_tag = typename decltype(__tag)::__backend_tag; 362 363 *__d_first = *__first; 364 __par_backend::__parallel_for(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, 365 [&__op, __d_first, __first](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) 366 { 367 _RandomAccessIterator2 __d_b = __d_first + (__b - __first); 368 __internal::__brick_walk3( 369 __b, __e, __b + 1, __d_b + 1, 370 [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) 371 { __z = __op(__y, __x); }, 372 _IsVector{}); 373 }); 374 return __d_first + (__last - __first); 375 } 376 377 } // namespace __internal 378 } // namespace __pstl 379 380 #endif /* _PSTL_NUMERIC_IMPL_H */ 381