Home | History | Annotate | Line # | Download | only in pstl
      1 // -*- C++ -*-
      2 //===-- numeric_impl.h ----------------------------------------------------===//
      3 //
      4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      5 // See https://llvm.org/LICENSE.txt for license information.
      6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #ifndef _PSTL_NUMERIC_IMPL_H
     11 #define _PSTL_NUMERIC_IMPL_H
     12 
     13 #include <iterator>
     14 #include <type_traits>
     15 #include <numeric>
     16 
     17 #include "parallel_backend.h"
     18 #include "pstl_config.h"
     19 #include "execution_impl.h"
     20 #include "unseq_backend_simd.h"
     21 #include "algorithm_fwd.h"
     22 
     23 namespace __pstl
     24 {
     25 namespace __internal
     26 {
     27 
     28 //------------------------------------------------------------------------
     29 // transform_reduce (version with two binary functions, according to draft N4659)
     30 //------------------------------------------------------------------------
     31 
// Serial brick of the two-range transform_reduce.
// Elements of [__first1, __last1) are paired with the elements of the range that starts at
// __first2; each pair is combined by __binary_op2 and the results are folded onto __init with
// __binary_op1. The work is forwarded as-is to std::inner_product.
template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2>
_Tp
__brick_transform_reduce(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init,
                         _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2,
                         /*is_vector=*/std::false_type) noexcept
{
    return std::inner_product(__first1, __last1, __first2, __init,
                              /*fold=*/__binary_op1,
                              /*pair=*/__binary_op2);
}
     40 
     41 template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _Tp, class _BinaryOperation1,
     42           class _BinaryOperation2>
     43 _Tp
     44 __brick_transform_reduce(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1,
     45                          _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1,
     46                          _BinaryOperation2 __binary_op2,
     47                          /*is_vector=*/std::true_type) noexcept
     48 {
     49     typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType;
     50     return __unseq_backend::__simd_transform_reduce(
     51         __last1 - __first1, __init, __binary_op1,
     52         [=, &__binary_op2](_DifferenceType __i) { return __binary_op2(__first1[__i], __first2[__i]); });
     53 }
     54 
     55 template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp,
     56           class _BinaryOperation1, class _BinaryOperation2>
     57 _Tp
     58 __pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
     59                            _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1,
     60                            _BinaryOperation2 __binary_op2) noexcept
     61 {
     62     return __brick_transform_reduce(__first1, __last1, __first2, __init, __binary_op1, __binary_op2,
     63                                     typename _Tag::__is_vector{});
     64 }
     65 
     66 template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
     67           class _Tp, class _BinaryOperation1, class _BinaryOperation2>
     68 _Tp
     69 __pattern_transform_reduce(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
     70                            _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Tp __init,
     71                            _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2)
     72 {
     73     using __backend_tag = typename decltype(__tag)::__backend_tag;
     74 
     75     return __internal::__except_handler(
     76         [&]()
     77         {
     78             return __par_backend::__parallel_transform_reduce(
     79                 __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
     80                 [__first1, __first2, __binary_op2](_RandomAccessIterator1 __i) mutable
     81                 { return __binary_op2(*__i, *(__first2 + (__i - __first1))); },
     82                 __init,
     83                 __binary_op1, // Combine
     84                 [__first1, __first2, __binary_op1, __binary_op2](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j,
     85                                                                  _Tp __init) -> _Tp
     86                 {
     87                     return __internal::__brick_transform_reduce(__i, __j, __first2 + (__i - __first1), __init,
     88                                                                 __binary_op1, __binary_op2, _IsVector{});
     89                 });
     90         });
     91 }
     92 
     93 //------------------------------------------------------------------------
     94 // transform_reduce (version with unary and binary functions)
     95 //------------------------------------------------------------------------
     96 
// Serial brick of the single-range transform_reduce.
// Each element of [__first, __last) is mapped through __unary_op and the mapped values are
// folded onto __init with __binary_op; the work is forwarded as-is to std::transform_reduce.
template <class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation>
_Tp
__brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op,
                         _UnaryOperation __unary_op, /*is_vector=*/std::false_type) noexcept
{
    return std::transform_reduce(__first, __last, __init,
                                 /*fold=*/__binary_op,
                                 /*map=*/__unary_op);
}
    104 
    105 template <class _RandomAccessIterator, class _Tp, class _UnaryOperation, class _BinaryOperation>
    106 _Tp
    107 __brick_transform_reduce(_RandomAccessIterator __first, _RandomAccessIterator __last, _Tp __init,
    108                          _BinaryOperation __binary_op, _UnaryOperation __unary_op,
    109                          /*is_vector=*/std::true_type) noexcept
    110 {
    111     typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType;
    112     return __unseq_backend::__simd_transform_reduce(
    113         __last - __first, __init, __binary_op,
    114         [=, &__unary_op](_DifferenceType __i) { return __unary_op(__first[__i]); });
    115 }
    116 
    117 template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation,
    118           class _UnaryOperation>
    119 _Tp
    120 __pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Tp __init,
    121                            _BinaryOperation __binary_op, _UnaryOperation __unary_op) noexcept
    122 {
    123     return __internal::__brick_transform_reduce(__first, __last, __init, __binary_op, __unary_op,
    124                                                 typename _Tag::__is_vector{});
    125 }
    126 
    127 template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Tp, class _BinaryOperation,
    128           class _UnaryOperation>
    129 _Tp
    130 __pattern_transform_reduce(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
    131                            _RandomAccessIterator __last, _Tp __init, _BinaryOperation __binary_op,
    132                            _UnaryOperation __unary_op)
    133 {
    134     using __backend_tag = typename decltype(__tag)::__backend_tag;
    135 
    136     return __internal::__except_handler(
    137         [&]()
    138         {
    139             return __par_backend::__parallel_transform_reduce(
    140                 __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last,
    141                 [__unary_op](_RandomAccessIterator __i) mutable { return __unary_op(*__i); }, __init, __binary_op,
    142                 [__unary_op, __binary_op](_RandomAccessIterator __i, _RandomAccessIterator __j, _Tp __init) {
    143                     return __internal::__brick_transform_reduce(__i, __j, __init, __binary_op, __unary_op, _IsVector{});
    144                 });
    145         });
    146 }
    147 
    148 //------------------------------------------------------------------------
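// transform_scan (shared implementation of transform_exclusive_scan and transform_inclusive_scan)
//
// NOTE: walk3 (used by adjacent_difference further down in this file) evaluates f(x,y,z) for
// (x,y,z) drawn from [first1,last1), [first2,...), [first3,...)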
    152 //------------------------------------------------------------------------
    153 
// Exclusive form (serial).
// For every input element the running total accumulated *before* that element is written to
// the output, then the element (mapped through __unary_op) is folded into the total.
// Returns the advanced output iterator together with the final running total.
template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation>
std::pair<_OutputIterator, _Tp>
__brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
                       _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
                       /*Inclusive*/ std::false_type, /*is_vector=*/std::false_type) noexcept
{
    for (; __first != __last; ++__first, ++__result)
    {
        // The input element is read before the output slot is written
        // (presumably so that input and output may refer to the same storage - TODO confirm).
        _Tp __before = std::move(__init);
        _PSTL_PRAGMA_FORCEINLINE
        __init = __binary_op(__before, __unary_op(*__first));
        *__result = std::move(__before);
    }
    return std::make_pair(__result, std::move(__init));
}
    170 
// Inclusive form (serial).
// Every input element (mapped through __unary_op) is first folded into the running total,
// and the updated total is then written to the output.
// Returns the advanced output iterator together with the final running total.
template <class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation>
std::pair<_OutputIterator, _Tp>
__brick_transform_scan(_RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result,
                       _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
                       /*Inclusive*/ std::true_type, /*is_vector=*/std::false_type) noexcept
{
    for (; __first != __last; ++__first, ++__result)
    {
        // Fold first, store second: the stored value already includes the current element.
        _PSTL_PRAGMA_FORCEINLINE
        __init = __binary_op(__init, __unary_op(*__first));
        *__result = __init;
    }
    return std::make_pair(__result, __init);
}
    186 
// True when _Tp is an arithmetic type and the binary operation is anything other than
// std::plus<_Tp>, i.e. a "user defined operation" applied to an arithmetic type.
template <typename _Tp, typename _BinaryOperation>
using is_arithmetic_udop =
    std::bool_constant<std::is_arithmetic_v<_Tp> && !std::is_same_v<_BinaryOperation, std::plus<_Tp>>>;
    191 
    192 // [restriction] - T shall be DefaultConstructible.
    193 // [violation] - default ctor of T shall set the identity value for binary_op.
    194 template <class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation,
    195           class _Inclusive>
    196 typename std::enable_if<!is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
    197 __brick_transform_scan(_RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result,
    198                        _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive,
    199                        /*is_vector=*/std::true_type) noexcept
    200 {
    201 #if defined(_PSTL_UDS_PRESENT)
    202     return __unseq_backend::__simd_scan(__first, __last - __first, __result, __unary_op, __init, __binary_op,
    203                                         _Inclusive());
    204 #else
    205     // We need to call serial brick here to call function for inclusive and exclusive scan that depends on _Inclusive() value
    206     return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(),
    207                                               /*is_vector=*/std::false_type());
    208 #endif
    209 }
    210 
    211 template <class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation,
    212           class _Inclusive>
    213 typename std::enable_if<is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
    214 __brick_transform_scan(_RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result,
    215                        _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive,
    216                        /*is_vector=*/std::true_type) noexcept
    217 {
    218     return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(),
    219                                               /*is_vector=*/std::false_type());
    220 }
    221 
    222 template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _UnaryOperation,
    223           class _Tp, class _BinaryOperation, class _Inclusive>
    224 _OutputIterator
    225 __pattern_transform_scan(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
    226                          _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
    227                          _Inclusive) noexcept
    228 {
    229     return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, std::move(__init), __binary_op, _Inclusive(),
    230                                               typename _Tag::__is_vector{})
    231         .first;
    232 }
    233 
    234 template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator,
    235           class _UnaryOperation, class _Tp, class _BinaryOperation, class _Inclusive>
    236 typename std::enable_if<!std::is_floating_point<_Tp>::value, _OutputIterator>::type
    237 __pattern_transform_scan(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
    238                          _RandomAccessIterator __last, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init,
    239                          _BinaryOperation __binary_op, _Inclusive)
    240 {
    241     using __backend_tag = typename decltype(__tag)::__backend_tag;
    242 
    243     typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType;
    244 
    245     return __internal::__except_handler(
    246         [&]()
    247         {
    248             __par_backend::__parallel_transform_scan(
    249                 __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __last - __first,
    250                 [__first, __unary_op](_DifferenceType __i) mutable { return __unary_op(__first[__i]); }, __init,
    251                 __binary_op,
    252                 [__first, __unary_op, __binary_op](_DifferenceType __i, _DifferenceType __j, _Tp __init)
    253                 {
    254                     // Execute serial __brick_transform_reduce, due to the explicit SIMD vectorization (reduction) requires a commutative operation for the guarantee of correct scan.
    255                     return __internal::__brick_transform_reduce(__first + __i, __first + __j, __init, __binary_op,
    256                                                                 __unary_op,
    257                                                                 /*__is_vector*/ std::false_type());
    258                 },
    259                 [__first, __unary_op, __binary_op, __result](_DifferenceType __i, _DifferenceType __j, _Tp __init)
    260                 {
    261                     return __internal::__brick_transform_scan(__first + __i, __first + __j, __result + __i, __unary_op,
    262                                                               __init, __binary_op, _Inclusive(), _IsVector{})
    263                         .second;
    264                 });
    265             return __result + (__last - __first);
    266         });
    267 }
    268 
    269 template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator,
    270           class _UnaryOperation, class _Tp, class _BinaryOperation, class _Inclusive>
    271 typename std::enable_if<std::is_floating_point<_Tp>::value, _OutputIterator>::type
    272 __pattern_transform_scan(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
    273                          _RandomAccessIterator __last, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init,
    274                          _BinaryOperation __binary_op, _Inclusive)
    275 {
    276     using __backend_tag = typename decltype(__tag)::__backend_tag;
    277 
    278     typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType;
    279     _DifferenceType __n = __last - __first;
    280 
    281     if (__n <= 0)
    282     {
    283         return __result;
    284     }
    285     return __internal::__except_handler(
    286         [&]()
    287         {
    288             __par_backend::__parallel_strict_scan(
    289                 __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n, __init,
    290                 [__first, __unary_op, __binary_op, __result](_DifferenceType __i, _DifferenceType __len)
    291                 {
    292                     return __internal::__brick_transform_scan(__first + __i, __first + (__i + __len), __result + __i,
    293                                                               __unary_op, _Tp{}, __binary_op, _Inclusive(), _IsVector{})
    294                         .second;
    295                 },
    296                 __binary_op,
    297                 [__result, &__binary_op](_DifferenceType __i, _DifferenceType __len, _Tp __initial)
    298                 {
    299                     return *(std::transform(__result + __i, __result + __i + __len, __result + __i,
    300                                             [&__initial, &__binary_op](const _Tp& __x)
    301                                             {
    302                                                 _PSTL_PRAGMA_FORCEINLINE
    303                                                 return __binary_op(__initial, __x);
    304                                             }) -
    305                              1);
    306                 },
    307                 [](_Tp) {});
    308             return __result + (__last - __first);
    309         });
    310 }
    311 
    312 //------------------------------------------------------------------------
    313 // adjacent_difference
    314 //------------------------------------------------------------------------
    315 
// Serial brick of adjacent_difference: forwards the range, the destination and the
// operation unchanged to std::adjacent_difference and returns its result.
template <class _ForwardIterator, class _OutputIterator, class _BinaryOperation>
_OutputIterator
__brick_adjacent_difference(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __d_first,
                            _BinaryOperation __op, /*is_vector*/ std::false_type) noexcept
{
    return std::adjacent_difference(__first, __last,
                                    /*destination=*/__d_first,
                                    /*difference=*/__op);
}
    323 
    324 template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryOperation>
    325 _RandomAccessIterator2
    326 __brick_adjacent_difference(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last,
    327                             _RandomAccessIterator2 __d_first, _BinaryOperation __op,
    328                             /*is_vector=*/std::true_type) noexcept
    329 {
    330     _PSTL_ASSERT(__first != __last);
    331 
    332     typedef typename std::iterator_traits<_RandomAccessIterator1>::reference _ReferenceType1;
    333     typedef typename std::iterator_traits<_RandomAccessIterator2>::reference _ReferenceType2;
    334 
    335     auto __n = __last - __first;
    336     *__d_first = *__first;
    337     return __unseq_backend::__simd_walk_3(
    338         __first + 1, __n - 1, __first, __d_first + 1,
    339         [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__x, __y); });
    340 }
    341 
    342 template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _BinaryOperation>
    343 _OutputIterator
    344 __pattern_adjacent_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
    345                               _OutputIterator __d_first, _BinaryOperation __op) noexcept
    346 {
    347     return __internal::__brick_adjacent_difference(__first, __last, __d_first, __op, typename _Tag::__is_vector{});
    348 }
    349 
    350 template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
    351           class _BinaryOperation>
    352 _RandomAccessIterator2
    353 __pattern_adjacent_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec,
    354                               _RandomAccessIterator1 __first, _RandomAccessIterator1 __last,
    355                               _RandomAccessIterator2 __d_first, _BinaryOperation __op)
    356 {
    357     _PSTL_ASSERT(__first != __last);
    358     typedef typename std::iterator_traits<_RandomAccessIterator1>::reference _ReferenceType1;
    359     typedef typename std::iterator_traits<_RandomAccessIterator2>::reference _ReferenceType2;
    360 
    361     using __backend_tag = typename decltype(__tag)::__backend_tag;
    362 
    363     *__d_first = *__first;
    364     __par_backend::__parallel_for(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last - 1,
    365                                   [&__op, __d_first, __first](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e)
    366                                   {
    367                                       _RandomAccessIterator2 __d_b = __d_first + (__b - __first);
    368                                       __internal::__brick_walk3(
    369                                           __b, __e, __b + 1, __d_b + 1,
    370                                           [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z)
    371                                           { __z = __op(__y, __x); },
    372                                           _IsVector{});
    373                                   });
    374     return __d_first + (__last - __first);
    375 }
    376 
    377 } // namespace __internal
    378 } // namespace __pstl
    379 
    380 #endif /* _PSTL_NUMERIC_IMPL_H */
    381