Home | History | Annotate | Line # | Download | only in std
      1      1.1  mrg // <barrier> -*- C++ -*-
      2      1.1  mrg 
      3  1.1.1.2  mrg // Copyright (C) 2020-2024 Free Software Foundation, Inc.
      4      1.1  mrg //
      5      1.1  mrg // This file is part of the GNU ISO C++ Library.  This library is free
      6      1.1  mrg // software; you can redistribute it and/or modify it under the
      7      1.1  mrg // terms of the GNU General Public License as published by the
      8      1.1  mrg // Free Software Foundation; either version 3, or (at your option)
      9      1.1  mrg // any later version.
     10      1.1  mrg 
     11      1.1  mrg // This library is distributed in the hope that it will be useful,
     12      1.1  mrg // but WITHOUT ANY WARRANTY; without even the implied warranty of
     13      1.1  mrg // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14      1.1  mrg // GNU General Public License for more details.
     15      1.1  mrg 
     16      1.1  mrg // Under Section 7 of GPL version 3, you are granted additional
     17      1.1  mrg // permissions described in the GCC Runtime Library Exception, version
     18      1.1  mrg // 3.1, as published by the Free Software Foundation.
     19      1.1  mrg 
     20      1.1  mrg // You should have received a copy of the GNU General Public License and
     21      1.1  mrg // a copy of the GCC Runtime Library Exception along with this program;
     22      1.1  mrg // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     23      1.1  mrg // <http://www.gnu.org/licenses/>.
     24      1.1  mrg 
     25      1.1  mrg // This implementation is based on libcxx/include/barrier
     26      1.1  mrg //===-- barrier.h --------------------------------------------------===//
     27      1.1  mrg //
     28      1.1  mrg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
     29      1.1  mrg // See https://llvm.org/LICENSE.txt for license information.
     30      1.1  mrg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
     31      1.1  mrg //
     32      1.1  mrg //===---------------------------------------------------------------===//
     33      1.1  mrg 
     34      1.1  mrg /** @file include/barrier
     35      1.1  mrg  *  This is a Standard C++ Library header.
     36      1.1  mrg  */
     37      1.1  mrg 
     38      1.1  mrg #ifndef _GLIBCXX_BARRIER
     39      1.1  mrg #define _GLIBCXX_BARRIER 1
     40      1.1  mrg 
     41      1.1  mrg #pragma GCC system_header
     42      1.1  mrg 
     43  1.1.1.2  mrg #include <bits/requires_hosted.h> // threading primitive
     44  1.1.1.2  mrg 
     45  1.1.1.2  mrg #define __glibcxx_want_barrier
     46  1.1.1.2  mrg #include <bits/version.h>
     47  1.1.1.2  mrg 
     48  1.1.1.2  mrg #ifdef __cpp_lib_barrier // C++ >= 20 && __cpp_aligned_new && lib_atomic_wait
     49      1.1  mrg #include <bits/atomic_base.h>
     50      1.1  mrg #include <bits/std_thread.h>
     51      1.1  mrg #include <bits/unique_ptr.h>
     52      1.1  mrg 
     53      1.1  mrg #include <array>
     54      1.1  mrg 
     55      1.1  mrg namespace std _GLIBCXX_VISIBILITY(default)
     56      1.1  mrg {
     57      1.1  mrg _GLIBCXX_BEGIN_NAMESPACE_VERSION
     58      1.1  mrg 
     59      1.1  mrg   struct __empty_completion
     60      1.1  mrg   {
     61      1.1  mrg     _GLIBCXX_ALWAYS_INLINE void
     62      1.1  mrg     operator()() noexcept
     63      1.1  mrg     { }
     64      1.1  mrg   };
     65      1.1  mrg 
     66      1.1  mrg /*
     67      1.1  mrg 
     68      1.1  mrg The default implementation of __tree_barrier is a classic tree barrier.
     69      1.1  mrg 
     70      1.1  mrg It looks different from literature pseudocode for two main reasons:
     71      1.1  mrg  1. Threads that call into std::barrier functions do not provide indices,
     72      1.1  mrg     so a numbering step is added before the actual barrier algorithm,
     73      1.1  mrg     appearing as an N+1 round to the N rounds of the tree barrier.
 2. A great deal of attention has been paid to avoid cache-line thrashing
    by flattening the tree structure into cache-line sized arrays that
    are indexed in an efficient way.
     77      1.1  mrg 
     78      1.1  mrg */
     79      1.1  mrg 
  // Opaque phase value for the barrier.  The underlying type is a single
  // byte so that the per-round ticket arrays in __tree_barrier store one
  // phase per byte (see __tree_barrier::__tickets_t).
  enum class __barrier_phase_t : unsigned char { };
     81      1.1  mrg 
  template<typename _CompletionF>
    class __tree_barrier
    {
      // Phases are stored as plain bytes and accessed atomically via
      // __atomic_ref, so they can live inside the packed ticket arrays.
      using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
      using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>;
      static constexpr auto __phase_alignment =
		      __atomic_phase_ref_t::required_alignment;

      // 64 phase slots per node; slot __round is used in round __round
      // of the tree walk in _M_arrive.
      using __tickets_t = std::array<__barrier_phase_t, 64>;
      struct alignas(64) /* naturally-align the heap state */ __state_t
      {
	alignas(__phase_alignment) __tickets_t __tickets;
      };

      // Number of arrivals expected in the current phase.  Only modified
      // in arrive(), by the thread that completes a phase.
      ptrdiff_t _M_expected;
      // One __state_t per pair of expected threads; allocated in the ctor.
      unique_ptr<__state_t[]> _M_state;
      // Pending change to _M_expected, accumulated by arrive_and_drop()
      // and folded into _M_expected when the current phase completes.
      __atomic_base<ptrdiff_t> _M_expected_adjustment;
      // Invoked once per phase by the final arriving thread.
      _CompletionF _M_completion;

      // The current phase.  Advanced by 2 each time a phase completes,
      // so the intermediate __half_step value (old phase + 1) used in
      // the ticket slots never collides with a published phase.
      alignas(__phase_alignment) __barrier_phase_t  _M_phase;

      // Record one arrival for phase __old_phase, walking up the tree.
      // Returns true if this thread was the final arrival of the phase
      // (and so must run the completion step in arrive()), else false.
      // __current is a quasi-random starting slot derived from the
      // calling thread's id hash.
      bool
      _M_arrive(__barrier_phase_t __old_phase, size_t __current)
      {
	const auto __old_phase_val = static_cast<unsigned char>(__old_phase);
	// A ticket moves __old_phase -> __half_step when the first of a
	// pair arrives, and -> __full_step once the pair is complete.
	const auto __half_step =
			   static_cast<__barrier_phase_t>(__old_phase_val + 1);
	const auto __full_step =
			   static_cast<__barrier_phase_t>(__old_phase_val + 2);

	size_t __current_expected = _M_expected;
	// Map the thread-id hash onto the first-round node range.
	__current %= ((_M_expected + 1) >> 1);

	for (int __round = 0; ; ++__round)
	  {
	    // A round with at most one participant has nobody left to
	    // pair with: this thread is the phase's last arrival.
	    if (__current_expected <= 1)
		return true;
	    size_t const __end_node = ((__current_expected + 1) >> 1),
			 __last_node = __end_node - 1;
	    // Scan the slots (wrapping at __end_node) until this
	    // arrival is successfully recorded in one of them.
	    for ( ; ; ++__current)
	      {
		if (__current == __end_node)
		  __current = 0;
		auto __expect = __old_phase;
		__atomic_phase_ref_t __phase(_M_state[__current]
						.__tickets[__round]);
		if (__current == __last_node && (__current_expected & 1))
		  {
		    // Odd participant count: the last node holds a lone
		    // thread, which advances straight to __full_step.
		    if (__phase.compare_exchange_strong(__expect, __full_step,
						        memory_order_acq_rel))
		      break;     // I'm 1 in 1, go to next __round
		  }
		else if (__phase.compare_exchange_strong(__expect, __half_step,
						         memory_order_acq_rel))
		  {
		    return false; // I'm 1 in 2, done with arrival
		  }
		else if (__expect == __half_step)
		  {
		    // Our partner got here first; try to complete the pair.
		    if (__phase.compare_exchange_strong(__expect, __full_step,
						        memory_order_acq_rel))
		      break;    // I'm 2 in 2, go to next __round
		  }
	      }
	    // Each round halves the number of participants.
	    __current_expected = __last_node + 1;
	    __current >>= 1;
	  }
      }

    public:
      using arrival_token = __barrier_phase_t;

      static constexpr ptrdiff_t
      max() noexcept
      { return __PTRDIFF_MAX__; }

      // Construct a barrier for __expected threads, allocating one
      // tree node per pair of threads.
      __tree_barrier(ptrdiff_t __expected, _CompletionF __completion)
	  : _M_expected(__expected), _M_expected_adjustment(0),
	    _M_completion(move(__completion)),
	    _M_phase(static_cast<__barrier_phase_t>(0))
      {
	size_t const __count = (_M_expected + 1) >> 1;

	_M_state = std::make_unique<__state_t[]>(__count);
      }

      // Record __update arrivals for the current phase and return the
      // phase value observed on entry (the token later passed to wait()).
      // If an arrival completes the phase, this thread runs the
      // completion function, applies any pending drop adjustment,
      // publishes the new phase and wakes all waiters.
      [[nodiscard]] arrival_token
      arrive(ptrdiff_t __update)
      {
	// Derive a starting slot from the thread id so that concurrent
	// arrivals tend to start at different nodes.
	std::hash<std::thread::id> __hasher;
	size_t __current = __hasher(std::this_thread::get_id());
	__atomic_phase_ref_t __phase(_M_phase);
	const auto __old_phase = __phase.load(memory_order_relaxed);
	const auto __cur = static_cast<unsigned char>(__old_phase);
	for(; __update; --__update)
	  {
	    if(_M_arrive(__old_phase, __current))
	      {
		// Final arrival: run the completion step, fold in any
		// arrive_and_drop() adjustments, then advance the phase
		// by 2 and release the waiters.
		_M_completion();
		_M_expected += _M_expected_adjustment.load(memory_order_relaxed);
		_M_expected_adjustment.store(0, memory_order_relaxed);
		auto __new_phase = static_cast<__barrier_phase_t>(__cur + 2);
		__phase.store(__new_phase, memory_order_release);
		__phase.notify_all();
	      }
	  }
	return __old_phase;
      }

      // Block until the phase identified by the token has completed,
      // i.e. until _M_phase no longer equals __old_phase.
      void
      wait(arrival_token&& __old_phase) const
      {
	__atomic_phase_const_ref_t __phase(_M_phase);
	auto const __test_fn = [=]
	  {
	    return __phase.load(memory_order_acquire) != __old_phase;
	  };
	std::__atomic_wait_address(&_M_phase, __test_fn);
      }

      // Arrive once and reduce the expected count for subsequent phases.
      // The reduction is deferred through _M_expected_adjustment so the
      // in-flight phase still completes with the original count.
      void
      arrive_and_drop()
      {
	_M_expected_adjustment.fetch_sub(1, memory_order_relaxed);
	(void)arrive(1);
      }
    };
    209      1.1  mrg 
    210      1.1  mrg   template<typename _CompletionF = __empty_completion>
    211      1.1  mrg     class barrier
    212      1.1  mrg     {
    213      1.1  mrg       // Note, we may introduce a "central" barrier algorithm at some point
    214      1.1  mrg       // for more space constrained targets
    215      1.1  mrg       using __algorithm_t = __tree_barrier<_CompletionF>;
    216      1.1  mrg       __algorithm_t _M_b;
    217      1.1  mrg 
    218      1.1  mrg     public:
    219      1.1  mrg       class arrival_token final
    220      1.1  mrg       {
    221      1.1  mrg       public:
    222      1.1  mrg 	arrival_token(arrival_token&&) = default;
    223      1.1  mrg 	arrival_token& operator=(arrival_token&&) = default;
    224      1.1  mrg 	~arrival_token() = default;
    225      1.1  mrg 
    226      1.1  mrg       private:
    227      1.1  mrg 	friend class barrier;
    228      1.1  mrg 	using __token = typename __algorithm_t::arrival_token;
    229      1.1  mrg 	explicit arrival_token(__token __tok) noexcept : _M_tok(__tok) { }
    230      1.1  mrg 	__token _M_tok;
    231      1.1  mrg       };
    232      1.1  mrg 
    233      1.1  mrg       static constexpr ptrdiff_t
    234      1.1  mrg       max() noexcept
    235      1.1  mrg       { return __algorithm_t::max(); }
    236      1.1  mrg 
    237      1.1  mrg       explicit
    238      1.1  mrg       barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
    239      1.1  mrg       : _M_b(__count, std::move(__completion))
    240      1.1  mrg       { }
    241      1.1  mrg 
    242      1.1  mrg       barrier(barrier const&) = delete;
    243      1.1  mrg       barrier& operator=(barrier const&) = delete;
    244      1.1  mrg 
    245      1.1  mrg       [[nodiscard]] arrival_token
    246      1.1  mrg       arrive(ptrdiff_t __update = 1)
    247      1.1  mrg       { return arrival_token{_M_b.arrive(__update)}; }
    248      1.1  mrg 
    249      1.1  mrg       void
    250      1.1  mrg       wait(arrival_token&& __phase) const
    251      1.1  mrg       { _M_b.wait(std::move(__phase._M_tok)); }
    252      1.1  mrg 
    253      1.1  mrg       void
    254      1.1  mrg       arrive_and_wait()
    255      1.1  mrg       { wait(arrive()); }
    256      1.1  mrg 
    257      1.1  mrg       void
    258      1.1  mrg       arrive_and_drop()
    259      1.1  mrg       { _M_b.arrive_and_drop(); }
    260      1.1  mrg     };
    261      1.1  mrg 
    262      1.1  mrg _GLIBCXX_END_NAMESPACE_VERSION
    263      1.1  mrg } // namespace
    264  1.1.1.2  mrg #endif // __cpp_lib_barrier
    265      1.1  mrg #endif // _GLIBCXX_BARRIER
    266