// <barrier> -*- C++ -*-

// Copyright (C) 2020-2024 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

// This implementation is based on libcxx/include/barrier
//===-- barrier.h --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------===//

/** @file include/barrier
 *  This is a Standard C++ Library header.
36 1.1 mrg */ 37 1.1 mrg 38 1.1 mrg #ifndef _GLIBCXX_BARRIER 39 1.1 mrg #define _GLIBCXX_BARRIER 1 40 1.1 mrg 41 1.1 mrg #pragma GCC system_header 42 1.1 mrg 43 1.1.1.2 mrg #include <bits/requires_hosted.h> // threading primitive 44 1.1.1.2 mrg 45 1.1.1.2 mrg #define __glibcxx_want_barrier 46 1.1.1.2 mrg #include <bits/version.h> 47 1.1.1.2 mrg 48 1.1.1.2 mrg #ifdef __cpp_lib_barrier // C++ >= 20 && __cpp_aligned_new && lib_atomic_wait 49 1.1 mrg #include <bits/atomic_base.h> 50 1.1 mrg #include <bits/std_thread.h> 51 1.1 mrg #include <bits/unique_ptr.h> 52 1.1 mrg 53 1.1 mrg #include <array> 54 1.1 mrg 55 1.1 mrg namespace std _GLIBCXX_VISIBILITY(default) 56 1.1 mrg { 57 1.1 mrg _GLIBCXX_BEGIN_NAMESPACE_VERSION 58 1.1 mrg 59 1.1 mrg struct __empty_completion 60 1.1 mrg { 61 1.1 mrg _GLIBCXX_ALWAYS_INLINE void 62 1.1 mrg operator()() noexcept 63 1.1 mrg { } 64 1.1 mrg }; 65 1.1 mrg 66 1.1 mrg /* 67 1.1 mrg 68 1.1 mrg The default implementation of __tree_barrier is a classic tree barrier. 69 1.1 mrg 70 1.1 mrg It looks different from literature pseudocode for two main reasons: 71 1.1 mrg 1. Threads that call into std::barrier functions do not provide indices, 72 1.1 mrg so a numbering step is added before the actual barrier algorithm, 73 1.1 mrg appearing as an N+1 round to the N rounds of the tree barrier. 74 1.1 mrg 2. A great deal of attention has been paid to avoid cache line thrashing 75 1.1 mrg by flattening the tree structure into cache-line sized arrays, that 76 1.1 mrg are indexed in an efficient way. 
77 1.1 mrg 78 1.1 mrg */ 79 1.1 mrg 80 1.1 mrg enum class __barrier_phase_t : unsigned char { }; 81 1.1 mrg 82 1.1 mrg template<typename _CompletionF> 83 1.1 mrg class __tree_barrier 84 1.1 mrg { 85 1.1 mrg using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>; 86 1.1 mrg using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>; 87 1.1 mrg static constexpr auto __phase_alignment = 88 1.1 mrg __atomic_phase_ref_t::required_alignment; 89 1.1 mrg 90 1.1 mrg using __tickets_t = std::array<__barrier_phase_t, 64>; 91 1.1 mrg struct alignas(64) /* naturally-align the heap state */ __state_t 92 1.1 mrg { 93 1.1 mrg alignas(__phase_alignment) __tickets_t __tickets; 94 1.1 mrg }; 95 1.1 mrg 96 1.1 mrg ptrdiff_t _M_expected; 97 1.1 mrg unique_ptr<__state_t[]> _M_state; 98 1.1 mrg __atomic_base<ptrdiff_t> _M_expected_adjustment; 99 1.1 mrg _CompletionF _M_completion; 100 1.1 mrg 101 1.1 mrg alignas(__phase_alignment) __barrier_phase_t _M_phase; 102 1.1 mrg 103 1.1 mrg bool 104 1.1 mrg _M_arrive(__barrier_phase_t __old_phase, size_t __current) 105 1.1 mrg { 106 1.1 mrg const auto __old_phase_val = static_cast<unsigned char>(__old_phase); 107 1.1 mrg const auto __half_step = 108 1.1 mrg static_cast<__barrier_phase_t>(__old_phase_val + 1); 109 1.1 mrg const auto __full_step = 110 1.1 mrg static_cast<__barrier_phase_t>(__old_phase_val + 2); 111 1.1 mrg 112 1.1 mrg size_t __current_expected = _M_expected; 113 1.1 mrg __current %= ((_M_expected + 1) >> 1); 114 1.1 mrg 115 1.1 mrg for (int __round = 0; ; ++__round) 116 1.1 mrg { 117 1.1 mrg if (__current_expected <= 1) 118 1.1 mrg return true; 119 1.1 mrg size_t const __end_node = ((__current_expected + 1) >> 1), 120 1.1 mrg __last_node = __end_node - 1; 121 1.1 mrg for ( ; ; ++__current) 122 1.1 mrg { 123 1.1 mrg if (__current == __end_node) 124 1.1 mrg __current = 0; 125 1.1 mrg auto __expect = __old_phase; 126 1.1 mrg __atomic_phase_ref_t __phase(_M_state[__current] 127 1.1 mrg 
.__tickets[__round]); 128 1.1 mrg if (__current == __last_node && (__current_expected & 1)) 129 1.1 mrg { 130 1.1 mrg if (__phase.compare_exchange_strong(__expect, __full_step, 131 1.1 mrg memory_order_acq_rel)) 132 1.1 mrg break; // I'm 1 in 1, go to next __round 133 1.1 mrg } 134 1.1 mrg else if (__phase.compare_exchange_strong(__expect, __half_step, 135 1.1 mrg memory_order_acq_rel)) 136 1.1 mrg { 137 1.1 mrg return false; // I'm 1 in 2, done with arrival 138 1.1 mrg } 139 1.1 mrg else if (__expect == __half_step) 140 1.1 mrg { 141 1.1 mrg if (__phase.compare_exchange_strong(__expect, __full_step, 142 1.1 mrg memory_order_acq_rel)) 143 1.1 mrg break; // I'm 2 in 2, go to next __round 144 1.1 mrg } 145 1.1 mrg } 146 1.1 mrg __current_expected = __last_node + 1; 147 1.1 mrg __current >>= 1; 148 1.1 mrg } 149 1.1 mrg } 150 1.1 mrg 151 1.1 mrg public: 152 1.1 mrg using arrival_token = __barrier_phase_t; 153 1.1 mrg 154 1.1 mrg static constexpr ptrdiff_t 155 1.1 mrg max() noexcept 156 1.1 mrg { return __PTRDIFF_MAX__; } 157 1.1 mrg 158 1.1 mrg __tree_barrier(ptrdiff_t __expected, _CompletionF __completion) 159 1.1 mrg : _M_expected(__expected), _M_expected_adjustment(0), 160 1.1 mrg _M_completion(move(__completion)), 161 1.1 mrg _M_phase(static_cast<__barrier_phase_t>(0)) 162 1.1 mrg { 163 1.1 mrg size_t const __count = (_M_expected + 1) >> 1; 164 1.1 mrg 165 1.1 mrg _M_state = std::make_unique<__state_t[]>(__count); 166 1.1 mrg } 167 1.1 mrg 168 1.1 mrg [[nodiscard]] arrival_token 169 1.1 mrg arrive(ptrdiff_t __update) 170 1.1 mrg { 171 1.1 mrg std::hash<std::thread::id> __hasher; 172 1.1 mrg size_t __current = __hasher(std::this_thread::get_id()); 173 1.1 mrg __atomic_phase_ref_t __phase(_M_phase); 174 1.1 mrg const auto __old_phase = __phase.load(memory_order_relaxed); 175 1.1 mrg const auto __cur = static_cast<unsigned char>(__old_phase); 176 1.1 mrg for(; __update; --__update) 177 1.1 mrg { 178 1.1 mrg if(_M_arrive(__old_phase, __current)) 179 1.1 mrg { 180 1.1 
mrg _M_completion(); 181 1.1 mrg _M_expected += _M_expected_adjustment.load(memory_order_relaxed); 182 1.1 mrg _M_expected_adjustment.store(0, memory_order_relaxed); 183 1.1 mrg auto __new_phase = static_cast<__barrier_phase_t>(__cur + 2); 184 1.1 mrg __phase.store(__new_phase, memory_order_release); 185 1.1 mrg __phase.notify_all(); 186 1.1 mrg } 187 1.1 mrg } 188 1.1 mrg return __old_phase; 189 1.1 mrg } 190 1.1 mrg 191 1.1 mrg void 192 1.1 mrg wait(arrival_token&& __old_phase) const 193 1.1 mrg { 194 1.1 mrg __atomic_phase_const_ref_t __phase(_M_phase); 195 1.1 mrg auto const __test_fn = [=] 196 1.1 mrg { 197 1.1 mrg return __phase.load(memory_order_acquire) != __old_phase; 198 1.1 mrg }; 199 1.1 mrg std::__atomic_wait_address(&_M_phase, __test_fn); 200 1.1 mrg } 201 1.1 mrg 202 1.1 mrg void 203 1.1 mrg arrive_and_drop() 204 1.1 mrg { 205 1.1 mrg _M_expected_adjustment.fetch_sub(1, memory_order_relaxed); 206 1.1 mrg (void)arrive(1); 207 1.1 mrg } 208 1.1 mrg }; 209 1.1 mrg 210 1.1 mrg template<typename _CompletionF = __empty_completion> 211 1.1 mrg class barrier 212 1.1 mrg { 213 1.1 mrg // Note, we may introduce a "central" barrier algorithm at some point 214 1.1 mrg // for more space constrained targets 215 1.1 mrg using __algorithm_t = __tree_barrier<_CompletionF>; 216 1.1 mrg __algorithm_t _M_b; 217 1.1 mrg 218 1.1 mrg public: 219 1.1 mrg class arrival_token final 220 1.1 mrg { 221 1.1 mrg public: 222 1.1 mrg arrival_token(arrival_token&&) = default; 223 1.1 mrg arrival_token& operator=(arrival_token&&) = default; 224 1.1 mrg ~arrival_token() = default; 225 1.1 mrg 226 1.1 mrg private: 227 1.1 mrg friend class barrier; 228 1.1 mrg using __token = typename __algorithm_t::arrival_token; 229 1.1 mrg explicit arrival_token(__token __tok) noexcept : _M_tok(__tok) { } 230 1.1 mrg __token _M_tok; 231 1.1 mrg }; 232 1.1 mrg 233 1.1 mrg static constexpr ptrdiff_t 234 1.1 mrg max() noexcept 235 1.1 mrg { return __algorithm_t::max(); } 236 1.1 mrg 237 1.1 mrg explicit 
238 1.1 mrg barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF()) 239 1.1 mrg : _M_b(__count, std::move(__completion)) 240 1.1 mrg { } 241 1.1 mrg 242 1.1 mrg barrier(barrier const&) = delete; 243 1.1 mrg barrier& operator=(barrier const&) = delete; 244 1.1 mrg 245 1.1 mrg [[nodiscard]] arrival_token 246 1.1 mrg arrive(ptrdiff_t __update = 1) 247 1.1 mrg { return arrival_token{_M_b.arrive(__update)}; } 248 1.1 mrg 249 1.1 mrg void 250 1.1 mrg wait(arrival_token&& __phase) const 251 1.1 mrg { _M_b.wait(std::move(__phase._M_tok)); } 252 1.1 mrg 253 1.1 mrg void 254 1.1 mrg arrive_and_wait() 255 1.1 mrg { wait(arrive()); } 256 1.1 mrg 257 1.1 mrg void 258 1.1 mrg arrive_and_drop() 259 1.1 mrg { _M_b.arrive_and_drop(); } 260 1.1 mrg }; 261 1.1 mrg 262 1.1 mrg _GLIBCXX_END_NAMESPACE_VERSION 263 1.1 mrg } // namespace 264 1.1.1.2 mrg #endif // __cpp_lib_barrier 265 1.1 mrg #endif // _GLIBCXX_BARRIER 266