1// -*- C++ -*- 2//===----------------------------------------------------------------------===// 3// 4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5// See https://llvm.org/LICENSE.txt for license information. 6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7// 8//===----------------------------------------------------------------------===// 9 10#ifndef _LIBCPP_BARRIER 11#define _LIBCPP_BARRIER 12 13/* 14 barrier synopsis 15 16namespace std 17{ 18 19 template<class CompletionFunction = see below> 20 class barrier 21 { 22 public: 23 using arrival_token = see below; 24 25 static constexpr ptrdiff_t max() noexcept; 26 27 constexpr explicit barrier(ptrdiff_t phase_count, 28 CompletionFunction f = CompletionFunction()); 29 ~barrier(); 30 31 barrier(const barrier&) = delete; 32 barrier& operator=(const barrier&) = delete; 33 34 [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1); 35 void wait(arrival_token&& arrival) const; 36 37 void arrive_and_wait(); 38 void arrive_and_drop(); 39 40 private: 41 CompletionFunction completion; // exposition only 42 }; 43 44} 45 46*/ 47 48#include <__config> 49 50#if !defined(_LIBCPP_HAS_NO_THREADS) 51 52# include <__assert> 53# include <__atomic/atomic_base.h> 54# include <__atomic/memory_order.h> 55# include <__memory/unique_ptr.h> 56# include <__thread/poll_with_backoff.h> 57# include <__thread/timed_backoff_policy.h> 58# include <__utility/move.h> 59# include <cstddef> 60# include <cstdint> 61# include <limits> 62# include <version> 63 64# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 65# pragma GCC system_header 66# endif 67 68_LIBCPP_PUSH_MACROS 69# include <__undef_macros> 70 71# if _LIBCPP_STD_VER >= 14 72 73_LIBCPP_BEGIN_NAMESPACE_STD 74 75struct __empty_completion { 76 inline _LIBCPP_HIDE_FROM_ABI void operator()() noexcept {} 77}; 78 79# ifndef _LIBCPP_HAS_NO_TREE_BARRIER 80 81/* 82 83The default implementation of __barrier_base is a classic tree barrier. 84 85It looks different from literature pseudocode for two main reasons: 86 1. Threads that call into std::barrier functions do not provide indices, 87 so a numbering step is added before the actual barrier algorithm, 88 appearing as an N+1 round to the N rounds of the tree barrier. 89 2. A great deal of attention has been paid to avoid cache line thrashing 90 by flattening the tree structure into cache-line sized arrays, that 91 are indexed in an efficient way. 92 93*/ 94 95using __barrier_phase_t = uint8_t; 96 97class __barrier_algorithm_base; 98 99_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __barrier_algorithm_base* 100__construct_barrier_algorithm_base(ptrdiff_t& __expected); 101 102_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI bool 103__arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, __barrier_phase_t __old_phase) noexcept; 104 105_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void 106__destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier) noexcept; 107 108template <class _CompletionF> 109class __barrier_base { 110 ptrdiff_t __expected_; 111 unique_ptr<__barrier_algorithm_base, void (*)(__barrier_algorithm_base*)> __base_; 112 __atomic_base<ptrdiff_t> __expected_adjustment_; 113 _CompletionF __completion_; 114 __atomic_base<__barrier_phase_t> __phase_; 115 116public: 117 using arrival_token = __barrier_phase_t; 118 119 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); } 120 121 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI 122 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) 123 : __expected_(__expected), 124 __base_(std::__construct_barrier_algorithm_base(this->__expected_), &__destroy_barrier_algorithm_base), 125 __expected_adjustment_(0), 126 __completion_(std::move(__completion)), 127 __phase_(0) {} 128 _LIBCPP_NODISCARD _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) { 129 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( 130 __update <= __expected_, "update is greater than the expected count for the current barrier phase"); 131 132 auto const __old_phase = __phase_.load(memory_order_relaxed); 133 for (; __update; --__update) 134 if (__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) { 135 __completion_(); 136 __expected_ += __expected_adjustment_.load(memory_order_relaxed); 137 __expected_adjustment_.store(0, memory_order_relaxed); 138 __phase_.store(__old_phase + 2, memory_order_release); 139 __phase_.notify_all(); 140 } 141 return __old_phase; 142 } 143 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const { 144 auto const __test_fn = [this, __old_phase]() -> bool { return __phase_.load(memory_order_acquire) != __old_phase; }; 145 std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); 146 } 147 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { 148 __expected_adjustment_.fetch_sub(1, memory_order_relaxed); 149 (void)arrive(1); 150 } 151}; 152 153# else 154 155/* 156 157The alternative implementation of __barrier_base is a central barrier. 158 159Two versions of this algorithm are provided: 160 1. A fairly straightforward implementation of the litterature for the 161 general case where the completion function is not empty. 162 2. An optimized implementation that exploits 2's complement arithmetic 163 and well-defined overflow in atomic arithmetic, to handle the phase 164 roll-over for free. 165 166*/ 167 168template <class _CompletionF> 169class __barrier_base { 170 __atomic_base<ptrdiff_t> __expected; 171 __atomic_base<ptrdiff_t> __arrived; 172 _CompletionF __completion; 173 __atomic_base<bool> __phase; 174 175public: 176 using arrival_token = bool; 177 178 static constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); } 179 180 _LIBCPP_HIDE_FROM_ABI __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) 181 : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false) {} 182 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) { 183 auto const __old_phase = __phase.load(memory_order_relaxed); 184 auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update; 185 auto const new_expected = __expected.load(memory_order_relaxed); 186 187 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( 188 update <= new_expected, "update is greater than the expected count for the current barrier phase"); 189 190 if (0 == __result) { 191 __completion(); 192 __arrived.store(new_expected, memory_order_relaxed); 193 __phase.store(!__old_phase, memory_order_release); 194 __phase.notify_all(); 195 } 196 return __old_phase; 197 } 198 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const { 199 __phase.wait(__old_phase, memory_order_acquire); 200 } 201 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { 202 __expected.fetch_sub(1, memory_order_relaxed); 203 (void)arrive(1); 204 } 205}; 206 207template <> 208class __barrier_base<__empty_completion> { 209 static constexpr uint64_t __expected_unit = 1ull; 210 static constexpr uint64_t __arrived_unit = 1ull << 32; 211 static constexpr uint64_t __expected_mask = __arrived_unit - 1; 212 static constexpr uint64_t __phase_bit = 1ull << 63; 213 static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask; 214 215 __atomic_base<uint64_t> __phase_arrived_expected; 216 217 static _LIBCPP_HIDE_FROM_ABI constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT { 218 return ((uint64_t(1u << 31) - __count) << 32) | (uint64_t(1u << 31) - __count); 219 } 220 221public: 222 using arrival_token = uint64_t; 223 224 static constexpr ptrdiff_t max() noexcept { return ptrdiff_t(1u << 31) - 1; } 225 226 _LIBCPP_HIDE_FROM_ABI explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion()) 227 : __phase_arrived_expected(__init(__count)) {} 228 [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) { 229 auto const __inc = __arrived_unit * update; 230 auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel); 231 232 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( 233 update <= __old, "update is greater than the expected count for the current barrier phase"); 234 235 if ((__old ^ (__old + __inc)) & __phase_bit) { 236 __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed); 237 __phase_arrived_expected.notify_all(); 238 } 239 return __old & __phase_bit; 240 } 241 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const { 242 auto const __test_fn = [=]() -> bool { 243 uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire); 244 return ((__current & __phase_bit) != __phase); 245 }; 246 __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); 247 } 248 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { 249 __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed); 250 (void)arrive(1); 251 } 252}; 253 254# endif // !_LIBCPP_HAS_NO_TREE_BARRIER 255 256template <class _CompletionF = __empty_completion> 257class _LIBCPP_DEPRECATED_ATOMIC_SYNC barrier { 258 __barrier_base<_CompletionF> __b_; 259 260public: 261 using arrival_token = typename __barrier_base<_CompletionF>::arrival_token; 262 263 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return __barrier_base<_CompletionF>::max(); } 264 265 _LIBCPP_AVAILABILITY_SYNC 266 _LIBCPP_HIDE_FROM_ABI explicit barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF()) 267 : __b_(__count, std::move(__completion)) { 268 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( 269 __count >= 0, 270 "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with a negative value"); 271 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( 272 __count <= max(), 273 "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with " 274 "a value greater than max()"); 275 } 276 277 barrier(barrier const&) = delete; 278 barrier& operator=(barrier const&) = delete; 279 280 _LIBCPP_NODISCARD _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) { 281 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__update > 0, "barrier:arrive must be called with a value greater than 0"); 282 return __b_.arrive(__update); 283 } 284 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const { 285 __b_.wait(std::move(__phase)); 286 } 287 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_wait() { wait(arrive()); } 288 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { __b_.arrive_and_drop(); } 289}; 290 291_LIBCPP_END_NAMESPACE_STD 292 293# endif // _LIBCPP_STD_VER >= 14 294 295_LIBCPP_POP_MACROS 296 297#endif // !defined(_LIBCPP_HAS_NO_THREADS) 298 299#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 300# include <atomic> 301# include <concepts> 302# include <iterator> 303# include <memory> 304# include <stdexcept> 305# include <variant> 306#endif 307 308#endif //_LIBCPP_BARRIER 309