1// -*- C++ -*- 2//===----------------------------------------------------------------------===// 3// 4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5// See https://llvm.org/LICENSE.txt for license information. 6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7// 8//===----------------------------------------------------------------------===// 9 10#ifndef _LIBCPP_BARRIER 11#define _LIBCPP_BARRIER 12 13/* 14 barrier synopsis 15 16namespace std 17{ 18 19 template<class CompletionFunction = see below> 20 class barrier 21 { 22 public: 23 using arrival_token = see below; 24 25 static constexpr ptrdiff_t max() noexcept; 26 27 constexpr explicit barrier(ptrdiff_t phase_count, 28 CompletionFunction f = CompletionFunction()); 29 ~barrier(); 30 31 barrier(const barrier&) = delete; 32 barrier& operator=(const barrier&) = delete; 33 34 [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1); 35 void wait(arrival_token&& arrival) const; 36 37 void arrive_and_wait(); 38 void arrive_and_drop(); 39 40 private: 41 CompletionFunction completion; // exposition only 42 }; 43 44} 45 46*/ 47 48#include <__assert> // all public C++ headers provide the assertion handler 49#include <__atomic/atomic_base.h> 50#include <__atomic/memory_order.h> 51#include <__availability> 52#include <__config> 53#include <__memory/unique_ptr.h> 54#include <__thread/poll_with_backoff.h> 55#include <__thread/timed_backoff_policy.h> 56#include <__utility/move.h> 57#include <cstddef> 58#include <cstdint> 59#include <limits> 60#include <version> 61 62#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 63# pragma GCC system_header 64#endif 65 66#ifdef _LIBCPP_HAS_NO_THREADS 67# error "<barrier> is not supported since libc++ has been configured without support for threads." 68#endif 69 70_LIBCPP_PUSH_MACROS 71#include <__undef_macros> 72 73#if _LIBCPP_STD_VER >= 14 74 75_LIBCPP_BEGIN_NAMESPACE_STD 76 77struct __empty_completion { 78 inline _LIBCPP_HIDE_FROM_ABI void operator()() noexcept {} 79}; 80 81# ifndef _LIBCPP_HAS_NO_TREE_BARRIER 82 83/* 84 85The default implementation of __barrier_base is a classic tree barrier. 86 87It looks different from literature pseudocode for two main reasons: 88 1. Threads that call into std::barrier functions do not provide indices, 89 so a numbering step is added before the actual barrier algorithm, 90 appearing as an N+1 round to the N rounds of the tree barrier. 91 2. A great deal of attention has been paid to avoid cache line thrashing 92 by flattening the tree structure into cache-line sized arrays, that 93 are indexed in an efficient way. 94 95*/ 96 97using __barrier_phase_t = uint8_t; 98 99class __barrier_algorithm_base; 100 101_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __barrier_algorithm_base* 102__construct_barrier_algorithm_base(ptrdiff_t& __expected); 103 104_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI bool 105__arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, __barrier_phase_t __old_phase); 106 107_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void 108__destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier); 109 110template <class _CompletionF> 111class __barrier_base { 112 ptrdiff_t __expected_; 113 unique_ptr<__barrier_algorithm_base, void (*)(__barrier_algorithm_base*)> __base_; 114 __atomic_base<ptrdiff_t> __expected_adjustment_; 115 _CompletionF __completion_; 116 __atomic_base<__barrier_phase_t> __phase_; 117 118public: 119 using arrival_token = __barrier_phase_t; 120 121 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); } 122 123 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI 124 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) 125 : __expected_(__expected), 126 __base_(std::__construct_barrier_algorithm_base(this->__expected_), &__destroy_barrier_algorithm_base), 127 __expected_adjustment_(0), 128 __completion_(std::move(__completion)), 129 __phase_(0) {} 130 [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) { 131 _LIBCPP_ASSERT_UNCATEGORIZED( 132 __update <= __expected_, "update is greater than the expected count for the current barrier phase"); 133 134 auto const __old_phase = __phase_.load(memory_order_relaxed); 135 for (; __update; --__update) 136 if (__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) { 137 __completion_(); 138 __expected_ += __expected_adjustment_.load(memory_order_relaxed); 139 __expected_adjustment_.store(0, memory_order_relaxed); 140 __phase_.store(__old_phase + 2, memory_order_release); 141 __phase_.notify_all(); 142 } 143 return __old_phase; 144 } 145 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const { 146 auto const __test_fn = [this, __old_phase]() -> bool { return __phase_.load(memory_order_acquire) != __old_phase; }; 147 std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); 148 } 149 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { 150 __expected_adjustment_.fetch_sub(1, memory_order_relaxed); 151 (void)arrive(1); 152 } 153}; 154 155# else 156 157/* 158 159The alternative implementation of __barrier_base is a central barrier. 160 161Two versions of this algorithm are provided: 162 1. A fairly straightforward implementation of the litterature for the 163 general case where the completion function is not empty. 164 2. An optimized implementation that exploits 2's complement arithmetic 165 and well-defined overflow in atomic arithmetic, to handle the phase 166 roll-over for free. 167 168*/ 169 170template <class _CompletionF> 171class __barrier_base { 172 __atomic_base<ptrdiff_t> __expected; 173 __atomic_base<ptrdiff_t> __arrived; 174 _CompletionF __completion; 175 __atomic_base<bool> __phase; 176 177public: 178 using arrival_token = bool; 179 180 static constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); } 181 182 _LIBCPP_HIDE_FROM_ABI __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) 183 : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false) {} 184 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) { 185 auto const __old_phase = __phase.load(memory_order_relaxed); 186 auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update; 187 auto const new_expected = __expected.load(memory_order_relaxed); 188 189 _LIBCPP_ASSERT_UNCATEGORIZED( 190 update <= new_expected, "update is greater than the expected count for the current barrier phase"); 191 192 if (0 == __result) { 193 __completion(); 194 __arrived.store(new_expected, memory_order_relaxed); 195 __phase.store(!__old_phase, memory_order_release); 196 __phase.notify_all(); 197 } 198 return __old_phase; 199 } 200 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const { 201 __phase.wait(__old_phase, memory_order_acquire); 202 } 203 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { 204 __expected.fetch_sub(1, memory_order_relaxed); 205 (void)arrive(1); 206 } 207}; 208 209template <> 210class __barrier_base<__empty_completion> { 211 static constexpr uint64_t __expected_unit = 1ull; 212 static constexpr uint64_t __arrived_unit = 1ull << 32; 213 static constexpr uint64_t __expected_mask = __arrived_unit - 1; 214 static constexpr uint64_t __phase_bit = 1ull << 63; 215 static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask; 216 217 __atomic_base<uint64_t> __phase_arrived_expected; 218 219 static _LIBCPP_HIDE_FROM_ABI constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT { 220 return ((uint64_t(1u << 31) - __count) << 32) | (uint64_t(1u << 31) - __count); 221 } 222 223public: 224 using arrival_token = uint64_t; 225 226 static constexpr ptrdiff_t max() noexcept { return ptrdiff_t(1u << 31) - 1; } 227 228 _LIBCPP_HIDE_FROM_ABI explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion()) 229 : __phase_arrived_expected(__init(__count)) {} 230 [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) { 231 auto const __inc = __arrived_unit * update; 232 auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel); 233 234 _LIBCPP_ASSERT_UNCATEGORIZED( 235 update <= __old, "update is greater than the expected count for the current barrier phase"); 236 237 if ((__old ^ (__old + __inc)) & __phase_bit) { 238 __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed); 239 __phase_arrived_expected.notify_all(); 240 } 241 return __old & __phase_bit; 242 } 243 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const { 244 auto const __test_fn = [=]() -> bool { 245 uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire); 246 return ((__current & __phase_bit) != __phase); 247 }; 248 __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); 249 } 250 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { 251 __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed); 252 (void)arrive(1); 253 } 254}; 255 256# endif // !_LIBCPP_HAS_NO_TREE_BARRIER 257 258template <class _CompletionF = __empty_completion> 259class barrier { 260 __barrier_base<_CompletionF> __b_; 261 262public: 263 using arrival_token = typename __barrier_base<_CompletionF>::arrival_token; 264 265 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return __barrier_base<_CompletionF>::max(); } 266 267 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI explicit barrier( 268 ptrdiff_t __count, _CompletionF __completion = _CompletionF()) 269 : __b_(__count, std::move(__completion)) { 270 _LIBCPP_ASSERT_UNCATEGORIZED( 271 __count >= 0, 272 "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with a negative value"); 273 _LIBCPP_ASSERT_UNCATEGORIZED( 274 __count <= max(), 275 "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with " 276 "a value greater than max()"); 277 } 278 279 barrier(barrier const&) = delete; 280 barrier& operator=(barrier const&) = delete; 281 282 [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) { 283 _LIBCPP_ASSERT_UNCATEGORIZED(__update > 0, "barrier:arrive must be called with a value greater than 0"); 284 return __b_.arrive(__update); 285 } 286 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const { 287 __b_.wait(std::move(__phase)); 288 } 289 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_wait() { wait(arrive()); } 290 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { __b_.arrive_and_drop(); } 291}; 292 293_LIBCPP_END_NAMESPACE_STD 294 295#endif // _LIBCPP_STD_VER >= 14 296 297_LIBCPP_POP_MACROS 298 299#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 300# include <atomic> 301# include <concepts> 302# include <iterator> 303# include <memory> 304# include <stdexcept> 305# include <variant> 306#endif 307 308#endif //_LIBCPP_BARRIER 309