1// -*- C++ -*- 2//===----------------------------------------------------------------------===// 3// 4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5// See https://llvm.org/LICENSE.txt for license information. 6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7// 8//===----------------------------------------------------------------------===// 9 10#ifndef _LIBCPP_BARRIER 11#define _LIBCPP_BARRIER 12 13/* 14 barrier synopsis 15 16namespace std 17{ 18 19 template<class CompletionFunction = see below> 20 class barrier 21 { 22 public: 23 using arrival_token = see below; 24 25 static constexpr ptrdiff_t max() noexcept; 26 27 constexpr explicit barrier(ptrdiff_t phase_count, 28 CompletionFunction f = CompletionFunction()); 29 ~barrier(); 30 31 barrier(const barrier&) = delete; 32 barrier& operator=(const barrier&) = delete; 33 34 [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1); 35 void wait(arrival_token&& arrival) const; 36 37 void arrive_and_wait(); 38 void arrive_and_drop(); 39 40 private: 41 CompletionFunction completion; // exposition only 42 }; 43 44} 45 46*/ 47 48#include <__config> 49 50#ifdef _LIBCPP_HAS_NO_THREADS 51# error "<barrier> is not supported since libc++ has been configured without support for threads." 52#endif 53 54#include <__assert> // all public C++ headers provide the assertion handler 55#include <__atomic/atomic_base.h> 56#include <__atomic/memory_order.h> 57#include <__availability> 58#include <__memory/unique_ptr.h> 59#include <__thread/poll_with_backoff.h> 60#include <__thread/timed_backoff_policy.h> 61#include <__utility/move.h> 62#include <cstddef> 63#include <cstdint> 64#include <limits> 65#include <version> 66 67#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 68# pragma GCC system_header 69#endif 70 71_LIBCPP_PUSH_MACROS 72#include <__undef_macros> 73 74#if _LIBCPP_STD_VER >= 14 75 76_LIBCPP_BEGIN_NAMESPACE_STD 77 78struct __empty_completion { 79 inline _LIBCPP_HIDE_FROM_ABI void operator()() noexcept {} 80}; 81 82# ifndef _LIBCPP_HAS_NO_TREE_BARRIER 83 84/* 85 86The default implementation of __barrier_base is a classic tree barrier. 87 88It looks different from literature pseudocode for two main reasons: 89 1. Threads that call into std::barrier functions do not provide indices, 90 so a numbering step is added before the actual barrier algorithm, 91 appearing as an N+1 round to the N rounds of the tree barrier. 92 2. A great deal of attention has been paid to avoid cache line thrashing 93 by flattening the tree structure into cache-line sized arrays, that 94 are indexed in an efficient way. 95 96*/ 97 98using __barrier_phase_t = uint8_t; 99 100class __barrier_algorithm_base; 101 102_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __barrier_algorithm_base* 103__construct_barrier_algorithm_base(ptrdiff_t& __expected); 104 105_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI bool 106__arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, __barrier_phase_t __old_phase); 107 108_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void 109__destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier); 110 111template <class _CompletionF> 112class __barrier_base { 113 ptrdiff_t __expected_; 114 unique_ptr<__barrier_algorithm_base, void (*)(__barrier_algorithm_base*)> __base_; 115 __atomic_base<ptrdiff_t> __expected_adjustment_; 116 _CompletionF __completion_; 117 __atomic_base<__barrier_phase_t> __phase_; 118 119public: 120 using arrival_token = __barrier_phase_t; 121 122 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); } 123 124 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI 125 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) 126 : __expected_(__expected), 127 __base_(std::__construct_barrier_algorithm_base(this->__expected_), &__destroy_barrier_algorithm_base), 128 __expected_adjustment_(0), 129 __completion_(std::move(__completion)), 130 __phase_(0) {} 131 [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) { 132 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( 133 __update <= __expected_, "update is greater than the expected count for the current barrier phase"); 134 135 auto const __old_phase = __phase_.load(memory_order_relaxed); 136 for (; __update; --__update) 137 if (__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) { 138 __completion_(); 139 __expected_ += __expected_adjustment_.load(memory_order_relaxed); 140 __expected_adjustment_.store(0, memory_order_relaxed); 141 __phase_.store(__old_phase + 2, memory_order_release); 142 __phase_.notify_all(); 143 } 144 return __old_phase; 145 } 146 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const { 147 auto const __test_fn = [this, __old_phase]() -> bool { return __phase_.load(memory_order_acquire) != __old_phase; }; 148 std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); 149 } 150 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { 151 __expected_adjustment_.fetch_sub(1, memory_order_relaxed); 152 (void)arrive(1); 153 } 154}; 155 156# else 157 158/* 159 160The alternative implementation of __barrier_base is a central barrier. 161 162Two versions of this algorithm are provided: 163 1. A fairly straightforward implementation of the litterature for the 164 general case where the completion function is not empty. 165 2. An optimized implementation that exploits 2's complement arithmetic 166 and well-defined overflow in atomic arithmetic, to handle the phase 167 roll-over for free. 168 169*/ 170 171template <class _CompletionF> 172class __barrier_base { 173 __atomic_base<ptrdiff_t> __expected; 174 __atomic_base<ptrdiff_t> __arrived; 175 _CompletionF __completion; 176 __atomic_base<bool> __phase; 177 178public: 179 using arrival_token = bool; 180 181 static constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); } 182 183 _LIBCPP_HIDE_FROM_ABI __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) 184 : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false) {} 185 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) { 186 auto const __old_phase = __phase.load(memory_order_relaxed); 187 auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update; 188 auto const new_expected = __expected.load(memory_order_relaxed); 189 190 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( 191 update <= new_expected, "update is greater than the expected count for the current barrier phase"); 192 193 if (0 == __result) { 194 __completion(); 195 __arrived.store(new_expected, memory_order_relaxed); 196 __phase.store(!__old_phase, memory_order_release); 197 __phase.notify_all(); 198 } 199 return __old_phase; 200 } 201 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const { 202 __phase.wait(__old_phase, memory_order_acquire); 203 } 204 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { 205 __expected.fetch_sub(1, memory_order_relaxed); 206 (void)arrive(1); 207 } 208}; 209 210template <> 211class __barrier_base<__empty_completion> { 212 static constexpr uint64_t __expected_unit = 1ull; 213 static constexpr uint64_t __arrived_unit = 1ull << 32; 214 static constexpr uint64_t __expected_mask = __arrived_unit - 1; 215 static constexpr uint64_t __phase_bit = 1ull << 63; 216 static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask; 217 218 __atomic_base<uint64_t> __phase_arrived_expected; 219 220 static _LIBCPP_HIDE_FROM_ABI constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT { 221 return ((uint64_t(1u << 31) - __count) << 32) | (uint64_t(1u << 31) - __count); 222 } 223 224public: 225 using arrival_token = uint64_t; 226 227 static constexpr ptrdiff_t max() noexcept { return ptrdiff_t(1u << 31) - 1; } 228 229 _LIBCPP_HIDE_FROM_ABI explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion()) 230 : __phase_arrived_expected(__init(__count)) {} 231 [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) { 232 auto const __inc = __arrived_unit * update; 233 auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel); 234 235 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( 236 update <= __old, "update is greater than the expected count for the current barrier phase"); 237 238 if ((__old ^ (__old + __inc)) & __phase_bit) { 239 __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed); 240 __phase_arrived_expected.notify_all(); 241 } 242 return __old & __phase_bit; 243 } 244 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const { 245 auto const __test_fn = [=]() -> bool { 246 uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire); 247 return ((__current & __phase_bit) != __phase); 248 }; 249 __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); 250 } 251 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { 252 __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed); 253 (void)arrive(1); 254 } 255}; 256 257# endif // !_LIBCPP_HAS_NO_TREE_BARRIER 258 259template <class _CompletionF = __empty_completion> 260class barrier { 261 __barrier_base<_CompletionF> __b_; 262 263public: 264 using arrival_token = typename __barrier_base<_CompletionF>::arrival_token; 265 266 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return __barrier_base<_CompletionF>::max(); } 267 268 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI explicit barrier( 269 ptrdiff_t __count, _CompletionF __completion = _CompletionF()) 270 : __b_(__count, std::move(__completion)) { 271 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( 272 __count >= 0, 273 "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with a negative value"); 274 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( 275 __count <= max(), 276 "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with " 277 "a value greater than max()"); 278 } 279 280 barrier(barrier const&) = delete; 281 barrier& operator=(barrier const&) = delete; 282 283 [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) { 284 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__update > 0, "barrier:arrive must be called with a value greater than 0"); 285 return __b_.arrive(__update); 286 } 287 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const { 288 __b_.wait(std::move(__phase)); 289 } 290 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_wait() { wait(arrive()); } 291 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { __b_.arrive_and_drop(); } 292}; 293 294_LIBCPP_END_NAMESPACE_STD 295 296#endif // _LIBCPP_STD_VER >= 14 297 298_LIBCPP_POP_MACROS 299 300#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 301# include <atomic> 302# include <concepts> 303# include <iterator> 304# include <memory> 305# include <stdexcept> 306# include <variant> 307#endif 308 309#endif //_LIBCPP_BARRIER 310