1// -*- C++ -*- 2//===----------------------------------------------------------------------===// 3// 4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5// See https://llvm.org/LICENSE.txt for license information. 6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7// 8//===----------------------------------------------------------------------===// 9 10#ifndef _LIBCPP_BARRIER 11#define _LIBCPP_BARRIER 12 13/* 14 barrier synopsis 15 16namespace std 17{ 18 19 template<class CompletionFunction = see below> 20 class barrier 21 { 22 public: 23 using arrival_token = see below; 24 25 static constexpr ptrdiff_t max() noexcept; 26 27 constexpr explicit barrier(ptrdiff_t phase_count, 28 CompletionFunction f = CompletionFunction()); 29 ~barrier(); 30 31 barrier(const barrier&) = delete; 32 barrier& operator=(const barrier&) = delete; 33 34 [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1); 35 void wait(arrival_token&& arrival) const; 36 37 void arrive_and_wait(); 38 void arrive_and_drop(); 39 40 private: 41 CompletionFunction completion; // exposition only 42 }; 43 44} 45 46*/ 47 48#include <__assert> // all public C++ headers provide the assertion handler 49#include <__availability> 50#include <__config> 51#include <__memory/unique_ptr.h> 52#include <__thread/timed_backoff_policy.h> 53#include <__utility/move.h> 54#include <atomic> 55#include <limits> 56 57#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 58# pragma GCC system_header 59#endif 60 61#ifdef _LIBCPP_HAS_NO_THREADS 62# error "<barrier> is not supported since libc++ has been configured without support for threads." 63#endif 64 65_LIBCPP_PUSH_MACROS 66#include <__undef_macros> 67 68#if _LIBCPP_STD_VER >= 14 69 70_LIBCPP_BEGIN_NAMESPACE_STD 71 72struct __empty_completion 73{ 74 inline _LIBCPP_INLINE_VISIBILITY 75 void operator()() noexcept 76 { 77 } 78}; 79 80#ifndef _LIBCPP_HAS_NO_TREE_BARRIER 81 82/* 83 84The default implementation of __barrier_base is a classic tree barrier. 85 86It looks different from literature pseudocode for two main reasons: 87 1. Threads that call into std::barrier functions do not provide indices, 88 so a numbering step is added before the actual barrier algorithm, 89 appearing as an N+1 round to the N rounds of the tree barrier. 90 2. A great deal of attention has been paid to avoid cache line thrashing 91 by flattening the tree structure into cache-line sized arrays, that 92 are indexed in an efficient way. 93 94*/ 95 96using __barrier_phase_t = uint8_t; 97 98class __barrier_algorithm_base; 99 100_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI 101__barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected); 102 103_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI 104bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, 105 __barrier_phase_t __old_phase); 106 107_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI 108void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier); 109 110template<class _CompletionF> 111class __barrier_base { 112 ptrdiff_t __expected_; 113 unique_ptr<__barrier_algorithm_base, 114 void (*)(__barrier_algorithm_base*)> __base_; 115 __atomic_base<ptrdiff_t> __expected_adjustment_; 116 _CompletionF __completion_; 117 __atomic_base<__barrier_phase_t> __phase_; 118 119public: 120 using arrival_token = __barrier_phase_t; 121 122 static constexpr ptrdiff_t max() noexcept { 123 return numeric_limits<ptrdiff_t>::max(); 124 } 125 126 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 127 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) 128 : __expected_(__expected), __base_(std::__construct_barrier_algorithm_base(this->__expected_), 129 &__destroy_barrier_algorithm_base), 130 __expected_adjustment_(0), __completion_(std::move(__completion)), __phase_(0) 131 { 132 } 133 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 134 arrival_token arrive(ptrdiff_t __update) 135 { 136 auto const __old_phase = __phase_.load(memory_order_relaxed); 137 for(; __update; --__update) 138 if(__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) { 139 __completion_(); 140 __expected_ += __expected_adjustment_.load(memory_order_relaxed); 141 __expected_adjustment_.store(0, memory_order_relaxed); 142 __phase_.store(__old_phase + 2, memory_order_release); 143 __phase_.notify_all(); 144 } 145 return __old_phase; 146 } 147 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 148 void wait(arrival_token&& __old_phase) const 149 { 150 auto const __test_fn = [this, __old_phase]() -> bool { 151 return __phase_.load(memory_order_acquire) != __old_phase; 152 }; 153 std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); 154 } 155 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 156 void arrive_and_drop() 157 { 158 __expected_adjustment_.fetch_sub(1, memory_order_relaxed); 159 (void)arrive(1); 160 } 161}; 162 163#else 164 165/* 166 167The alternative implementation of __barrier_base is a central barrier. 168 169Two versions of this algorithm are provided: 170 1. A fairly straightforward implementation of the litterature for the 171 general case where the completion function is not empty. 172 2. An optimized implementation that exploits 2's complement arithmetic 173 and well-defined overflow in atomic arithmetic, to handle the phase 174 roll-over for free. 175 176*/ 177 178template<class _CompletionF> 179class __barrier_base { 180 181 __atomic_base<ptrdiff_t> __expected; 182 __atomic_base<ptrdiff_t> __arrived; 183 _CompletionF __completion; 184 __atomic_base<bool> __phase; 185public: 186 using arrival_token = bool; 187 188 static constexpr ptrdiff_t max() noexcept { 189 return numeric_limits<ptrdiff_t>::max(); 190 } 191 192 _LIBCPP_INLINE_VISIBILITY 193 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) 194 : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false) 195 { 196 } 197 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 198 arrival_token arrive(ptrdiff_t update) 199 { 200 auto const __old_phase = __phase.load(memory_order_relaxed); 201 auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update; 202 auto const new_expected = __expected.load(memory_order_relaxed); 203 if(0 == __result) { 204 __completion(); 205 __arrived.store(new_expected, memory_order_relaxed); 206 __phase.store(!__old_phase, memory_order_release); 207 __phase.notify_all(); 208 } 209 return __old_phase; 210 } 211 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 212 void wait(arrival_token&& __old_phase) const 213 { 214 __phase.wait(__old_phase, memory_order_acquire); 215 } 216 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 217 void arrive_and_drop() 218 { 219 __expected.fetch_sub(1, memory_order_relaxed); 220 (void)arrive(1); 221 } 222}; 223 224template<> 225class __barrier_base<__empty_completion> { 226 227 static constexpr uint64_t __expected_unit = 1ull; 228 static constexpr uint64_t __arrived_unit = 1ull << 32; 229 static constexpr uint64_t __expected_mask = __arrived_unit - 1; 230 static constexpr uint64_t __phase_bit = 1ull << 63; 231 static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask; 232 233 __atomic_base<uint64_t> __phase_arrived_expected; 234 235 static _LIBCPP_INLINE_VISIBILITY 236 constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT 237 { 238 return ((uint64_t(1u << 31) - __count) << 32) 239 | (uint64_t(1u << 31) - __count); 240 } 241 242public: 243 using arrival_token = uint64_t; 244 245 static constexpr ptrdiff_t max() noexcept { 246 return ptrdiff_t(1u << 31) - 1; 247 } 248 249 _LIBCPP_INLINE_VISIBILITY 250 explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion()) 251 : __phase_arrived_expected(__init(__count)) 252 { 253 } 254 [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 255 arrival_token arrive(ptrdiff_t update) 256 { 257 auto const __inc = __arrived_unit * update; 258 auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel); 259 if((__old ^ (__old + __inc)) & __phase_bit) { 260 __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed); 261 __phase_arrived_expected.notify_all(); 262 } 263 return __old & __phase_bit; 264 } 265 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 266 void wait(arrival_token&& __phase) const 267 { 268 auto const __test_fn = [=]() -> bool { 269 uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire); 270 return ((__current & __phase_bit) != __phase); 271 }; 272 __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); 273 } 274 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 275 void arrive_and_drop() 276 { 277 __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed); 278 (void)arrive(1); 279 } 280}; 281 282#endif // !_LIBCPP_HAS_NO_TREE_BARRIER 283 284template<class _CompletionF = __empty_completion> 285class barrier { 286 287 __barrier_base<_CompletionF> __b_; 288public: 289 using arrival_token = typename __barrier_base<_CompletionF>::arrival_token; 290 291 static constexpr ptrdiff_t max() noexcept { 292 return __barrier_base<_CompletionF>::max(); 293 } 294 295 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 296 barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF()) 297 : __b_(__count, _VSTD::move(__completion)) { 298 } 299 300 barrier(barrier const&) = delete; 301 barrier& operator=(barrier const&) = delete; 302 303 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 304 arrival_token arrive(ptrdiff_t __update = 1) 305 { 306 return __b_.arrive(__update); 307 } 308 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 309 void wait(arrival_token&& __phase) const 310 { 311 __b_.wait(_VSTD::move(__phase)); 312 } 313 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 314 void arrive_and_wait() 315 { 316 wait(arrive()); 317 } 318 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 319 void arrive_and_drop() 320 { 321 __b_.arrive_and_drop(); 322 } 323}; 324 325_LIBCPP_END_NAMESPACE_STD 326 327#endif // _LIBCPP_STD_VER >= 14 328 329_LIBCPP_POP_MACROS 330 331#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 332# include <concepts> 333# include <iterator> 334# include <memory> 335# include <stdexcept> 336# include <variant> 337#endif 338 339#endif //_LIBCPP_BARRIER 340