1// -*- C++ -*- 2//===----------------------------------------------------------------------===// 3// 4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5// See https://llvm.org/LICENSE.txt for license information. 6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7// 8//===----------------------------------------------------------------------===// 9 10#ifndef _LIBCPP_BARRIER 11#define _LIBCPP_BARRIER 12 13/* 14 barrier synopsis 15 16namespace std 17{ 18 19 template<class CompletionFunction = see below> 20 class barrier 21 { 22 public: 23 using arrival_token = see below; 24 25 static constexpr ptrdiff_t max() noexcept; 26 27 constexpr explicit barrier(ptrdiff_t phase_count, 28 CompletionFunction f = CompletionFunction()); 29 ~barrier(); 30 31 barrier(const barrier&) = delete; 32 barrier& operator=(const barrier&) = delete; 33 34 [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1); 35 void wait(arrival_token&& arrival) const; 36 37 void arrive_and_wait(); 38 void arrive_and_drop(); 39 40 private: 41 CompletionFunction completion; // exposition only 42 }; 43 44} 45 46*/ 47 48#include <__assert> // all public C++ headers provide the assertion handler 49#include <__atomic/atomic_base.h> 50#include <__atomic/memory_order.h> 51#include <__availability> 52#include <__config> 53#include <__memory/unique_ptr.h> 54#include <__thread/poll_with_backoff.h> 55#include <__thread/timed_backoff_policy.h> 56#include <__utility/move.h> 57#include <cstddef> 58#include <cstdint> 59#include <limits> 60#include <version> 61 62#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 63# pragma GCC system_header 64#endif 65 66#ifdef _LIBCPP_HAS_NO_THREADS 67# error "<barrier> is not supported since libc++ has been configured without support for threads." 68#endif 69 70_LIBCPP_PUSH_MACROS 71#include <__undef_macros> 72 73#if _LIBCPP_STD_VER >= 14 74 75_LIBCPP_BEGIN_NAMESPACE_STD 76 77struct __empty_completion 78{ 79 inline _LIBCPP_INLINE_VISIBILITY 80 void operator()() noexcept 81 { 82 } 83}; 84 85#ifndef _LIBCPP_HAS_NO_TREE_BARRIER 86 87/* 88 89The default implementation of __barrier_base is a classic tree barrier. 90 91It looks different from literature pseudocode for two main reasons: 92 1. Threads that call into std::barrier functions do not provide indices, 93 so a numbering step is added before the actual barrier algorithm, 94 appearing as an N+1 round to the N rounds of the tree barrier. 95 2. A great deal of attention has been paid to avoid cache line thrashing 96 by flattening the tree structure into cache-line sized arrays, that 97 are indexed in an efficient way. 98 99*/ 100 101using __barrier_phase_t = uint8_t; 102 103class __barrier_algorithm_base; 104 105_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI 106__barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected); 107 108_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI 109bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, 110 __barrier_phase_t __old_phase); 111 112_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI 113void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier); 114 115template<class _CompletionF> 116class __barrier_base { 117 ptrdiff_t __expected_; 118 unique_ptr<__barrier_algorithm_base, 119 void (*)(__barrier_algorithm_base*)> __base_; 120 __atomic_base<ptrdiff_t> __expected_adjustment_; 121 _CompletionF __completion_; 122 __atomic_base<__barrier_phase_t> __phase_; 123 124public: 125 using arrival_token = __barrier_phase_t; 126 127 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { 128 return numeric_limits<ptrdiff_t>::max(); 129 } 130 131 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 132 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) 133 : __expected_(__expected), __base_(std::__construct_barrier_algorithm_base(this->__expected_), 134 &__destroy_barrier_algorithm_base), 135 __expected_adjustment_(0), __completion_(std::move(__completion)), __phase_(0) 136 { 137 } 138 [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 139 arrival_token arrive(ptrdiff_t __update) 140 { 141 _LIBCPP_ASSERT_UNCATEGORIZED( 142 __update <= __expected_, "update is greater than the expected count for the current barrier phase"); 143 144 auto const __old_phase = __phase_.load(memory_order_relaxed); 145 for(; __update; --__update) 146 if(__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) { 147 __completion_(); 148 __expected_ += __expected_adjustment_.load(memory_order_relaxed); 149 __expected_adjustment_.store(0, memory_order_relaxed); 150 __phase_.store(__old_phase + 2, memory_order_release); 151 __phase_.notify_all(); 152 } 153 return __old_phase; 154 } 155 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 156 void wait(arrival_token&& __old_phase) const 157 { 158 auto const __test_fn = [this, __old_phase]() -> bool { 159 return __phase_.load(memory_order_acquire) != __old_phase; 160 }; 161 std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); 162 } 163 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 164 void arrive_and_drop() 165 { 166 __expected_adjustment_.fetch_sub(1, memory_order_relaxed); 167 (void)arrive(1); 168 } 169}; 170 171#else 172 173/* 174 175The alternative implementation of __barrier_base is a central barrier. 176 177Two versions of this algorithm are provided: 178 1. A fairly straightforward implementation of the litterature for the 179 general case where the completion function is not empty. 180 2. An optimized implementation that exploits 2's complement arithmetic 181 and well-defined overflow in atomic arithmetic, to handle the phase 182 roll-over for free. 183 184*/ 185 186template<class _CompletionF> 187class __barrier_base { 188 189 __atomic_base<ptrdiff_t> __expected; 190 __atomic_base<ptrdiff_t> __arrived; 191 _CompletionF __completion; 192 __atomic_base<bool> __phase; 193public: 194 using arrival_token = bool; 195 196 static constexpr ptrdiff_t max() noexcept { 197 return numeric_limits<ptrdiff_t>::max(); 198 } 199 200 _LIBCPP_INLINE_VISIBILITY 201 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) 202 : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false) 203 { 204 } 205 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 206 arrival_token arrive(ptrdiff_t update) 207 { 208 auto const __old_phase = __phase.load(memory_order_relaxed); 209 auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update; 210 auto const new_expected = __expected.load(memory_order_relaxed); 211 212 _LIBCPP_ASSERT_UNCATEGORIZED( 213 update <= new_expected, "update is greater than the expected count for the current barrier phase"); 214 215 if (0 == __result) { 216 __completion(); 217 __arrived.store(new_expected, memory_order_relaxed); 218 __phase.store(!__old_phase, memory_order_release); 219 __phase.notify_all(); 220 } 221 return __old_phase; 222 } 223 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 224 void wait(arrival_token&& __old_phase) const 225 { 226 __phase.wait(__old_phase, memory_order_acquire); 227 } 228 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 229 void arrive_and_drop() 230 { 231 __expected.fetch_sub(1, memory_order_relaxed); 232 (void)arrive(1); 233 } 234}; 235 236template<> 237class __barrier_base<__empty_completion> { 238 239 static constexpr uint64_t __expected_unit = 1ull; 240 static constexpr uint64_t __arrived_unit = 1ull << 32; 241 static constexpr uint64_t __expected_mask = __arrived_unit - 1; 242 static constexpr uint64_t __phase_bit = 1ull << 63; 243 static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask; 244 245 __atomic_base<uint64_t> __phase_arrived_expected; 246 247 static _LIBCPP_INLINE_VISIBILITY 248 constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT 249 { 250 return ((uint64_t(1u << 31) - __count) << 32) 251 | (uint64_t(1u << 31) - __count); 252 } 253 254public: 255 using arrival_token = uint64_t; 256 257 static constexpr ptrdiff_t max() noexcept { 258 return ptrdiff_t(1u << 31) - 1; 259 } 260 261 _LIBCPP_INLINE_VISIBILITY 262 explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion()) 263 : __phase_arrived_expected(__init(__count)) 264 { 265 } 266 [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 267 arrival_token arrive(ptrdiff_t update) 268 { 269 auto const __inc = __arrived_unit * update; 270 auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel); 271 272 _LIBCPP_ASSERT_UNCATEGORIZED( 273 update <= __old, "update is greater than the expected count for the current barrier phase"); 274 275 if ((__old ^ (__old + __inc)) & __phase_bit) { 276 __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed); 277 __phase_arrived_expected.notify_all(); 278 } 279 return __old & __phase_bit; 280 } 281 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 282 void wait(arrival_token&& __phase) const 283 { 284 auto const __test_fn = [=]() -> bool { 285 uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire); 286 return ((__current & __phase_bit) != __phase); 287 }; 288 __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); 289 } 290 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 291 void arrive_and_drop() 292 { 293 __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed); 294 (void)arrive(1); 295 } 296}; 297 298#endif // !_LIBCPP_HAS_NO_TREE_BARRIER 299 300template<class _CompletionF = __empty_completion> 301class barrier { 302 303 __barrier_base<_CompletionF> __b_; 304public: 305 using arrival_token = typename __barrier_base<_CompletionF>::arrival_token; 306 307 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { 308 return __barrier_base<_CompletionF>::max(); 309 } 310 311 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 312 explicit barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF()) 313 : __b_(__count, _VSTD::move(__completion)) { 314 _LIBCPP_ASSERT_UNCATEGORIZED( 315 __count >= 0, 316 "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with a negative value"); 317 _LIBCPP_ASSERT_UNCATEGORIZED( 318 __count <= max(), 319 "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with " 320 "a value greater than max()"); 321 } 322 323 barrier(barrier const&) = delete; 324 barrier& operator=(barrier const&) = delete; 325 326 [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 327 arrival_token arrive(ptrdiff_t __update = 1) 328 { 329 _LIBCPP_ASSERT_UNCATEGORIZED(__update > 0, "barrier:arrive must be called with a value greater than 0"); 330 return __b_.arrive(__update); 331 } 332 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 333 void wait(arrival_token&& __phase) const 334 { 335 __b_.wait(_VSTD::move(__phase)); 336 } 337 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 338 void arrive_and_wait() 339 { 340 wait(arrive()); 341 } 342 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 343 void arrive_and_drop() 344 { 345 __b_.arrive_and_drop(); 346 } 347}; 348 349_LIBCPP_END_NAMESPACE_STD 350 351#endif // _LIBCPP_STD_VER >= 14 352 353_LIBCPP_POP_MACROS 354 355#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 356# include <atomic> 357# include <concepts> 358# include <iterator> 359# include <memory> 360# include <stdexcept> 361# include <variant> 362#endif 363 364#endif //_LIBCPP_BARRIER 365