1// -*- C++ -*- 2//===--------------------------- barrier ----------------------------------===// 3// 4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5// See https://llvm.org/LICENSE.txt for license information. 6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7// 8//===----------------------------------------------------------------------===// 9 10#ifndef _LIBCPP_BARRIER 11#define _LIBCPP_BARRIER 12 13/* 14 barrier synopsis 15 16namespace std 17{ 18 19 template<class CompletionFunction = see below> 20 class barrier 21 { 22 public: 23 using arrival_token = see below; 24 25 constexpr explicit barrier(ptrdiff_t phase_count, 26 CompletionFunction f = CompletionFunction()); 27 ~barrier(); 28 29 barrier(const barrier&) = delete; 30 barrier& operator=(const barrier&) = delete; 31 32 [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1); 33 void wait(arrival_token&& arrival) const; 34 35 void arrive_and_wait(); 36 void arrive_and_drop(); 37 38 private: 39 CompletionFunction completion; // exposition only 40 }; 41 42} 43 44*/ 45 46#include <__config> 47#include <atomic> 48#ifndef _LIBCPP_HAS_NO_TREE_BARRIER 49# include <memory> 50#endif 51 52#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 53#pragma GCC system_header 54#endif 55 56#ifdef _LIBCPP_HAS_NO_THREADS 57# error <barrier> is not supported on this single threaded system 58#endif 59 60#if _LIBCPP_STD_VER >= 14 61 62_LIBCPP_BEGIN_NAMESPACE_STD 63 64struct __empty_completion 65{ 66 inline _LIBCPP_INLINE_VISIBILITY 67 void operator()() noexcept 68 { 69 } 70}; 71 72#ifndef _LIBCPP_HAS_NO_TREE_BARRIER 73 74/* 75 76The default implementation of __barrier_base is a classic tree barrier. 77 78It looks different from literature pseudocode for two main reasons: 79 1. Threads that call into std::barrier functions do not provide indices, 80 so a numbering step is added before the actual barrier algorithm, 81 appearing as an N+1 round to the N rounds of the tree barrier. 82 2. A great deal of attention has been paid to avoid cache line thrashing 83 by flattening the tree structure into cache-line sized arrays, that 84 are indexed in an efficient way. 85 86*/ 87 88using __barrier_phase_t = uint8_t; 89 90class __barrier_algorithm_base; 91 92_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI 93__barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected); 94 95_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI 96bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, 97 __barrier_phase_t __old_phase); 98 99_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI 100void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier); 101 102template<class _CompletionF> 103class __barrier_base { 104 105 ptrdiff_t __expected; 106 unique_ptr<__barrier_algorithm_base, 107 void (*)(__barrier_algorithm_base*)> __base; 108 __atomic_base<ptrdiff_t> __expected_adjustment; 109 _CompletionF __completion; 110 __atomic_base<__barrier_phase_t> __phase; 111 112public: 113 using arrival_token = __barrier_phase_t; 114 115 static constexpr ptrdiff_t max() noexcept { 116 return numeric_limits<ptrdiff_t>::max(); 117 } 118 119 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 120 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) 121 : __expected(__expected), __base(__construct_barrier_algorithm_base(this->__expected), 122 &__destroy_barrier_algorithm_base), 123 __expected_adjustment(0), __completion(move(__completion)), __phase(0) 124 { 125 } 126 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 127 arrival_token arrive(ptrdiff_t update) 128 { 129 auto const __old_phase = __phase.load(memory_order_relaxed); 130 for(; update; --update) 131 if(__arrive_barrier_algorithm_base(__base.get(), __old_phase)) { 132 __completion(); 133 __expected += __expected_adjustment.load(memory_order_relaxed); 134 __expected_adjustment.store(0, memory_order_relaxed); 135 __phase.store(__old_phase + 2, memory_order_release); 136 __phase.notify_all(); 137 } 138 return __old_phase; 139 } 140 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 141 void wait(arrival_token&& __old_phase) const 142 { 143 auto const __test_fn = [=]() -> bool { 144 return __phase.load(memory_order_acquire) != __old_phase; 145 }; 146 __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); 147 } 148 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 149 void arrive_and_drop() 150 { 151 __expected_adjustment.fetch_sub(1, memory_order_relaxed); 152 (void)arrive(1); 153 } 154}; 155 156#else 157 158/* 159 160The alternative implementation of __barrier_base is a central barrier. 161 162Two versions of this algorithm are provided: 163 1. A fairly straightforward implementation of the litterature for the 164 general case where the completion function is not empty. 165 2. An optimized implementation that exploits 2's complement arithmetic 166 and well-defined overflow in atomic arithmetic, to handle the phase 167 roll-over for free. 168 169*/ 170 171template<class _CompletionF> 172class __barrier_base { 173 174 __atomic_base<ptrdiff_t> __expected; 175 __atomic_base<ptrdiff_t> __arrived; 176 _CompletionF __completion; 177 __atomic_base<bool> __phase; 178public: 179 using arrival_token = bool; 180 181 static constexpr ptrdiff_t max() noexcept { 182 return numeric_limits<ptrdiff_t>::max(); 183 } 184 185 _LIBCPP_INLINE_VISIBILITY 186 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) 187 : __expected(__expected), __arrived(__expected), __completion(move(__completion)), __phase(false) 188 { 189 } 190 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 191 arrival_token arrive(ptrdiff_t update) 192 { 193 auto const __old_phase = __phase.load(memory_order_relaxed); 194 auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update; 195 auto const new_expected = __expected.load(memory_order_relaxed); 196 if(0 == __result) { 197 __completion(); 198 __arrived.store(new_expected, memory_order_relaxed); 199 __phase.store(!__old_phase, memory_order_release); 200 __phase.notify_all(); 201 } 202 return __old_phase; 203 } 204 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 205 void wait(arrival_token&& __old_phase) const 206 { 207 __phase.wait(__old_phase, memory_order_acquire); 208 } 209 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 210 void arrive_and_drop() 211 { 212 __expected.fetch_sub(1, memory_order_relaxed); 213 (void)arrive(1); 214 } 215}; 216 217template<> 218class __barrier_base<__empty_completion> { 219 220 static constexpr uint64_t __expected_unit = 1ull; 221 static constexpr uint64_t __arrived_unit = 1ull << 32; 222 static constexpr uint64_t __expected_mask = __arrived_unit - 1; 223 static constexpr uint64_t __phase_bit = 1ull << 63; 224 static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask; 225 226 __atomic_base<uint64_t> __phase_arrived_expected; 227 228 static _LIBCPP_INLINE_VISIBILITY 229 constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT 230 { 231 return ((uint64_t(1u << 31) - __count) << 32) 232 | (uint64_t(1u << 31) - __count); 233 } 234 235public: 236 using arrival_token = uint64_t; 237 238 static constexpr ptrdiff_t max() noexcept { 239 return ptrdiff_t(1u << 31) - 1; 240 } 241 242 _LIBCPP_INLINE_VISIBILITY 243 explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion()) 244 : __phase_arrived_expected(__init(__count)) 245 { 246 } 247 [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 248 arrival_token arrive(ptrdiff_t update) 249 { 250 auto const __inc = __arrived_unit * update; 251 auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel); 252 if((__old ^ (__old + __inc)) & __phase_bit) { 253 __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed); 254 __phase_arrived_expected.notify_all(); 255 } 256 return __old & __phase_bit; 257 } 258 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 259 void wait(arrival_token&& __phase) const 260 { 261 auto const __test_fn = [=]() -> bool { 262 uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire); 263 return ((__current & __phase_bit) != __phase); 264 }; 265 __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); 266 } 267 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 268 void arrive_and_drop() 269 { 270 __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed); 271 (void)arrive(1); 272 } 273}; 274 275#endif //_LIBCPP_HAS_NO_TREE_BARRIER 276 277template<class _CompletionF = __empty_completion> 278class barrier { 279 280 __barrier_base<_CompletionF> __b; 281public: 282 using arrival_token = typename __barrier_base<_CompletionF>::arrival_token; 283 284 static constexpr ptrdiff_t max() noexcept { 285 return __barrier_base<_CompletionF>::max(); 286 } 287 288 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 289 barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF()) 290 : __b(__count, std::move(__completion)) { 291 } 292 293 barrier(barrier const&) = delete; 294 barrier& operator=(barrier const&) = delete; 295 296 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 297 arrival_token arrive(ptrdiff_t update = 1) 298 { 299 return __b.arrive(update); 300 } 301 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 302 void wait(arrival_token&& __phase) const 303 { 304 __b.wait(std::move(__phase)); 305 } 306 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 307 void arrive_and_wait() 308 { 309 wait(arrive()); 310 } 311 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 312 void arrive_and_drop() 313 { 314 __b.arrive_and_drop(); 315 } 316}; 317 318_LIBCPP_END_NAMESPACE_STD 319 320#endif // _LIBCPP_STD_VER >= 14 321 322#endif //_LIBCPP_BARRIER 323