// -*- C++ -*- //===--------------------------- barrier ----------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef _LIBCPP_BARRIER #define _LIBCPP_BARRIER /* barrier synopsis namespace std { template class barrier { public: using arrival_token = see below; constexpr explicit barrier(ptrdiff_t phase_count, CompletionFunction f = CompletionFunction()); ~barrier(); barrier(const barrier&) = delete; barrier& operator=(const barrier&) = delete; [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1); void wait(arrival_token&& arrival) const; void arrive_and_wait(); void arrive_and_drop(); private: CompletionFunction completion; // exposition only }; } */ #include <__config> #include #ifndef _LIBCPP_HAS_NO_TREE_BARRIER # include #endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header #endif #ifdef _LIBCPP_HAS_NO_THREADS # error is not supported on this single threaded system #endif #if _LIBCPP_STD_VER >= 14 _LIBCPP_BEGIN_NAMESPACE_STD struct __empty_completion { inline _LIBCPP_INLINE_VISIBILITY void operator()() noexcept { } }; #ifndef _LIBCPP_HAS_NO_TREE_BARRIER /* The default implementation of __barrier_base is a classic tree barrier. It looks different from literature pseudocode for two main reasons: 1. Threads that call into std::barrier functions do not provide indices, so a numbering step is added before the actual barrier algorithm, appearing as an N+1 round to the N rounds of the tree barrier. 2. A great deal of attention has been paid to avoid cache line thrashing by flattening the tree structure into cache-line sized arrays, that are indexed in an efficient way. */ using __barrier_phase_t = uint8_t; class __barrier_algorithm_base; _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected); _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, __barrier_phase_t __old_phase); _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier); template class __barrier_base { ptrdiff_t __expected; unique_ptr<__barrier_algorithm_base, void (*)(__barrier_algorithm_base*)> __base; __atomic_base __expected_adjustment; _CompletionF __completion; __atomic_base<__barrier_phase_t> __phase; public: using arrival_token = __barrier_phase_t; static constexpr ptrdiff_t max() noexcept { return numeric_limits::max(); } _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) : __expected(__expected), __base(__construct_barrier_algorithm_base(this->__expected), &__destroy_barrier_algorithm_base), __expected_adjustment(0), __completion(move(__completion)), __phase(0) { } [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY arrival_token arrive(ptrdiff_t update) { auto const __old_phase = __phase.load(memory_order_relaxed); for(; update; --update) if(__arrive_barrier_algorithm_base(__base.get(), __old_phase)) { __completion(); __expected += __expected_adjustment.load(memory_order_relaxed); __expected_adjustment.store(0, memory_order_relaxed); __phase.store(__old_phase + 2, memory_order_release); __phase.notify_all(); } return __old_phase; } _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY void wait(arrival_token&& __old_phase) const { auto const __test_fn = [=]() -> bool { return __phase.load(memory_order_acquire) != __old_phase; }; __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); } _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY void arrive_and_drop() { __expected_adjustment.fetch_sub(1, memory_order_relaxed); (void)arrive(1); } }; #else /* The alternative implementation of __barrier_base is a central barrier. Two versions of this algorithm are provided: 1. A fairly straightforward implementation of the litterature for the general case where the completion function is not empty. 2. An optimized implementation that exploits 2's complement arithmetic and well-defined overflow in atomic arithmetic, to handle the phase roll-over for free. */ template class __barrier_base { __atomic_base __expected; __atomic_base __arrived; _CompletionF __completion; __atomic_base __phase; public: using arrival_token = bool; static constexpr ptrdiff_t max() noexcept { return numeric_limits::max(); } _LIBCPP_INLINE_VISIBILITY __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) : __expected(__expected), __arrived(__expected), __completion(move(__completion)), __phase(false) { } [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY arrival_token arrive(ptrdiff_t update) { auto const __old_phase = __phase.load(memory_order_relaxed); auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update; auto const new_expected = __expected.load(memory_order_relaxed); if(0 == __result) { __completion(); __arrived.store(new_expected, memory_order_relaxed); __phase.store(!__old_phase, memory_order_release); __phase.notify_all(); } return __old_phase; } _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY void wait(arrival_token&& __old_phase) const { __phase.wait(__old_phase, memory_order_acquire); } _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY void arrive_and_drop() { __expected.fetch_sub(1, memory_order_relaxed); (void)arrive(1); } }; template<> class __barrier_base<__empty_completion> { static constexpr uint64_t __expected_unit = 1ull; static constexpr uint64_t __arrived_unit = 1ull << 32; static constexpr uint64_t __expected_mask = __arrived_unit - 1; static constexpr uint64_t __phase_bit = 1ull << 63; static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask; __atomic_base __phase_arrived_expected; static _LIBCPP_INLINE_VISIBILITY constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT { return ((uint64_t(1u << 31) - __count) << 32) | (uint64_t(1u << 31) - __count); } public: using arrival_token = uint64_t; static constexpr ptrdiff_t max() noexcept { return ptrdiff_t(1u << 31) - 1; } _LIBCPP_INLINE_VISIBILITY explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion()) : __phase_arrived_expected(__init(__count)) { } [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY arrival_token arrive(ptrdiff_t update) { auto const __inc = __arrived_unit * update; auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel); if((__old ^ (__old + __inc)) & __phase_bit) { __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed); __phase_arrived_expected.notify_all(); } return __old & __phase_bit; } inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY void wait(arrival_token&& __phase) const { auto const __test_fn = [=]() -> bool { uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire); return ((__current & __phase_bit) != __phase); }; __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); } inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY void arrive_and_drop() { __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed); (void)arrive(1); } }; #endif //_LIBCPP_HAS_NO_TREE_BARRIER template class barrier { __barrier_base<_CompletionF> __b; public: using arrival_token = typename __barrier_base<_CompletionF>::arrival_token; static constexpr ptrdiff_t max() noexcept { return __barrier_base<_CompletionF>::max(); } _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF()) : __b(__count, std::move(__completion)) { } barrier(barrier const&) = delete; barrier& operator=(barrier const&) = delete; [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY arrival_token arrive(ptrdiff_t update = 1) { return __b.arrive(update); } _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY void wait(arrival_token&& __phase) const { __b.wait(std::move(__phase)); } _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY void arrive_and_wait() { wait(arrive()); } _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY void arrive_and_drop() { __b.arrive_and_drop(); } }; _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 14 #endif //_LIBCPP_BARRIER