15ffd83dbSDimitry Andric// -*- C++ -*- 2349cc55cSDimitry Andric//===----------------------------------------------------------------------===// 35ffd83dbSDimitry Andric// 45ffd83dbSDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 55ffd83dbSDimitry Andric// See https://llvm.org/LICENSE.txt for license information. 65ffd83dbSDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 75ffd83dbSDimitry Andric// 85ffd83dbSDimitry Andric//===----------------------------------------------------------------------===// 95ffd83dbSDimitry Andric 105ffd83dbSDimitry Andric#ifndef _LIBCPP_BARRIER 115ffd83dbSDimitry Andric#define _LIBCPP_BARRIER 125ffd83dbSDimitry Andric 135ffd83dbSDimitry Andric/* 145ffd83dbSDimitry Andric barrier synopsis 155ffd83dbSDimitry Andric 165ffd83dbSDimitry Andricnamespace std 175ffd83dbSDimitry Andric{ 185ffd83dbSDimitry Andric 195ffd83dbSDimitry Andric template<class CompletionFunction = see below> 205ffd83dbSDimitry Andric class barrier 215ffd83dbSDimitry Andric { 225ffd83dbSDimitry Andric public: 235ffd83dbSDimitry Andric using arrival_token = see below; 245ffd83dbSDimitry Andric 25e8d8bef9SDimitry Andric static constexpr ptrdiff_t max() noexcept; 26e8d8bef9SDimitry Andric 275ffd83dbSDimitry Andric constexpr explicit barrier(ptrdiff_t phase_count, 285ffd83dbSDimitry Andric CompletionFunction f = CompletionFunction()); 295ffd83dbSDimitry Andric ~barrier(); 305ffd83dbSDimitry Andric 315ffd83dbSDimitry Andric barrier(const barrier&) = delete; 325ffd83dbSDimitry Andric barrier& operator=(const barrier&) = delete; 335ffd83dbSDimitry Andric 345ffd83dbSDimitry Andric [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1); 355ffd83dbSDimitry Andric void wait(arrival_token&& arrival) const; 365ffd83dbSDimitry Andric 375ffd83dbSDimitry Andric void arrive_and_wait(); 385ffd83dbSDimitry Andric void arrive_and_drop(); 395ffd83dbSDimitry Andric 405ffd83dbSDimitry Andric private: 415ffd83dbSDimitry Andric CompletionFunction completion; // exposition only 425ffd83dbSDimitry Andric }; 435ffd83dbSDimitry Andric 445ffd83dbSDimitry Andric} 455ffd83dbSDimitry Andric 465ffd83dbSDimitry Andric*/ 475ffd83dbSDimitry Andric 487a6dacacSDimitry Andric#include <__config> 497a6dacacSDimitry Andric 50*0fca6ea1SDimitry Andric#if !defined(_LIBCPP_HAS_NO_THREADS) 517a6dacacSDimitry Andric 52*0fca6ea1SDimitry Andric# include <__assert> 5306c3fb27SDimitry Andric# include <__atomic/atomic_base.h> 5406c3fb27SDimitry Andric# include <__atomic/memory_order.h> 55bdd1243dSDimitry Andric# include <__memory/unique_ptr.h> 5606c3fb27SDimitry Andric# include <__thread/poll_with_backoff.h> 5704eeddc0SDimitry Andric# include <__thread/timed_backoff_policy.h> 58bdd1243dSDimitry Andric# include <__utility/move.h> 5906c3fb27SDimitry Andric# include <cstddef> 6006c3fb27SDimitry Andric# include <cstdint> 6181ad6265SDimitry Andric# include <limits> 6206c3fb27SDimitry Andric# include <version> 635ffd83dbSDimitry Andric 645ffd83dbSDimitry Andric# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 655ffd83dbSDimitry Andric# pragma GCC system_header 665ffd83dbSDimitry Andric# endif 675ffd83dbSDimitry Andric 68e8d8bef9SDimitry Andric_LIBCPP_PUSH_MACROS 69e8d8bef9SDimitry Andric# include <__undef_macros> 70e8d8bef9SDimitry Andric 715ffd83dbSDimitry Andric# if _LIBCPP_STD_VER >= 14 725ffd83dbSDimitry Andric 735ffd83dbSDimitry Andric_LIBCPP_BEGIN_NAMESPACE_STD 745ffd83dbSDimitry Andric 75cb14a3feSDimitry Andricstruct __empty_completion { 76cb14a3feSDimitry Andric inline _LIBCPP_HIDE_FROM_ABI void operator()() noexcept {} 775ffd83dbSDimitry Andric}; 785ffd83dbSDimitry Andric 795ffd83dbSDimitry Andric# ifndef _LIBCPP_HAS_NO_TREE_BARRIER 805ffd83dbSDimitry Andric 815ffd83dbSDimitry Andric/* 825ffd83dbSDimitry Andric 835ffd83dbSDimitry AndricThe default implementation of __barrier_base is a classic tree barrier. 845ffd83dbSDimitry Andric 855ffd83dbSDimitry AndricIt looks different from literature pseudocode for two main reasons: 865ffd83dbSDimitry Andric 1. Threads that call into std::barrier functions do not provide indices, 875ffd83dbSDimitry Andric so a numbering step is added before the actual barrier algorithm, 885ffd83dbSDimitry Andric appearing as an N+1 round to the N rounds of the tree barrier. 895ffd83dbSDimitry Andric 2. A great deal of attention has been paid to avoid cache line thrashing 905ffd83dbSDimitry Andric by flattening the tree structure into cache-line sized arrays, that 915ffd83dbSDimitry Andric are indexed in an efficient way. 925ffd83dbSDimitry Andric 935ffd83dbSDimitry Andric*/ 945ffd83dbSDimitry Andric 955ffd83dbSDimitry Andricusing __barrier_phase_t = uint8_t; 965ffd83dbSDimitry Andric 975ffd83dbSDimitry Andricclass __barrier_algorithm_base; 985ffd83dbSDimitry Andric 99cb14a3feSDimitry Andric_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __barrier_algorithm_base* 100cb14a3feSDimitry Andric__construct_barrier_algorithm_base(ptrdiff_t& __expected); 1015ffd83dbSDimitry Andric 102cb14a3feSDimitry Andric_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI bool 103*0fca6ea1SDimitry Andric__arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, __barrier_phase_t __old_phase) noexcept; 1045ffd83dbSDimitry Andric 105cb14a3feSDimitry Andric_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void 106*0fca6ea1SDimitry Andric__destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier) noexcept; 1075ffd83dbSDimitry Andric 1085ffd83dbSDimitry Andrictemplate <class _CompletionF> 1095ffd83dbSDimitry Andricclass __barrier_base { 11081ad6265SDimitry Andric ptrdiff_t __expected_; 111cb14a3feSDimitry Andric unique_ptr<__barrier_algorithm_base, void (*)(__barrier_algorithm_base*)> __base_; 11281ad6265SDimitry Andric __atomic_base<ptrdiff_t> __expected_adjustment_; 11381ad6265SDimitry Andric _CompletionF __completion_; 11481ad6265SDimitry Andric __atomic_base<__barrier_phase_t> __phase_; 1155ffd83dbSDimitry Andric 1165ffd83dbSDimitry Andricpublic: 1175ffd83dbSDimitry Andric using arrival_token = __barrier_phase_t; 1185ffd83dbSDimitry Andric 119cb14a3feSDimitry Andric static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); } 1205ffd83dbSDimitry Andric 1215f757f3fSDimitry Andric _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI 1225ffd83dbSDimitry Andric __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) 123cb14a3feSDimitry Andric : __expected_(__expected), 124cb14a3feSDimitry Andric __base_(std::__construct_barrier_algorithm_base(this->__expected_), &__destroy_barrier_algorithm_base), 125cb14a3feSDimitry Andric __expected_adjustment_(0), 126cb14a3feSDimitry Andric __completion_(std::move(__completion)), 127cb14a3feSDimitry Andric __phase_(0) {} 128*0fca6ea1SDimitry Andric _LIBCPP_NODISCARD _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) { 1297a6dacacSDimitry Andric _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( 13006c3fb27SDimitry Andric __update <= __expected_, "update is greater than the expected count for the current barrier phase"); 13106c3fb27SDimitry Andric 13281ad6265SDimitry Andric auto const __old_phase = __phase_.load(memory_order_relaxed); 133753f127fSDimitry Andric for (; __update; --__update) 13481ad6265SDimitry Andric if (__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) { 13581ad6265SDimitry Andric __completion_(); 13681ad6265SDimitry Andric __expected_ += __expected_adjustment_.load(memory_order_relaxed); 13781ad6265SDimitry Andric __expected_adjustment_.store(0, memory_order_relaxed); 13881ad6265SDimitry Andric __phase_.store(__old_phase + 2, memory_order_release); 13981ad6265SDimitry Andric __phase_.notify_all(); 1405ffd83dbSDimitry Andric } 1415ffd83dbSDimitry Andric return __old_phase; 1425ffd83dbSDimitry Andric } 143cb14a3feSDimitry Andric _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const { 144cb14a3feSDimitry Andric auto const __test_fn = [this, __old_phase]() -> bool { return __phase_.load(memory_order_acquire) != __old_phase; }; 145bdd1243dSDimitry Andric std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); 1465ffd83dbSDimitry Andric } 147cb14a3feSDimitry Andric _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { 14881ad6265SDimitry Andric __expected_adjustment_.fetch_sub(1, memory_order_relaxed); 1495ffd83dbSDimitry Andric (void)arrive(1); 1505ffd83dbSDimitry Andric } 1515ffd83dbSDimitry Andric}; 1525ffd83dbSDimitry Andric 1535ffd83dbSDimitry Andric# else 1545ffd83dbSDimitry Andric 1555ffd83dbSDimitry Andric/* 1565ffd83dbSDimitry Andric 1575ffd83dbSDimitry AndricThe alternative implementation of __barrier_base is a central barrier. 1585ffd83dbSDimitry Andric 1595ffd83dbSDimitry AndricTwo versions of this algorithm are provided: 1605ffd83dbSDimitry Andric 1. A fairly straightforward implementation of the litterature for the 1615ffd83dbSDimitry Andric general case where the completion function is not empty. 1625ffd83dbSDimitry Andric 2. An optimized implementation that exploits 2's complement arithmetic 1635ffd83dbSDimitry Andric and well-defined overflow in atomic arithmetic, to handle the phase 1645ffd83dbSDimitry Andric roll-over for free. 1655ffd83dbSDimitry Andric 1665ffd83dbSDimitry Andric*/ 1675ffd83dbSDimitry Andric 1685ffd83dbSDimitry Andrictemplate <class _CompletionF> 1695ffd83dbSDimitry Andricclass __barrier_base { 1705ffd83dbSDimitry Andric __atomic_base<ptrdiff_t> __expected; 1715ffd83dbSDimitry Andric __atomic_base<ptrdiff_t> __arrived; 1725ffd83dbSDimitry Andric _CompletionF __completion; 1735ffd83dbSDimitry Andric __atomic_base<bool> __phase; 174cb14a3feSDimitry Andric 1755ffd83dbSDimitry Andricpublic: 1765ffd83dbSDimitry Andric using arrival_token = bool; 1775ffd83dbSDimitry Andric 178cb14a3feSDimitry Andric static constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); } 1795ffd83dbSDimitry Andric 180cb14a3feSDimitry Andric _LIBCPP_HIDE_FROM_ABI __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) 181cb14a3feSDimitry Andric : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false) {} 182cb14a3feSDimitry Andric [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) { 1835ffd83dbSDimitry Andric auto const __old_phase = __phase.load(memory_order_relaxed); 1845ffd83dbSDimitry Andric auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update; 1855ffd83dbSDimitry Andric auto const new_expected = __expected.load(memory_order_relaxed); 18606c3fb27SDimitry Andric 1877a6dacacSDimitry Andric _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( 18806c3fb27SDimitry Andric update <= new_expected, "update is greater than the expected count for the current barrier phase"); 18906c3fb27SDimitry Andric 1905ffd83dbSDimitry Andric if (0 == __result) { 1915ffd83dbSDimitry Andric __completion(); 1925ffd83dbSDimitry Andric __arrived.store(new_expected, memory_order_relaxed); 1935ffd83dbSDimitry Andric __phase.store(!__old_phase, memory_order_release); 1945ffd83dbSDimitry Andric __phase.notify_all(); 1955ffd83dbSDimitry Andric } 1965ffd83dbSDimitry Andric return __old_phase; 1975ffd83dbSDimitry Andric } 198cb14a3feSDimitry Andric _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const { 1995ffd83dbSDimitry Andric __phase.wait(__old_phase, memory_order_acquire); 2005ffd83dbSDimitry Andric } 201cb14a3feSDimitry Andric _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { 2025ffd83dbSDimitry Andric __expected.fetch_sub(1, memory_order_relaxed); 2035ffd83dbSDimitry Andric (void)arrive(1); 2045ffd83dbSDimitry Andric } 2055ffd83dbSDimitry Andric}; 2065ffd83dbSDimitry Andric 2075ffd83dbSDimitry Andrictemplate <> 2085ffd83dbSDimitry Andricclass __barrier_base<__empty_completion> { 2095ffd83dbSDimitry Andric static constexpr uint64_t __expected_unit = 1ull; 2105ffd83dbSDimitry Andric static constexpr uint64_t __arrived_unit = 1ull << 32; 2115ffd83dbSDimitry Andric static constexpr uint64_t __expected_mask = __arrived_unit - 1; 2125ffd83dbSDimitry Andric static constexpr uint64_t __phase_bit = 1ull << 63; 2135ffd83dbSDimitry Andric static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask; 2145ffd83dbSDimitry Andric 2155ffd83dbSDimitry Andric __atomic_base<uint64_t> __phase_arrived_expected; 2165ffd83dbSDimitry Andric 217cb14a3feSDimitry Andric static _LIBCPP_HIDE_FROM_ABI constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT { 218cb14a3feSDimitry Andric return ((uint64_t(1u << 31) - __count) << 32) | (uint64_t(1u << 31) - __count); 2195ffd83dbSDimitry Andric } 2205ffd83dbSDimitry Andric 2215ffd83dbSDimitry Andricpublic: 2225ffd83dbSDimitry Andric using arrival_token = uint64_t; 2235ffd83dbSDimitry Andric 224cb14a3feSDimitry Andric static constexpr ptrdiff_t max() noexcept { return ptrdiff_t(1u << 31) - 1; } 2255ffd83dbSDimitry Andric 226cb14a3feSDimitry Andric _LIBCPP_HIDE_FROM_ABI explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion()) 227cb14a3feSDimitry Andric : __phase_arrived_expected(__init(__count)) {} 228cb14a3feSDimitry Andric [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) { 2295ffd83dbSDimitry Andric auto const __inc = __arrived_unit * update; 2305ffd83dbSDimitry Andric auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel); 23106c3fb27SDimitry Andric 2327a6dacacSDimitry Andric _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( 23306c3fb27SDimitry Andric update <= __old, "update is greater than the expected count for the current barrier phase"); 23406c3fb27SDimitry Andric 2355ffd83dbSDimitry Andric if ((__old ^ (__old + __inc)) & __phase_bit) { 2365ffd83dbSDimitry Andric __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed); 2375ffd83dbSDimitry Andric __phase_arrived_expected.notify_all(); 2385ffd83dbSDimitry Andric } 2395ffd83dbSDimitry Andric return __old & __phase_bit; 2405ffd83dbSDimitry Andric } 241cb14a3feSDimitry Andric inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const { 2425ffd83dbSDimitry Andric auto const __test_fn = [=]() -> bool { 2435ffd83dbSDimitry Andric uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire); 2445ffd83dbSDimitry Andric return ((__current & __phase_bit) != __phase); 2455ffd83dbSDimitry Andric }; 2465ffd83dbSDimitry Andric __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); 2475ffd83dbSDimitry Andric } 248cb14a3feSDimitry Andric inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { 2495ffd83dbSDimitry Andric __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed); 2505ffd83dbSDimitry Andric (void)arrive(1); 2515ffd83dbSDimitry Andric } 2525ffd83dbSDimitry Andric}; 2535ffd83dbSDimitry Andric 25481ad6265SDimitry Andric# endif // !_LIBCPP_HAS_NO_TREE_BARRIER 2555ffd83dbSDimitry Andric 2565ffd83dbSDimitry Andrictemplate <class _CompletionF = __empty_completion> 257*0fca6ea1SDimitry Andricclass _LIBCPP_DEPRECATED_ATOMIC_SYNC barrier { 258bdd1243dSDimitry Andric __barrier_base<_CompletionF> __b_; 259cb14a3feSDimitry Andric 2605ffd83dbSDimitry Andricpublic: 2615ffd83dbSDimitry Andric using arrival_token = typename __barrier_base<_CompletionF>::arrival_token; 2625ffd83dbSDimitry Andric 263cb14a3feSDimitry Andric static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return __barrier_base<_CompletionF>::max(); } 2645ffd83dbSDimitry Andric 265*0fca6ea1SDimitry Andric _LIBCPP_AVAILABILITY_SYNC 266*0fca6ea1SDimitry Andric _LIBCPP_HIDE_FROM_ABI explicit barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF()) 2675f757f3fSDimitry Andric : __b_(__count, std::move(__completion)) { 2687a6dacacSDimitry Andric _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( 26906c3fb27SDimitry Andric __count >= 0, 27006c3fb27SDimitry Andric "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with a negative value"); 2717a6dacacSDimitry Andric _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( 27206c3fb27SDimitry Andric __count <= max(), 27306c3fb27SDimitry Andric "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with " 27406c3fb27SDimitry Andric "a value greater than max()"); 2755ffd83dbSDimitry Andric } 2765ffd83dbSDimitry Andric 2775ffd83dbSDimitry Andric barrier(barrier const&) = delete; 2785ffd83dbSDimitry Andric barrier& operator=(barrier const&) = delete; 2795ffd83dbSDimitry Andric 280*0fca6ea1SDimitry Andric _LIBCPP_NODISCARD _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) { 2817a6dacacSDimitry Andric _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__update > 0, "barrier:arrive must be called with a value greater than 0"); 282bdd1243dSDimitry Andric return __b_.arrive(__update); 2835ffd83dbSDimitry Andric } 284cb14a3feSDimitry Andric _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const { 2855f757f3fSDimitry Andric __b_.wait(std::move(__phase)); 2865ffd83dbSDimitry Andric } 287cb14a3feSDimitry Andric _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_wait() { wait(arrive()); } 288cb14a3feSDimitry Andric _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { __b_.arrive_and_drop(); } 2895ffd83dbSDimitry Andric}; 2905ffd83dbSDimitry Andric 2915ffd83dbSDimitry Andric_LIBCPP_END_NAMESPACE_STD 2925ffd83dbSDimitry Andric 2935ffd83dbSDimitry Andric# endif // _LIBCPP_STD_VER >= 14 2945ffd83dbSDimitry Andric 295e8d8bef9SDimitry Andric_LIBCPP_POP_MACROS 296e8d8bef9SDimitry Andric 297*0fca6ea1SDimitry Andric#endif // !defined(_LIBCPP_HAS_NO_THREADS) 298*0fca6ea1SDimitry Andric 299bdd1243dSDimitry Andric#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 30006c3fb27SDimitry Andric# include <atomic> 301bdd1243dSDimitry Andric# include <concepts> 302bdd1243dSDimitry Andric# include <iterator> 303bdd1243dSDimitry Andric# include <memory> 304bdd1243dSDimitry Andric# include <stdexcept> 305bdd1243dSDimitry Andric# include <variant> 306bdd1243dSDimitry Andric#endif 307bdd1243dSDimitry Andric 3085ffd83dbSDimitry Andric#endif //_LIBCPP_BARRIER 309