15ffd83dbSDimitry Andric// -*- C++ -*- 2*349cc55cSDimitry Andric//===----------------------------------------------------------------------===// 35ffd83dbSDimitry Andric// 45ffd83dbSDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 55ffd83dbSDimitry Andric// See https://llvm.org/LICENSE.txt for license information. 65ffd83dbSDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 75ffd83dbSDimitry Andric// 85ffd83dbSDimitry Andric//===----------------------------------------------------------------------===// 95ffd83dbSDimitry Andric 105ffd83dbSDimitry Andric#ifndef _LIBCPP_BARRIER 115ffd83dbSDimitry Andric#define _LIBCPP_BARRIER 125ffd83dbSDimitry Andric 135ffd83dbSDimitry Andric/* 145ffd83dbSDimitry Andric barrier synopsis 155ffd83dbSDimitry Andric 165ffd83dbSDimitry Andricnamespace std 175ffd83dbSDimitry Andric{ 185ffd83dbSDimitry Andric 195ffd83dbSDimitry Andric template<class CompletionFunction = see below> 205ffd83dbSDimitry Andric class barrier 215ffd83dbSDimitry Andric { 225ffd83dbSDimitry Andric public: 235ffd83dbSDimitry Andric using arrival_token = see below; 245ffd83dbSDimitry Andric 25e8d8bef9SDimitry Andric static constexpr ptrdiff_t max() noexcept; 26e8d8bef9SDimitry Andric 275ffd83dbSDimitry Andric constexpr explicit barrier(ptrdiff_t phase_count, 285ffd83dbSDimitry Andric CompletionFunction f = CompletionFunction()); 295ffd83dbSDimitry Andric ~barrier(); 305ffd83dbSDimitry Andric 315ffd83dbSDimitry Andric barrier(const barrier&) = delete; 325ffd83dbSDimitry Andric barrier& operator=(const barrier&) = delete; 335ffd83dbSDimitry Andric 345ffd83dbSDimitry Andric [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1); 355ffd83dbSDimitry Andric void wait(arrival_token&& arrival) const; 365ffd83dbSDimitry Andric 375ffd83dbSDimitry Andric void arrive_and_wait(); 385ffd83dbSDimitry Andric void arrive_and_drop(); 395ffd83dbSDimitry Andric 405ffd83dbSDimitry Andric private: 415ffd83dbSDimitry Andric CompletionFunction completion; // exposition only 425ffd83dbSDimitry Andric }; 435ffd83dbSDimitry Andric 445ffd83dbSDimitry Andric} 455ffd83dbSDimitry Andric 465ffd83dbSDimitry Andric*/ 475ffd83dbSDimitry Andric 48e8d8bef9SDimitry Andric#include <__availability> 49fe6060f1SDimitry Andric#include <__config> 505ffd83dbSDimitry Andric#include <atomic> 515ffd83dbSDimitry Andric#ifndef _LIBCPP_HAS_NO_TREE_BARRIER 525ffd83dbSDimitry Andric# include <memory> 535ffd83dbSDimitry Andric#endif 545ffd83dbSDimitry Andric 555ffd83dbSDimitry Andric#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 565ffd83dbSDimitry Andric#pragma GCC system_header 575ffd83dbSDimitry Andric#endif 585ffd83dbSDimitry Andric 595ffd83dbSDimitry Andric#ifdef _LIBCPP_HAS_NO_THREADS 605ffd83dbSDimitry Andric# error <barrier> is not supported on this single threaded system 615ffd83dbSDimitry Andric#endif 625ffd83dbSDimitry Andric 63e8d8bef9SDimitry Andric_LIBCPP_PUSH_MACROS 64e8d8bef9SDimitry Andric#include <__undef_macros> 65e8d8bef9SDimitry Andric 665ffd83dbSDimitry Andric#if _LIBCPP_STD_VER >= 14 675ffd83dbSDimitry Andric 685ffd83dbSDimitry Andric_LIBCPP_BEGIN_NAMESPACE_STD 695ffd83dbSDimitry Andric 705ffd83dbSDimitry Andricstruct __empty_completion 715ffd83dbSDimitry Andric{ 725ffd83dbSDimitry Andric inline _LIBCPP_INLINE_VISIBILITY 735ffd83dbSDimitry Andric void operator()() noexcept 745ffd83dbSDimitry Andric { 755ffd83dbSDimitry Andric } 765ffd83dbSDimitry Andric}; 775ffd83dbSDimitry Andric 785ffd83dbSDimitry Andric#ifndef _LIBCPP_HAS_NO_TREE_BARRIER 795ffd83dbSDimitry Andric 805ffd83dbSDimitry Andric/* 815ffd83dbSDimitry Andric 825ffd83dbSDimitry AndricThe default implementation of __barrier_base is a classic tree barrier. 835ffd83dbSDimitry Andric 845ffd83dbSDimitry AndricIt looks different from literature pseudocode for two main reasons: 855ffd83dbSDimitry Andric 1. Threads that call into std::barrier functions do not provide indices, 865ffd83dbSDimitry Andric so a numbering step is added before the actual barrier algorithm, 875ffd83dbSDimitry Andric appearing as an N+1 round to the N rounds of the tree barrier. 885ffd83dbSDimitry Andric 2. A great deal of attention has been paid to avoid cache line thrashing 895ffd83dbSDimitry Andric by flattening the tree structure into cache-line sized arrays, that 905ffd83dbSDimitry Andric are indexed in an efficient way. 915ffd83dbSDimitry Andric 925ffd83dbSDimitry Andric*/ 935ffd83dbSDimitry Andric 945ffd83dbSDimitry Andricusing __barrier_phase_t = uint8_t; 955ffd83dbSDimitry Andric 965ffd83dbSDimitry Andricclass __barrier_algorithm_base; 975ffd83dbSDimitry Andric 985ffd83dbSDimitry Andric_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI 995ffd83dbSDimitry Andric__barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected); 1005ffd83dbSDimitry Andric 1015ffd83dbSDimitry Andric_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI 1025ffd83dbSDimitry Andricbool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, 1035ffd83dbSDimitry Andric __barrier_phase_t __old_phase); 1045ffd83dbSDimitry Andric 1055ffd83dbSDimitry Andric_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI 1065ffd83dbSDimitry Andricvoid __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier); 1075ffd83dbSDimitry Andric 1085ffd83dbSDimitry Andrictemplate<class _CompletionF> 1095ffd83dbSDimitry Andricclass __barrier_base { 1105ffd83dbSDimitry Andric ptrdiff_t __expected; 1115ffd83dbSDimitry Andric unique_ptr<__barrier_algorithm_base, 1125ffd83dbSDimitry Andric void (*)(__barrier_algorithm_base*)> __base; 1135ffd83dbSDimitry Andric __atomic_base<ptrdiff_t> __expected_adjustment; 1145ffd83dbSDimitry Andric _CompletionF __completion; 1155ffd83dbSDimitry Andric __atomic_base<__barrier_phase_t> __phase; 1165ffd83dbSDimitry Andric 1175ffd83dbSDimitry Andricpublic: 1185ffd83dbSDimitry Andric using arrival_token = __barrier_phase_t; 1195ffd83dbSDimitry Andric 1205ffd83dbSDimitry Andric static constexpr ptrdiff_t max() noexcept { 1215ffd83dbSDimitry Andric return numeric_limits<ptrdiff_t>::max(); 1225ffd83dbSDimitry Andric } 1235ffd83dbSDimitry Andric 1245ffd83dbSDimitry Andric _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 1255ffd83dbSDimitry Andric __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) 1265ffd83dbSDimitry Andric : __expected(__expected), __base(__construct_barrier_algorithm_base(this->__expected), 1275ffd83dbSDimitry Andric &__destroy_barrier_algorithm_base), 1285ffd83dbSDimitry Andric __expected_adjustment(0), __completion(move(__completion)), __phase(0) 1295ffd83dbSDimitry Andric { 1305ffd83dbSDimitry Andric } 1315ffd83dbSDimitry Andric [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 1325ffd83dbSDimitry Andric arrival_token arrive(ptrdiff_t update) 1335ffd83dbSDimitry Andric { 1345ffd83dbSDimitry Andric auto const __old_phase = __phase.load(memory_order_relaxed); 1355ffd83dbSDimitry Andric for(; update; --update) 1365ffd83dbSDimitry Andric if(__arrive_barrier_algorithm_base(__base.get(), __old_phase)) { 1375ffd83dbSDimitry Andric __completion(); 1385ffd83dbSDimitry Andric __expected += __expected_adjustment.load(memory_order_relaxed); 1395ffd83dbSDimitry Andric __expected_adjustment.store(0, memory_order_relaxed); 1405ffd83dbSDimitry Andric __phase.store(__old_phase + 2, memory_order_release); 1415ffd83dbSDimitry Andric __phase.notify_all(); 1425ffd83dbSDimitry Andric } 1435ffd83dbSDimitry Andric return __old_phase; 1445ffd83dbSDimitry Andric } 1455ffd83dbSDimitry Andric _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 1465ffd83dbSDimitry Andric void wait(arrival_token&& __old_phase) const 1475ffd83dbSDimitry Andric { 148fe6060f1SDimitry Andric auto const __test_fn = [this, __old_phase]() -> bool { 1495ffd83dbSDimitry Andric return __phase.load(memory_order_acquire) != __old_phase; 1505ffd83dbSDimitry Andric }; 1515ffd83dbSDimitry Andric __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); 1525ffd83dbSDimitry Andric } 1535ffd83dbSDimitry Andric _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 1545ffd83dbSDimitry Andric void arrive_and_drop() 1555ffd83dbSDimitry Andric { 1565ffd83dbSDimitry Andric __expected_adjustment.fetch_sub(1, memory_order_relaxed); 1575ffd83dbSDimitry Andric (void)arrive(1); 1585ffd83dbSDimitry Andric } 1595ffd83dbSDimitry Andric}; 1605ffd83dbSDimitry Andric 1615ffd83dbSDimitry Andric#else 1625ffd83dbSDimitry Andric 1635ffd83dbSDimitry Andric/* 1645ffd83dbSDimitry Andric 1655ffd83dbSDimitry AndricThe alternative implementation of __barrier_base is a central barrier. 1665ffd83dbSDimitry Andric 1675ffd83dbSDimitry AndricTwo versions of this algorithm are provided: 1685ffd83dbSDimitry Andric 1. A fairly straightforward implementation of the litterature for the 1695ffd83dbSDimitry Andric general case where the completion function is not empty. 1705ffd83dbSDimitry Andric 2. An optimized implementation that exploits 2's complement arithmetic 1715ffd83dbSDimitry Andric and well-defined overflow in atomic arithmetic, to handle the phase 1725ffd83dbSDimitry Andric roll-over for free. 1735ffd83dbSDimitry Andric 1745ffd83dbSDimitry Andric*/ 1755ffd83dbSDimitry Andric 1765ffd83dbSDimitry Andrictemplate<class _CompletionF> 1775ffd83dbSDimitry Andricclass __barrier_base { 1785ffd83dbSDimitry Andric 1795ffd83dbSDimitry Andric __atomic_base<ptrdiff_t> __expected; 1805ffd83dbSDimitry Andric __atomic_base<ptrdiff_t> __arrived; 1815ffd83dbSDimitry Andric _CompletionF __completion; 1825ffd83dbSDimitry Andric __atomic_base<bool> __phase; 1835ffd83dbSDimitry Andricpublic: 1845ffd83dbSDimitry Andric using arrival_token = bool; 1855ffd83dbSDimitry Andric 1865ffd83dbSDimitry Andric static constexpr ptrdiff_t max() noexcept { 1875ffd83dbSDimitry Andric return numeric_limits<ptrdiff_t>::max(); 1885ffd83dbSDimitry Andric } 1895ffd83dbSDimitry Andric 1905ffd83dbSDimitry Andric _LIBCPP_INLINE_VISIBILITY 1915ffd83dbSDimitry Andric __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) 1925ffd83dbSDimitry Andric : __expected(__expected), __arrived(__expected), __completion(move(__completion)), __phase(false) 1935ffd83dbSDimitry Andric { 1945ffd83dbSDimitry Andric } 1955ffd83dbSDimitry Andric [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 1965ffd83dbSDimitry Andric arrival_token arrive(ptrdiff_t update) 1975ffd83dbSDimitry Andric { 1985ffd83dbSDimitry Andric auto const __old_phase = __phase.load(memory_order_relaxed); 1995ffd83dbSDimitry Andric auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update; 2005ffd83dbSDimitry Andric auto const new_expected = __expected.load(memory_order_relaxed); 2015ffd83dbSDimitry Andric if(0 == __result) { 2025ffd83dbSDimitry Andric __completion(); 2035ffd83dbSDimitry Andric __arrived.store(new_expected, memory_order_relaxed); 2045ffd83dbSDimitry Andric __phase.store(!__old_phase, memory_order_release); 2055ffd83dbSDimitry Andric __phase.notify_all(); 2065ffd83dbSDimitry Andric } 2075ffd83dbSDimitry Andric return __old_phase; 2085ffd83dbSDimitry Andric } 2095ffd83dbSDimitry Andric _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 2105ffd83dbSDimitry Andric void wait(arrival_token&& __old_phase) const 2115ffd83dbSDimitry Andric { 2125ffd83dbSDimitry Andric __phase.wait(__old_phase, memory_order_acquire); 2135ffd83dbSDimitry Andric } 2145ffd83dbSDimitry Andric _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 2155ffd83dbSDimitry Andric void arrive_and_drop() 2165ffd83dbSDimitry Andric { 2175ffd83dbSDimitry Andric __expected.fetch_sub(1, memory_order_relaxed); 2185ffd83dbSDimitry Andric (void)arrive(1); 2195ffd83dbSDimitry Andric } 2205ffd83dbSDimitry Andric}; 2215ffd83dbSDimitry Andric 2225ffd83dbSDimitry Andrictemplate<> 2235ffd83dbSDimitry Andricclass __barrier_base<__empty_completion> { 2245ffd83dbSDimitry Andric 2255ffd83dbSDimitry Andric static constexpr uint64_t __expected_unit = 1ull; 2265ffd83dbSDimitry Andric static constexpr uint64_t __arrived_unit = 1ull << 32; 2275ffd83dbSDimitry Andric static constexpr uint64_t __expected_mask = __arrived_unit - 1; 2285ffd83dbSDimitry Andric static constexpr uint64_t __phase_bit = 1ull << 63; 2295ffd83dbSDimitry Andric static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask; 2305ffd83dbSDimitry Andric 2315ffd83dbSDimitry Andric __atomic_base<uint64_t> __phase_arrived_expected; 2325ffd83dbSDimitry Andric 2335ffd83dbSDimitry Andric static _LIBCPP_INLINE_VISIBILITY 2345ffd83dbSDimitry Andric constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT 2355ffd83dbSDimitry Andric { 2365ffd83dbSDimitry Andric return ((uint64_t(1u << 31) - __count) << 32) 2375ffd83dbSDimitry Andric | (uint64_t(1u << 31) - __count); 2385ffd83dbSDimitry Andric } 2395ffd83dbSDimitry Andric 2405ffd83dbSDimitry Andricpublic: 2415ffd83dbSDimitry Andric using arrival_token = uint64_t; 2425ffd83dbSDimitry Andric 2435ffd83dbSDimitry Andric static constexpr ptrdiff_t max() noexcept { 2445ffd83dbSDimitry Andric return ptrdiff_t(1u << 31) - 1; 2455ffd83dbSDimitry Andric } 2465ffd83dbSDimitry Andric 2475ffd83dbSDimitry Andric _LIBCPP_INLINE_VISIBILITY 2485ffd83dbSDimitry Andric explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion()) 2495ffd83dbSDimitry Andric : __phase_arrived_expected(__init(__count)) 2505ffd83dbSDimitry Andric { 2515ffd83dbSDimitry Andric } 2525ffd83dbSDimitry Andric [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 2535ffd83dbSDimitry Andric arrival_token arrive(ptrdiff_t update) 2545ffd83dbSDimitry Andric { 2555ffd83dbSDimitry Andric auto const __inc = __arrived_unit * update; 2565ffd83dbSDimitry Andric auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel); 2575ffd83dbSDimitry Andric if((__old ^ (__old + __inc)) & __phase_bit) { 2585ffd83dbSDimitry Andric __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed); 2595ffd83dbSDimitry Andric __phase_arrived_expected.notify_all(); 2605ffd83dbSDimitry Andric } 2615ffd83dbSDimitry Andric return __old & __phase_bit; 2625ffd83dbSDimitry Andric } 2635ffd83dbSDimitry Andric inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 2645ffd83dbSDimitry Andric void wait(arrival_token&& __phase) const 2655ffd83dbSDimitry Andric { 2665ffd83dbSDimitry Andric auto const __test_fn = [=]() -> bool { 2675ffd83dbSDimitry Andric uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire); 2685ffd83dbSDimitry Andric return ((__current & __phase_bit) != __phase); 2695ffd83dbSDimitry Andric }; 2705ffd83dbSDimitry Andric __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); 2715ffd83dbSDimitry Andric } 2725ffd83dbSDimitry Andric inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 2735ffd83dbSDimitry Andric void arrive_and_drop() 2745ffd83dbSDimitry Andric { 2755ffd83dbSDimitry Andric __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed); 2765ffd83dbSDimitry Andric (void)arrive(1); 2775ffd83dbSDimitry Andric } 2785ffd83dbSDimitry Andric}; 2795ffd83dbSDimitry Andric 2805ffd83dbSDimitry Andric#endif //_LIBCPP_HAS_NO_TREE_BARRIER 2815ffd83dbSDimitry Andric 2825ffd83dbSDimitry Andrictemplate<class _CompletionF = __empty_completion> 2835ffd83dbSDimitry Andricclass barrier { 2845ffd83dbSDimitry Andric 2855ffd83dbSDimitry Andric __barrier_base<_CompletionF> __b; 2865ffd83dbSDimitry Andricpublic: 2875ffd83dbSDimitry Andric using arrival_token = typename __barrier_base<_CompletionF>::arrival_token; 2885ffd83dbSDimitry Andric 2895ffd83dbSDimitry Andric static constexpr ptrdiff_t max() noexcept { 2905ffd83dbSDimitry Andric return __barrier_base<_CompletionF>::max(); 2915ffd83dbSDimitry Andric } 2925ffd83dbSDimitry Andric 2935ffd83dbSDimitry Andric _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 2945ffd83dbSDimitry Andric barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF()) 295e8d8bef9SDimitry Andric : __b(__count, _VSTD::move(__completion)) { 2965ffd83dbSDimitry Andric } 2975ffd83dbSDimitry Andric 2985ffd83dbSDimitry Andric barrier(barrier const&) = delete; 2995ffd83dbSDimitry Andric barrier& operator=(barrier const&) = delete; 3005ffd83dbSDimitry Andric 3015ffd83dbSDimitry Andric [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 3025ffd83dbSDimitry Andric arrival_token arrive(ptrdiff_t update = 1) 3035ffd83dbSDimitry Andric { 3045ffd83dbSDimitry Andric return __b.arrive(update); 3055ffd83dbSDimitry Andric } 3065ffd83dbSDimitry Andric _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 3075ffd83dbSDimitry Andric void wait(arrival_token&& __phase) const 3085ffd83dbSDimitry Andric { 309e8d8bef9SDimitry Andric __b.wait(_VSTD::move(__phase)); 3105ffd83dbSDimitry Andric } 3115ffd83dbSDimitry Andric _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 3125ffd83dbSDimitry Andric void arrive_and_wait() 3135ffd83dbSDimitry Andric { 3145ffd83dbSDimitry Andric wait(arrive()); 3155ffd83dbSDimitry Andric } 3165ffd83dbSDimitry Andric _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 3175ffd83dbSDimitry Andric void arrive_and_drop() 3185ffd83dbSDimitry Andric { 3195ffd83dbSDimitry Andric __b.arrive_and_drop(); 3205ffd83dbSDimitry Andric } 3215ffd83dbSDimitry Andric}; 3225ffd83dbSDimitry Andric 3235ffd83dbSDimitry Andric_LIBCPP_END_NAMESPACE_STD 3245ffd83dbSDimitry Andric 3255ffd83dbSDimitry Andric#endif // _LIBCPP_STD_VER >= 14 3265ffd83dbSDimitry Andric 327e8d8bef9SDimitry Andric_LIBCPP_POP_MACROS 328e8d8bef9SDimitry Andric 3295ffd83dbSDimitry Andric#endif //_LIBCPP_BARRIER 330