xref: /freebsd/contrib/llvm-project/libcxx/include/barrier (revision 753f127f3ace09432b2baeffd71a308760641a62)
15ffd83dbSDimitry Andric// -*- C++ -*-
2349cc55cSDimitry Andric//===----------------------------------------------------------------------===//
35ffd83dbSDimitry Andric//
45ffd83dbSDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
55ffd83dbSDimitry Andric// See https://llvm.org/LICENSE.txt for license information.
65ffd83dbSDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
75ffd83dbSDimitry Andric//
85ffd83dbSDimitry Andric//===----------------------------------------------------------------------===//
95ffd83dbSDimitry Andric
105ffd83dbSDimitry Andric#ifndef _LIBCPP_BARRIER
115ffd83dbSDimitry Andric#define _LIBCPP_BARRIER
125ffd83dbSDimitry Andric
135ffd83dbSDimitry Andric/*
145ffd83dbSDimitry Andric    barrier synopsis
155ffd83dbSDimitry Andric
165ffd83dbSDimitry Andricnamespace std
175ffd83dbSDimitry Andric{
185ffd83dbSDimitry Andric
195ffd83dbSDimitry Andric  template<class CompletionFunction = see below>
205ffd83dbSDimitry Andric  class barrier
215ffd83dbSDimitry Andric  {
225ffd83dbSDimitry Andric  public:
235ffd83dbSDimitry Andric    using arrival_token = see below;
245ffd83dbSDimitry Andric
25e8d8bef9SDimitry Andric    static constexpr ptrdiff_t max() noexcept;
26e8d8bef9SDimitry Andric
275ffd83dbSDimitry Andric    constexpr explicit barrier(ptrdiff_t phase_count,
285ffd83dbSDimitry Andric                               CompletionFunction f = CompletionFunction());
295ffd83dbSDimitry Andric    ~barrier();
305ffd83dbSDimitry Andric
315ffd83dbSDimitry Andric    barrier(const barrier&) = delete;
325ffd83dbSDimitry Andric    barrier& operator=(const barrier&) = delete;
335ffd83dbSDimitry Andric
345ffd83dbSDimitry Andric    [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
355ffd83dbSDimitry Andric    void wait(arrival_token&& arrival) const;
365ffd83dbSDimitry Andric
375ffd83dbSDimitry Andric    void arrive_and_wait();
385ffd83dbSDimitry Andric    void arrive_and_drop();
395ffd83dbSDimitry Andric
405ffd83dbSDimitry Andric  private:
415ffd83dbSDimitry Andric    CompletionFunction completion; // exposition only
425ffd83dbSDimitry Andric  };
435ffd83dbSDimitry Andric
445ffd83dbSDimitry Andric}
455ffd83dbSDimitry Andric
465ffd83dbSDimitry Andric*/
475ffd83dbSDimitry Andric
4881ad6265SDimitry Andric#include <__assert> // all public C++ headers provide the assertion handler
49e8d8bef9SDimitry Andric#include <__availability>
50fe6060f1SDimitry Andric#include <__config>
5104eeddc0SDimitry Andric#include <__thread/timed_backoff_policy.h>
525ffd83dbSDimitry Andric#include <atomic>
5381ad6265SDimitry Andric#include <limits>
545ffd83dbSDimitry Andric#include <memory>
555ffd83dbSDimitry Andric
565ffd83dbSDimitry Andric#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
575ffd83dbSDimitry Andric#  pragma GCC system_header
585ffd83dbSDimitry Andric#endif
595ffd83dbSDimitry Andric
605ffd83dbSDimitry Andric#ifdef _LIBCPP_HAS_NO_THREADS
6181ad6265SDimitry Andric# error "<barrier> is not supported since libc++ has been configured without support for threads."
625ffd83dbSDimitry Andric#endif
635ffd83dbSDimitry Andric
64e8d8bef9SDimitry Andric_LIBCPP_PUSH_MACROS
65e8d8bef9SDimitry Andric#include <__undef_macros>
66e8d8bef9SDimitry Andric
675ffd83dbSDimitry Andric#if _LIBCPP_STD_VER >= 14
685ffd83dbSDimitry Andric
695ffd83dbSDimitry Andric_LIBCPP_BEGIN_NAMESPACE_STD
705ffd83dbSDimitry Andric
715ffd83dbSDimitry Andricstruct __empty_completion
725ffd83dbSDimitry Andric{
735ffd83dbSDimitry Andric    inline _LIBCPP_INLINE_VISIBILITY
745ffd83dbSDimitry Andric    void operator()() noexcept
755ffd83dbSDimitry Andric    {
765ffd83dbSDimitry Andric    }
775ffd83dbSDimitry Andric};
785ffd83dbSDimitry Andric
795ffd83dbSDimitry Andric#ifndef _LIBCPP_HAS_NO_TREE_BARRIER
805ffd83dbSDimitry Andric
815ffd83dbSDimitry Andric/*
825ffd83dbSDimitry Andric
835ffd83dbSDimitry AndricThe default implementation of __barrier_base is a classic tree barrier.
845ffd83dbSDimitry Andric
855ffd83dbSDimitry AndricIt looks different from literature pseudocode for two main reasons:
865ffd83dbSDimitry Andric 1. Threads that call into std::barrier functions do not provide indices,
875ffd83dbSDimitry Andric    so a numbering step is added before the actual barrier algorithm,
885ffd83dbSDimitry Andric    appearing as an N+1 round to the N rounds of the tree barrier.
895ffd83dbSDimitry Andric 2. A great deal of attention has been paid to avoid cache line thrashing
905ffd83dbSDimitry Andric    by flattening the tree structure into cache-line sized arrays, that
915ffd83dbSDimitry Andric    are indexed in an efficient way.
925ffd83dbSDimitry Andric
935ffd83dbSDimitry Andric*/
945ffd83dbSDimitry Andric
955ffd83dbSDimitry Andricusing __barrier_phase_t = uint8_t;
965ffd83dbSDimitry Andric
975ffd83dbSDimitry Andricclass __barrier_algorithm_base;
985ffd83dbSDimitry Andric
995ffd83dbSDimitry Andric_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
1005ffd83dbSDimitry Andric__barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected);
1015ffd83dbSDimitry Andric
1025ffd83dbSDimitry Andric_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
1035ffd83dbSDimitry Andricbool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier,
1045ffd83dbSDimitry Andric                                     __barrier_phase_t __old_phase);
1055ffd83dbSDimitry Andric
1065ffd83dbSDimitry Andric_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
1075ffd83dbSDimitry Andricvoid __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
1085ffd83dbSDimitry Andric
1095ffd83dbSDimitry Andrictemplate<class _CompletionF>
1105ffd83dbSDimitry Andricclass __barrier_base {
11181ad6265SDimitry Andric    ptrdiff_t                                               __expected_;
1125ffd83dbSDimitry Andric    unique_ptr<__barrier_algorithm_base,
11381ad6265SDimitry Andric               void (*)(__barrier_algorithm_base*)>         __base_;
11481ad6265SDimitry Andric    __atomic_base<ptrdiff_t>                                __expected_adjustment_;
11581ad6265SDimitry Andric    _CompletionF                                            __completion_;
11681ad6265SDimitry Andric    __atomic_base<__barrier_phase_t>                        __phase_;
1175ffd83dbSDimitry Andric
1185ffd83dbSDimitry Andricpublic:
1195ffd83dbSDimitry Andric    using arrival_token = __barrier_phase_t;
1205ffd83dbSDimitry Andric
1215ffd83dbSDimitry Andric    static constexpr ptrdiff_t max() noexcept {
1225ffd83dbSDimitry Andric        return numeric_limits<ptrdiff_t>::max();
1235ffd83dbSDimitry Andric    }
1245ffd83dbSDimitry Andric
1255ffd83dbSDimitry Andric    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
1265ffd83dbSDimitry Andric    __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
12781ad6265SDimitry Andric            : __expected_(__expected), __base_(__construct_barrier_algorithm_base(this->__expected_),
1285ffd83dbSDimitry Andric                                               &__destroy_barrier_algorithm_base),
12981ad6265SDimitry Andric              __expected_adjustment_(0), __completion_(std::move(__completion)), __phase_(0)
1305ffd83dbSDimitry Andric    {
1315ffd83dbSDimitry Andric    }
1325ffd83dbSDimitry Andric    [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
133*753f127fSDimitry Andric    arrival_token arrive(ptrdiff_t __update)
1345ffd83dbSDimitry Andric    {
13581ad6265SDimitry Andric        auto const __old_phase = __phase_.load(memory_order_relaxed);
136*753f127fSDimitry Andric        for(; __update; --__update)
13781ad6265SDimitry Andric            if(__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) {
13881ad6265SDimitry Andric                __completion_();
13981ad6265SDimitry Andric                __expected_ += __expected_adjustment_.load(memory_order_relaxed);
14081ad6265SDimitry Andric                __expected_adjustment_.store(0, memory_order_relaxed);
14181ad6265SDimitry Andric                __phase_.store(__old_phase + 2, memory_order_release);
14281ad6265SDimitry Andric                __phase_.notify_all();
1435ffd83dbSDimitry Andric            }
1445ffd83dbSDimitry Andric        return __old_phase;
1455ffd83dbSDimitry Andric    }
1465ffd83dbSDimitry Andric    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
1475ffd83dbSDimitry Andric    void wait(arrival_token&& __old_phase) const
1485ffd83dbSDimitry Andric    {
149fe6060f1SDimitry Andric        auto const __test_fn = [this, __old_phase]() -> bool {
15081ad6265SDimitry Andric            return __phase_.load(memory_order_acquire) != __old_phase;
1515ffd83dbSDimitry Andric        };
1525ffd83dbSDimitry Andric        __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
1535ffd83dbSDimitry Andric    }
1545ffd83dbSDimitry Andric    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
1555ffd83dbSDimitry Andric    void arrive_and_drop()
1565ffd83dbSDimitry Andric    {
15781ad6265SDimitry Andric        __expected_adjustment_.fetch_sub(1, memory_order_relaxed);
1585ffd83dbSDimitry Andric        (void)arrive(1);
1595ffd83dbSDimitry Andric    }
1605ffd83dbSDimitry Andric};
1615ffd83dbSDimitry Andric
1625ffd83dbSDimitry Andric#else
1635ffd83dbSDimitry Andric
1645ffd83dbSDimitry Andric/*
1655ffd83dbSDimitry Andric
1665ffd83dbSDimitry AndricThe alternative implementation of __barrier_base is a central barrier.
1675ffd83dbSDimitry Andric
1685ffd83dbSDimitry AndricTwo versions of this algorithm are provided:
1695ffd83dbSDimitry Andric 1. A fairly straightforward implementation of the litterature for the
1705ffd83dbSDimitry Andric    general case where the completion function is not empty.
1715ffd83dbSDimitry Andric 2. An optimized implementation that exploits 2's complement arithmetic
1725ffd83dbSDimitry Andric    and well-defined overflow in atomic arithmetic, to handle the phase
1735ffd83dbSDimitry Andric    roll-over for free.
1745ffd83dbSDimitry Andric
1755ffd83dbSDimitry Andric*/
1765ffd83dbSDimitry Andric
1775ffd83dbSDimitry Andrictemplate<class _CompletionF>
1785ffd83dbSDimitry Andricclass __barrier_base {
1795ffd83dbSDimitry Andric
1805ffd83dbSDimitry Andric    __atomic_base<ptrdiff_t> __expected;
1815ffd83dbSDimitry Andric    __atomic_base<ptrdiff_t> __arrived;
1825ffd83dbSDimitry Andric    _CompletionF             __completion;
1835ffd83dbSDimitry Andric    __atomic_base<bool>      __phase;
1845ffd83dbSDimitry Andricpublic:
1855ffd83dbSDimitry Andric    using arrival_token = bool;
1865ffd83dbSDimitry Andric
1875ffd83dbSDimitry Andric    static constexpr ptrdiff_t max() noexcept {
1885ffd83dbSDimitry Andric        return numeric_limits<ptrdiff_t>::max();
1895ffd83dbSDimitry Andric    }
1905ffd83dbSDimitry Andric
1915ffd83dbSDimitry Andric    _LIBCPP_INLINE_VISIBILITY
1925ffd83dbSDimitry Andric    __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
19381ad6265SDimitry Andric        : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false)
1945ffd83dbSDimitry Andric    {
1955ffd83dbSDimitry Andric    }
1965ffd83dbSDimitry Andric    [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
1975ffd83dbSDimitry Andric    arrival_token arrive(ptrdiff_t update)
1985ffd83dbSDimitry Andric    {
1995ffd83dbSDimitry Andric        auto const __old_phase = __phase.load(memory_order_relaxed);
2005ffd83dbSDimitry Andric        auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
2015ffd83dbSDimitry Andric        auto const new_expected = __expected.load(memory_order_relaxed);
2025ffd83dbSDimitry Andric        if(0 == __result) {
2035ffd83dbSDimitry Andric            __completion();
2045ffd83dbSDimitry Andric            __arrived.store(new_expected, memory_order_relaxed);
2055ffd83dbSDimitry Andric            __phase.store(!__old_phase, memory_order_release);
2065ffd83dbSDimitry Andric            __phase.notify_all();
2075ffd83dbSDimitry Andric        }
2085ffd83dbSDimitry Andric        return __old_phase;
2095ffd83dbSDimitry Andric    }
2105ffd83dbSDimitry Andric    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
2115ffd83dbSDimitry Andric    void wait(arrival_token&& __old_phase) const
2125ffd83dbSDimitry Andric    {
2135ffd83dbSDimitry Andric        __phase.wait(__old_phase, memory_order_acquire);
2145ffd83dbSDimitry Andric    }
2155ffd83dbSDimitry Andric    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
2165ffd83dbSDimitry Andric    void arrive_and_drop()
2175ffd83dbSDimitry Andric    {
2185ffd83dbSDimitry Andric        __expected.fetch_sub(1, memory_order_relaxed);
2195ffd83dbSDimitry Andric        (void)arrive(1);
2205ffd83dbSDimitry Andric    }
2215ffd83dbSDimitry Andric};
2225ffd83dbSDimitry Andric
2235ffd83dbSDimitry Andrictemplate<>
2245ffd83dbSDimitry Andricclass __barrier_base<__empty_completion> {
2255ffd83dbSDimitry Andric
2265ffd83dbSDimitry Andric    static constexpr uint64_t __expected_unit = 1ull;
2275ffd83dbSDimitry Andric    static constexpr uint64_t __arrived_unit = 1ull << 32;
2285ffd83dbSDimitry Andric    static constexpr uint64_t __expected_mask = __arrived_unit - 1;
2295ffd83dbSDimitry Andric    static constexpr uint64_t __phase_bit = 1ull << 63;
2305ffd83dbSDimitry Andric    static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
2315ffd83dbSDimitry Andric
2325ffd83dbSDimitry Andric    __atomic_base<uint64_t>   __phase_arrived_expected;
2335ffd83dbSDimitry Andric
2345ffd83dbSDimitry Andric    static _LIBCPP_INLINE_VISIBILITY
2355ffd83dbSDimitry Andric    constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT
2365ffd83dbSDimitry Andric    {
2375ffd83dbSDimitry Andric        return ((uint64_t(1u << 31) - __count) << 32)
2385ffd83dbSDimitry Andric              | (uint64_t(1u << 31) - __count);
2395ffd83dbSDimitry Andric    }
2405ffd83dbSDimitry Andric
2415ffd83dbSDimitry Andricpublic:
2425ffd83dbSDimitry Andric    using arrival_token = uint64_t;
2435ffd83dbSDimitry Andric
2445ffd83dbSDimitry Andric    static constexpr ptrdiff_t max() noexcept {
2455ffd83dbSDimitry Andric        return ptrdiff_t(1u << 31) - 1;
2465ffd83dbSDimitry Andric    }
2475ffd83dbSDimitry Andric
2485ffd83dbSDimitry Andric    _LIBCPP_INLINE_VISIBILITY
2495ffd83dbSDimitry Andric    explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
2505ffd83dbSDimitry Andric        : __phase_arrived_expected(__init(__count))
2515ffd83dbSDimitry Andric    {
2525ffd83dbSDimitry Andric    }
2535ffd83dbSDimitry Andric    [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
2545ffd83dbSDimitry Andric    arrival_token arrive(ptrdiff_t update)
2555ffd83dbSDimitry Andric    {
2565ffd83dbSDimitry Andric        auto const __inc = __arrived_unit * update;
2575ffd83dbSDimitry Andric        auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
2585ffd83dbSDimitry Andric        if((__old ^ (__old + __inc)) & __phase_bit) {
2595ffd83dbSDimitry Andric            __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
2605ffd83dbSDimitry Andric            __phase_arrived_expected.notify_all();
2615ffd83dbSDimitry Andric        }
2625ffd83dbSDimitry Andric        return __old & __phase_bit;
2635ffd83dbSDimitry Andric    }
2645ffd83dbSDimitry Andric    inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
2655ffd83dbSDimitry Andric    void wait(arrival_token&& __phase) const
2665ffd83dbSDimitry Andric    {
2675ffd83dbSDimitry Andric        auto const __test_fn = [=]() -> bool {
2685ffd83dbSDimitry Andric            uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
2695ffd83dbSDimitry Andric            return ((__current & __phase_bit) != __phase);
2705ffd83dbSDimitry Andric        };
2715ffd83dbSDimitry Andric        __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
2725ffd83dbSDimitry Andric    }
2735ffd83dbSDimitry Andric    inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
2745ffd83dbSDimitry Andric    void arrive_and_drop()
2755ffd83dbSDimitry Andric    {
2765ffd83dbSDimitry Andric        __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
2775ffd83dbSDimitry Andric        (void)arrive(1);
2785ffd83dbSDimitry Andric    }
2795ffd83dbSDimitry Andric};
2805ffd83dbSDimitry Andric
28181ad6265SDimitry Andric#endif // !_LIBCPP_HAS_NO_TREE_BARRIER
2825ffd83dbSDimitry Andric
2835ffd83dbSDimitry Andrictemplate<class _CompletionF = __empty_completion>
2845ffd83dbSDimitry Andricclass barrier {
2855ffd83dbSDimitry Andric
2865ffd83dbSDimitry Andric    __barrier_base<_CompletionF> __b;
2875ffd83dbSDimitry Andricpublic:
2885ffd83dbSDimitry Andric    using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
2895ffd83dbSDimitry Andric
2905ffd83dbSDimitry Andric    static constexpr ptrdiff_t max() noexcept {
2915ffd83dbSDimitry Andric        return __barrier_base<_CompletionF>::max();
2925ffd83dbSDimitry Andric    }
2935ffd83dbSDimitry Andric
2945ffd83dbSDimitry Andric    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
2955ffd83dbSDimitry Andric    barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
296e8d8bef9SDimitry Andric        : __b(__count, _VSTD::move(__completion)) {
2975ffd83dbSDimitry Andric    }
2985ffd83dbSDimitry Andric
2995ffd83dbSDimitry Andric    barrier(barrier const&) = delete;
3005ffd83dbSDimitry Andric    barrier& operator=(barrier const&) = delete;
3015ffd83dbSDimitry Andric
3025ffd83dbSDimitry Andric    [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
303*753f127fSDimitry Andric    arrival_token arrive(ptrdiff_t __update = 1)
3045ffd83dbSDimitry Andric    {
305*753f127fSDimitry Andric        return __b.arrive(__update);
3065ffd83dbSDimitry Andric    }
3075ffd83dbSDimitry Andric    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
3085ffd83dbSDimitry Andric    void wait(arrival_token&& __phase) const
3095ffd83dbSDimitry Andric    {
310e8d8bef9SDimitry Andric        __b.wait(_VSTD::move(__phase));
3115ffd83dbSDimitry Andric    }
3125ffd83dbSDimitry Andric    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
3135ffd83dbSDimitry Andric    void arrive_and_wait()
3145ffd83dbSDimitry Andric    {
3155ffd83dbSDimitry Andric        wait(arrive());
3165ffd83dbSDimitry Andric    }
3175ffd83dbSDimitry Andric    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
3185ffd83dbSDimitry Andric    void arrive_and_drop()
3195ffd83dbSDimitry Andric    {
3205ffd83dbSDimitry Andric        __b.arrive_and_drop();
3215ffd83dbSDimitry Andric    }
3225ffd83dbSDimitry Andric};
3235ffd83dbSDimitry Andric
3245ffd83dbSDimitry Andric_LIBCPP_END_NAMESPACE_STD
3255ffd83dbSDimitry Andric
3265ffd83dbSDimitry Andric#endif // _LIBCPP_STD_VER >= 14
3275ffd83dbSDimitry Andric
328e8d8bef9SDimitry Andric_LIBCPP_POP_MACROS
329e8d8bef9SDimitry Andric
3305ffd83dbSDimitry Andric#endif //_LIBCPP_BARRIER
331