xref: /freebsd/contrib/llvm-project/libcxx/include/barrier (revision cb14a3fe5122c879eae1fb480ed7ce82a699ddb6)
15ffd83dbSDimitry Andric// -*- C++ -*-
2349cc55cSDimitry Andric//===----------------------------------------------------------------------===//
35ffd83dbSDimitry Andric//
45ffd83dbSDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
55ffd83dbSDimitry Andric// See https://llvm.org/LICENSE.txt for license information.
65ffd83dbSDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
75ffd83dbSDimitry Andric//
85ffd83dbSDimitry Andric//===----------------------------------------------------------------------===//
95ffd83dbSDimitry Andric
105ffd83dbSDimitry Andric#ifndef _LIBCPP_BARRIER
115ffd83dbSDimitry Andric#define _LIBCPP_BARRIER
125ffd83dbSDimitry Andric
135ffd83dbSDimitry Andric/*
145ffd83dbSDimitry Andric    barrier synopsis
155ffd83dbSDimitry Andric
165ffd83dbSDimitry Andricnamespace std
175ffd83dbSDimitry Andric{
185ffd83dbSDimitry Andric
195ffd83dbSDimitry Andric  template<class CompletionFunction = see below>
205ffd83dbSDimitry Andric  class barrier
215ffd83dbSDimitry Andric  {
225ffd83dbSDimitry Andric  public:
235ffd83dbSDimitry Andric    using arrival_token = see below;
245ffd83dbSDimitry Andric
25e8d8bef9SDimitry Andric    static constexpr ptrdiff_t max() noexcept;
26e8d8bef9SDimitry Andric
275ffd83dbSDimitry Andric    constexpr explicit barrier(ptrdiff_t phase_count,
285ffd83dbSDimitry Andric                               CompletionFunction f = CompletionFunction());
295ffd83dbSDimitry Andric    ~barrier();
305ffd83dbSDimitry Andric
315ffd83dbSDimitry Andric    barrier(const barrier&) = delete;
325ffd83dbSDimitry Andric    barrier& operator=(const barrier&) = delete;
335ffd83dbSDimitry Andric
345ffd83dbSDimitry Andric    [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
355ffd83dbSDimitry Andric    void wait(arrival_token&& arrival) const;
365ffd83dbSDimitry Andric
375ffd83dbSDimitry Andric    void arrive_and_wait();
385ffd83dbSDimitry Andric    void arrive_and_drop();
395ffd83dbSDimitry Andric
405ffd83dbSDimitry Andric  private:
415ffd83dbSDimitry Andric    CompletionFunction completion; // exposition only
425ffd83dbSDimitry Andric  };
435ffd83dbSDimitry Andric
445ffd83dbSDimitry Andric}
455ffd83dbSDimitry Andric
465ffd83dbSDimitry Andric*/
475ffd83dbSDimitry Andric
4881ad6265SDimitry Andric#include <__assert> // all public C++ headers provide the assertion handler
4906c3fb27SDimitry Andric#include <__atomic/atomic_base.h>
5006c3fb27SDimitry Andric#include <__atomic/memory_order.h>
51e8d8bef9SDimitry Andric#include <__availability>
52fe6060f1SDimitry Andric#include <__config>
53bdd1243dSDimitry Andric#include <__memory/unique_ptr.h>
5406c3fb27SDimitry Andric#include <__thread/poll_with_backoff.h>
5504eeddc0SDimitry Andric#include <__thread/timed_backoff_policy.h>
56bdd1243dSDimitry Andric#include <__utility/move.h>
5706c3fb27SDimitry Andric#include <cstddef>
5806c3fb27SDimitry Andric#include <cstdint>
5981ad6265SDimitry Andric#include <limits>
6006c3fb27SDimitry Andric#include <version>
615ffd83dbSDimitry Andric
625ffd83dbSDimitry Andric#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
635ffd83dbSDimitry Andric#  pragma GCC system_header
645ffd83dbSDimitry Andric#endif
655ffd83dbSDimitry Andric
665ffd83dbSDimitry Andric#ifdef _LIBCPP_HAS_NO_THREADS
6781ad6265SDimitry Andric#  error "<barrier> is not supported since libc++ has been configured without support for threads."
685ffd83dbSDimitry Andric#endif
695ffd83dbSDimitry Andric
70e8d8bef9SDimitry Andric_LIBCPP_PUSH_MACROS
71e8d8bef9SDimitry Andric#include <__undef_macros>
72e8d8bef9SDimitry Andric
735ffd83dbSDimitry Andric#if _LIBCPP_STD_VER >= 14
745ffd83dbSDimitry Andric
755ffd83dbSDimitry Andric_LIBCPP_BEGIN_NAMESPACE_STD
765ffd83dbSDimitry Andric
77*cb14a3feSDimitry Andricstruct __empty_completion {
78*cb14a3feSDimitry Andric  inline _LIBCPP_HIDE_FROM_ABI void operator()() noexcept {}
795ffd83dbSDimitry Andric};
805ffd83dbSDimitry Andric
815ffd83dbSDimitry Andric#  ifndef _LIBCPP_HAS_NO_TREE_BARRIER
825ffd83dbSDimitry Andric
835ffd83dbSDimitry Andric/*
845ffd83dbSDimitry Andric
855ffd83dbSDimitry AndricThe default implementation of __barrier_base is a classic tree barrier.
865ffd83dbSDimitry Andric
875ffd83dbSDimitry AndricIt looks different from literature pseudocode for two main reasons:
885ffd83dbSDimitry Andric 1. Threads that call into std::barrier functions do not provide indices,
895ffd83dbSDimitry Andric    so a numbering step is added before the actual barrier algorithm,
905ffd83dbSDimitry Andric    appearing as an N+1 round to the N rounds of the tree barrier.
915ffd83dbSDimitry Andric 2. A great deal of attention has been paid to avoid cache line thrashing
925ffd83dbSDimitry Andric    by flattening the tree structure into cache-line sized arrays, that
935ffd83dbSDimitry Andric    are indexed in an efficient way.
945ffd83dbSDimitry Andric
955ffd83dbSDimitry Andric*/
965ffd83dbSDimitry Andric
975ffd83dbSDimitry Andricusing __barrier_phase_t = uint8_t;
985ffd83dbSDimitry Andric
995ffd83dbSDimitry Andricclass __barrier_algorithm_base;
1005ffd83dbSDimitry Andric
101*cb14a3feSDimitry Andric_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __barrier_algorithm_base*
102*cb14a3feSDimitry Andric__construct_barrier_algorithm_base(ptrdiff_t& __expected);
1035ffd83dbSDimitry Andric
104*cb14a3feSDimitry Andric_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI bool
105*cb14a3feSDimitry Andric__arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, __barrier_phase_t __old_phase);
1065ffd83dbSDimitry Andric
107*cb14a3feSDimitry Andric_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
108*cb14a3feSDimitry Andric__destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
1095ffd83dbSDimitry Andric
1105ffd83dbSDimitry Andrictemplate <class _CompletionF>
1115ffd83dbSDimitry Andricclass __barrier_base {
11281ad6265SDimitry Andric  ptrdiff_t __expected_;
113*cb14a3feSDimitry Andric  unique_ptr<__barrier_algorithm_base, void (*)(__barrier_algorithm_base*)> __base_;
11481ad6265SDimitry Andric  __atomic_base<ptrdiff_t> __expected_adjustment_;
11581ad6265SDimitry Andric  _CompletionF __completion_;
11681ad6265SDimitry Andric  __atomic_base<__barrier_phase_t> __phase_;
1175ffd83dbSDimitry Andric
1185ffd83dbSDimitry Andricpublic:
1195ffd83dbSDimitry Andric  using arrival_token = __barrier_phase_t;
1205ffd83dbSDimitry Andric
121*cb14a3feSDimitry Andric  static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
1225ffd83dbSDimitry Andric
1235f757f3fSDimitry Andric  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI
1245ffd83dbSDimitry Andric  __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
125*cb14a3feSDimitry Andric      : __expected_(__expected),
126*cb14a3feSDimitry Andric        __base_(std::__construct_barrier_algorithm_base(this->__expected_), &__destroy_barrier_algorithm_base),
127*cb14a3feSDimitry Andric        __expected_adjustment_(0),
128*cb14a3feSDimitry Andric        __completion_(std::move(__completion)),
129*cb14a3feSDimitry Andric        __phase_(0) {}
130*cb14a3feSDimitry Andric  [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) {
13106c3fb27SDimitry Andric    _LIBCPP_ASSERT_UNCATEGORIZED(
13206c3fb27SDimitry Andric        __update <= __expected_, "update is greater than the expected count for the current barrier phase");
13306c3fb27SDimitry Andric
13481ad6265SDimitry Andric    auto const __old_phase = __phase_.load(memory_order_relaxed);
135753f127fSDimitry Andric    for (; __update; --__update)
13681ad6265SDimitry Andric      if (__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) {
13781ad6265SDimitry Andric        __completion_();
13881ad6265SDimitry Andric        __expected_ += __expected_adjustment_.load(memory_order_relaxed);
13981ad6265SDimitry Andric        __expected_adjustment_.store(0, memory_order_relaxed);
14081ad6265SDimitry Andric        __phase_.store(__old_phase + 2, memory_order_release);
14181ad6265SDimitry Andric        __phase_.notify_all();
1425ffd83dbSDimitry Andric      }
1435ffd83dbSDimitry Andric    return __old_phase;
1445ffd83dbSDimitry Andric  }
145*cb14a3feSDimitry Andric  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
146*cb14a3feSDimitry Andric    auto const __test_fn = [this, __old_phase]() -> bool { return __phase_.load(memory_order_acquire) != __old_phase; };
147bdd1243dSDimitry Andric    std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
1485ffd83dbSDimitry Andric  }
149*cb14a3feSDimitry Andric  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
15081ad6265SDimitry Andric    __expected_adjustment_.fetch_sub(1, memory_order_relaxed);
1515ffd83dbSDimitry Andric    (void)arrive(1);
1525ffd83dbSDimitry Andric  }
1535ffd83dbSDimitry Andric};
1545ffd83dbSDimitry Andric
1555ffd83dbSDimitry Andric#  else
1565ffd83dbSDimitry Andric
1575ffd83dbSDimitry Andric/*
1585ffd83dbSDimitry Andric
1595ffd83dbSDimitry AndricThe alternative implementation of __barrier_base is a central barrier.
1605ffd83dbSDimitry Andric
1615ffd83dbSDimitry AndricTwo versions of this algorithm are provided:
1625ffd83dbSDimitry Andric 1. A fairly straightforward implementation of the litterature for the
1635ffd83dbSDimitry Andric    general case where the completion function is not empty.
1645ffd83dbSDimitry Andric 2. An optimized implementation that exploits 2's complement arithmetic
1655ffd83dbSDimitry Andric    and well-defined overflow in atomic arithmetic, to handle the phase
1665ffd83dbSDimitry Andric    roll-over for free.
1675ffd83dbSDimitry Andric
1685ffd83dbSDimitry Andric*/
1695ffd83dbSDimitry Andric
1705ffd83dbSDimitry Andrictemplate <class _CompletionF>
1715ffd83dbSDimitry Andricclass __barrier_base {
1725ffd83dbSDimitry Andric  __atomic_base<ptrdiff_t> __expected;
1735ffd83dbSDimitry Andric  __atomic_base<ptrdiff_t> __arrived;
1745ffd83dbSDimitry Andric  _CompletionF __completion;
1755ffd83dbSDimitry Andric  __atomic_base<bool> __phase;
176*cb14a3feSDimitry Andric
1775ffd83dbSDimitry Andricpublic:
1785ffd83dbSDimitry Andric  using arrival_token = bool;
1795ffd83dbSDimitry Andric
180*cb14a3feSDimitry Andric  static constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
1815ffd83dbSDimitry Andric
182*cb14a3feSDimitry Andric  _LIBCPP_HIDE_FROM_ABI __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
183*cb14a3feSDimitry Andric      : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false) {}
184*cb14a3feSDimitry Andric  [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
1855ffd83dbSDimitry Andric    auto const __old_phase  = __phase.load(memory_order_relaxed);
1865ffd83dbSDimitry Andric    auto const __result     = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
1875ffd83dbSDimitry Andric    auto const new_expected = __expected.load(memory_order_relaxed);
18806c3fb27SDimitry Andric
18906c3fb27SDimitry Andric    _LIBCPP_ASSERT_UNCATEGORIZED(
19006c3fb27SDimitry Andric        update <= new_expected, "update is greater than the expected count for the current barrier phase");
19106c3fb27SDimitry Andric
1925ffd83dbSDimitry Andric    if (0 == __result) {
1935ffd83dbSDimitry Andric      __completion();
1945ffd83dbSDimitry Andric      __arrived.store(new_expected, memory_order_relaxed);
1955ffd83dbSDimitry Andric      __phase.store(!__old_phase, memory_order_release);
1965ffd83dbSDimitry Andric      __phase.notify_all();
1975ffd83dbSDimitry Andric    }
1985ffd83dbSDimitry Andric    return __old_phase;
1995ffd83dbSDimitry Andric  }
200*cb14a3feSDimitry Andric  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
2015ffd83dbSDimitry Andric    __phase.wait(__old_phase, memory_order_acquire);
2025ffd83dbSDimitry Andric  }
203*cb14a3feSDimitry Andric  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
2045ffd83dbSDimitry Andric    __expected.fetch_sub(1, memory_order_relaxed);
2055ffd83dbSDimitry Andric    (void)arrive(1);
2065ffd83dbSDimitry Andric  }
2075ffd83dbSDimitry Andric};
2085ffd83dbSDimitry Andric
2095ffd83dbSDimitry Andrictemplate <>
2105ffd83dbSDimitry Andricclass __barrier_base<__empty_completion> {
2115ffd83dbSDimitry Andric  static constexpr uint64_t __expected_unit = 1ull;
2125ffd83dbSDimitry Andric  static constexpr uint64_t __arrived_unit  = 1ull << 32;
2135ffd83dbSDimitry Andric  static constexpr uint64_t __expected_mask = __arrived_unit - 1;
2145ffd83dbSDimitry Andric  static constexpr uint64_t __phase_bit     = 1ull << 63;
2155ffd83dbSDimitry Andric  static constexpr uint64_t __arrived_mask  = (__phase_bit - 1) & ~__expected_mask;
2165ffd83dbSDimitry Andric
2175ffd83dbSDimitry Andric  __atomic_base<uint64_t> __phase_arrived_expected;
2185ffd83dbSDimitry Andric
219*cb14a3feSDimitry Andric  static _LIBCPP_HIDE_FROM_ABI constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT {
220*cb14a3feSDimitry Andric    return ((uint64_t(1u << 31) - __count) << 32) | (uint64_t(1u << 31) - __count);
2215ffd83dbSDimitry Andric  }
2225ffd83dbSDimitry Andric
2235ffd83dbSDimitry Andricpublic:
2245ffd83dbSDimitry Andric  using arrival_token = uint64_t;
2255ffd83dbSDimitry Andric
226*cb14a3feSDimitry Andric  static constexpr ptrdiff_t max() noexcept { return ptrdiff_t(1u << 31) - 1; }
2275ffd83dbSDimitry Andric
228*cb14a3feSDimitry Andric  _LIBCPP_HIDE_FROM_ABI explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
229*cb14a3feSDimitry Andric      : __phase_arrived_expected(__init(__count)) {}
230*cb14a3feSDimitry Andric  [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
2315ffd83dbSDimitry Andric    auto const __inc = __arrived_unit * update;
2325ffd83dbSDimitry Andric    auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
23306c3fb27SDimitry Andric
23406c3fb27SDimitry Andric    _LIBCPP_ASSERT_UNCATEGORIZED(
23506c3fb27SDimitry Andric        update <= __old, "update is greater than the expected count for the current barrier phase");
23606c3fb27SDimitry Andric
2375ffd83dbSDimitry Andric    if ((__old ^ (__old + __inc)) & __phase_bit) {
2385ffd83dbSDimitry Andric      __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
2395ffd83dbSDimitry Andric      __phase_arrived_expected.notify_all();
2405ffd83dbSDimitry Andric    }
2415ffd83dbSDimitry Andric    return __old & __phase_bit;
2425ffd83dbSDimitry Andric  }
243*cb14a3feSDimitry Andric  inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
2445ffd83dbSDimitry Andric    auto const __test_fn = [=]() -> bool {
2455ffd83dbSDimitry Andric      uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
2465ffd83dbSDimitry Andric      return ((__current & __phase_bit) != __phase);
2475ffd83dbSDimitry Andric    };
2485ffd83dbSDimitry Andric    __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
2495ffd83dbSDimitry Andric  }
250*cb14a3feSDimitry Andric  inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
2515ffd83dbSDimitry Andric    __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
2525ffd83dbSDimitry Andric    (void)arrive(1);
2535ffd83dbSDimitry Andric  }
2545ffd83dbSDimitry Andric};
2555ffd83dbSDimitry Andric
25681ad6265SDimitry Andric#  endif // !_LIBCPP_HAS_NO_TREE_BARRIER
2575ffd83dbSDimitry Andric
2585ffd83dbSDimitry Andrictemplate <class _CompletionF = __empty_completion>
2595ffd83dbSDimitry Andricclass barrier {
260bdd1243dSDimitry Andric  __barrier_base<_CompletionF> __b_;
261*cb14a3feSDimitry Andric
2625ffd83dbSDimitry Andricpublic:
2635ffd83dbSDimitry Andric  using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
2645ffd83dbSDimitry Andric
265*cb14a3feSDimitry Andric  static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return __barrier_base<_CompletionF>::max(); }
2665ffd83dbSDimitry Andric
267*cb14a3feSDimitry Andric  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI explicit barrier(
268*cb14a3feSDimitry Andric      ptrdiff_t __count, _CompletionF __completion = _CompletionF())
2695f757f3fSDimitry Andric      : __b_(__count, std::move(__completion)) {
27006c3fb27SDimitry Andric    _LIBCPP_ASSERT_UNCATEGORIZED(
27106c3fb27SDimitry Andric        __count >= 0,
27206c3fb27SDimitry Andric        "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with a negative value");
27306c3fb27SDimitry Andric    _LIBCPP_ASSERT_UNCATEGORIZED(
27406c3fb27SDimitry Andric        __count <= max(),
27506c3fb27SDimitry Andric        "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with "
27606c3fb27SDimitry Andric        "a value greater than max()");
2775ffd83dbSDimitry Andric  }
2785ffd83dbSDimitry Andric
2795ffd83dbSDimitry Andric  barrier(barrier const&)            = delete;
2805ffd83dbSDimitry Andric  barrier& operator=(barrier const&) = delete;
2815ffd83dbSDimitry Andric
282*cb14a3feSDimitry Andric  [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) {
28306c3fb27SDimitry Andric    _LIBCPP_ASSERT_UNCATEGORIZED(__update > 0, "barrier:arrive must be called with a value greater than 0");
284bdd1243dSDimitry Andric    return __b_.arrive(__update);
2855ffd83dbSDimitry Andric  }
286*cb14a3feSDimitry Andric  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
2875f757f3fSDimitry Andric    __b_.wait(std::move(__phase));
2885ffd83dbSDimitry Andric  }
289*cb14a3feSDimitry Andric  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_wait() { wait(arrive()); }
290*cb14a3feSDimitry Andric  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { __b_.arrive_and_drop(); }
2915ffd83dbSDimitry Andric};
2925ffd83dbSDimitry Andric
2935ffd83dbSDimitry Andric_LIBCPP_END_NAMESPACE_STD
2945ffd83dbSDimitry Andric
2955ffd83dbSDimitry Andric#endif // _LIBCPP_STD_VER >= 14
2965ffd83dbSDimitry Andric
297e8d8bef9SDimitry Andric_LIBCPP_POP_MACROS
298e8d8bef9SDimitry Andric
299bdd1243dSDimitry Andric#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
30006c3fb27SDimitry Andric#  include <atomic>
301bdd1243dSDimitry Andric#  include <concepts>
302bdd1243dSDimitry Andric#  include <iterator>
303bdd1243dSDimitry Andric#  include <memory>
304bdd1243dSDimitry Andric#  include <stdexcept>
305bdd1243dSDimitry Andric#  include <variant>
306bdd1243dSDimitry Andric#endif
307bdd1243dSDimitry Andric
3085ffd83dbSDimitry Andric#endif //_LIBCPP_BARRIER
309