xref: /freebsd/contrib/llvm-project/libcxx/include/barrier (revision 1db9f3b21e39176dd5b67cf8ac378633b172463e)
1// -*- C++ -*-
2//===----------------------------------------------------------------------===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef _LIBCPP_BARRIER
11#define _LIBCPP_BARRIER
12
13/*
14    barrier synopsis
15
16namespace std
17{
18
19  template<class CompletionFunction = see below>
20  class barrier
21  {
22  public:
23    using arrival_token = see below;
24
25    static constexpr ptrdiff_t max() noexcept;
26
27    constexpr explicit barrier(ptrdiff_t phase_count,
28                               CompletionFunction f = CompletionFunction());
29    ~barrier();
30
31    barrier(const barrier&) = delete;
32    barrier& operator=(const barrier&) = delete;
33
34    [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
35    void wait(arrival_token&& arrival) const;
36
37    void arrive_and_wait();
38    void arrive_and_drop();
39
40  private:
41    CompletionFunction completion; // exposition only
42  };
43
44}
45
46*/
47
48#include <__assert> // all public C++ headers provide the assertion handler
49#include <__atomic/atomic_base.h>
50#include <__atomic/memory_order.h>
51#include <__availability>
52#include <__config>
53#include <__memory/unique_ptr.h>
54#include <__thread/poll_with_backoff.h>
55#include <__thread/timed_backoff_policy.h>
56#include <__utility/move.h>
57#include <cstddef>
58#include <cstdint>
59#include <limits>
60#include <version>
61
62#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
63#  pragma GCC system_header
64#endif
65
66#ifdef _LIBCPP_HAS_NO_THREADS
67#  error "<barrier> is not supported since libc++ has been configured without support for threads."
68#endif
69
70_LIBCPP_PUSH_MACROS
71#include <__undef_macros>
72
73#if _LIBCPP_STD_VER >= 14
74
75_LIBCPP_BEGIN_NAMESPACE_STD
76
77struct __empty_completion {
78  inline _LIBCPP_HIDE_FROM_ABI void operator()() noexcept {}
79};
80
81#  ifndef _LIBCPP_HAS_NO_TREE_BARRIER
82
83/*
84
85The default implementation of __barrier_base is a classic tree barrier.
86
87It looks different from literature pseudocode for two main reasons:
88 1. Threads that call into std::barrier functions do not provide indices,
89    so a numbering step is added before the actual barrier algorithm,
90    appearing as an N+1 round to the N rounds of the tree barrier.
91 2. A great deal of attention has been paid to avoid cache line thrashing
92    by flattening the tree structure into cache-line sized arrays, that
93    are indexed in an efficient way.
94
95*/
96
97using __barrier_phase_t = uint8_t;
98
99class __barrier_algorithm_base;
100
101_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __barrier_algorithm_base*
102__construct_barrier_algorithm_base(ptrdiff_t& __expected);
103
104_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI bool
105__arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, __barrier_phase_t __old_phase);
106
107_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
108__destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
109
110template <class _CompletionF>
111class __barrier_base {
112  ptrdiff_t __expected_;
113  unique_ptr<__barrier_algorithm_base, void (*)(__barrier_algorithm_base*)> __base_;
114  __atomic_base<ptrdiff_t> __expected_adjustment_;
115  _CompletionF __completion_;
116  __atomic_base<__barrier_phase_t> __phase_;
117
118public:
119  using arrival_token = __barrier_phase_t;
120
121  static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
122
123  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI
124  __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
125      : __expected_(__expected),
126        __base_(std::__construct_barrier_algorithm_base(this->__expected_), &__destroy_barrier_algorithm_base),
127        __expected_adjustment_(0),
128        __completion_(std::move(__completion)),
129        __phase_(0) {}
130  [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) {
131    _LIBCPP_ASSERT_UNCATEGORIZED(
132        __update <= __expected_, "update is greater than the expected count for the current barrier phase");
133
134    auto const __old_phase = __phase_.load(memory_order_relaxed);
135    for (; __update; --__update)
136      if (__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) {
137        __completion_();
138        __expected_ += __expected_adjustment_.load(memory_order_relaxed);
139        __expected_adjustment_.store(0, memory_order_relaxed);
140        __phase_.store(__old_phase + 2, memory_order_release);
141        __phase_.notify_all();
142      }
143    return __old_phase;
144  }
145  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
146    auto const __test_fn = [this, __old_phase]() -> bool { return __phase_.load(memory_order_acquire) != __old_phase; };
147    std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
148  }
149  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
150    __expected_adjustment_.fetch_sub(1, memory_order_relaxed);
151    (void)arrive(1);
152  }
153};
154
155#  else
156
157/*
158
159The alternative implementation of __barrier_base is a central barrier.
160
161Two versions of this algorithm are provided:
162 1. A fairly straightforward implementation of the litterature for the
163    general case where the completion function is not empty.
164 2. An optimized implementation that exploits 2's complement arithmetic
165    and well-defined overflow in atomic arithmetic, to handle the phase
166    roll-over for free.
167
168*/
169
170template <class _CompletionF>
171class __barrier_base {
172  __atomic_base<ptrdiff_t> __expected;
173  __atomic_base<ptrdiff_t> __arrived;
174  _CompletionF __completion;
175  __atomic_base<bool> __phase;
176
177public:
178  using arrival_token = bool;
179
180  static constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
181
182  _LIBCPP_HIDE_FROM_ABI __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
183      : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false) {}
184  [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
185    auto const __old_phase  = __phase.load(memory_order_relaxed);
186    auto const __result     = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
187    auto const new_expected = __expected.load(memory_order_relaxed);
188
189    _LIBCPP_ASSERT_UNCATEGORIZED(
190        update <= new_expected, "update is greater than the expected count for the current barrier phase");
191
192    if (0 == __result) {
193      __completion();
194      __arrived.store(new_expected, memory_order_relaxed);
195      __phase.store(!__old_phase, memory_order_release);
196      __phase.notify_all();
197    }
198    return __old_phase;
199  }
200  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
201    __phase.wait(__old_phase, memory_order_acquire);
202  }
203  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
204    __expected.fetch_sub(1, memory_order_relaxed);
205    (void)arrive(1);
206  }
207};
208
209template <>
210class __barrier_base<__empty_completion> {
211  static constexpr uint64_t __expected_unit = 1ull;
212  static constexpr uint64_t __arrived_unit  = 1ull << 32;
213  static constexpr uint64_t __expected_mask = __arrived_unit - 1;
214  static constexpr uint64_t __phase_bit     = 1ull << 63;
215  static constexpr uint64_t __arrived_mask  = (__phase_bit - 1) & ~__expected_mask;
216
217  __atomic_base<uint64_t> __phase_arrived_expected;
218
219  static _LIBCPP_HIDE_FROM_ABI constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT {
220    return ((uint64_t(1u << 31) - __count) << 32) | (uint64_t(1u << 31) - __count);
221  }
222
223public:
224  using arrival_token = uint64_t;
225
226  static constexpr ptrdiff_t max() noexcept { return ptrdiff_t(1u << 31) - 1; }
227
228  _LIBCPP_HIDE_FROM_ABI explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
229      : __phase_arrived_expected(__init(__count)) {}
230  [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
231    auto const __inc = __arrived_unit * update;
232    auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
233
234    _LIBCPP_ASSERT_UNCATEGORIZED(
235        update <= __old, "update is greater than the expected count for the current barrier phase");
236
237    if ((__old ^ (__old + __inc)) & __phase_bit) {
238      __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
239      __phase_arrived_expected.notify_all();
240    }
241    return __old & __phase_bit;
242  }
243  inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
244    auto const __test_fn = [=]() -> bool {
245      uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
246      return ((__current & __phase_bit) != __phase);
247    };
248    __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
249  }
250  inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
251    __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
252    (void)arrive(1);
253  }
254};
255
256#  endif // !_LIBCPP_HAS_NO_TREE_BARRIER
257
258template <class _CompletionF = __empty_completion>
259class barrier {
260  __barrier_base<_CompletionF> __b_;
261
262public:
263  using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
264
265  static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return __barrier_base<_CompletionF>::max(); }
266
267  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI explicit barrier(
268      ptrdiff_t __count, _CompletionF __completion = _CompletionF())
269      : __b_(__count, std::move(__completion)) {
270    _LIBCPP_ASSERT_UNCATEGORIZED(
271        __count >= 0,
272        "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with a negative value");
273    _LIBCPP_ASSERT_UNCATEGORIZED(
274        __count <= max(),
275        "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with "
276        "a value greater than max()");
277  }
278
279  barrier(barrier const&)            = delete;
280  barrier& operator=(barrier const&) = delete;
281
282  [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) {
283    _LIBCPP_ASSERT_UNCATEGORIZED(__update > 0, "barrier:arrive must be called with a value greater than 0");
284    return __b_.arrive(__update);
285  }
286  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
287    __b_.wait(std::move(__phase));
288  }
289  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_wait() { wait(arrive()); }
290  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { __b_.arrive_and_drop(); }
291};
292
293_LIBCPP_END_NAMESPACE_STD
294
295#endif // _LIBCPP_STD_VER >= 14
296
297_LIBCPP_POP_MACROS
298
299#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
300#  include <atomic>
301#  include <concepts>
302#  include <iterator>
303#  include <memory>
304#  include <stdexcept>
305#  include <variant>
306#endif
307
308#endif //_LIBCPP_BARRIER
309