xref: /freebsd/contrib/llvm-project/libcxx/include/barrier (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1// -*- C++ -*-
2//===----------------------------------------------------------------------===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef _LIBCPP_BARRIER
11#define _LIBCPP_BARRIER
12
13/*
14    barrier synopsis
15
16namespace std
17{
18
19  template<class CompletionFunction = see below>
20  class barrier
21  {
22  public:
23    using arrival_token = see below;
24
25    static constexpr ptrdiff_t max() noexcept;
26
27    constexpr explicit barrier(ptrdiff_t phase_count,
28                               CompletionFunction f = CompletionFunction());
29    ~barrier();
30
31    barrier(const barrier&) = delete;
32    barrier& operator=(const barrier&) = delete;
33
34    [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
35    void wait(arrival_token&& arrival) const;
36
37    void arrive_and_wait();
38    void arrive_and_drop();
39
40  private:
41    CompletionFunction completion; // exposition only
42  };
43
44}
45
46*/
47
48#include <__config>
49
50#if !defined(_LIBCPP_HAS_NO_THREADS)
51
52#  include <__assert>
53#  include <__atomic/atomic_base.h>
54#  include <__atomic/memory_order.h>
55#  include <__memory/unique_ptr.h>
56#  include <__thread/poll_with_backoff.h>
57#  include <__thread/timed_backoff_policy.h>
58#  include <__utility/move.h>
59#  include <cstddef>
60#  include <cstdint>
61#  include <limits>
62#  include <version>
63
64#  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
65#    pragma GCC system_header
66#  endif
67
68_LIBCPP_PUSH_MACROS
69#  include <__undef_macros>
70
71#  if _LIBCPP_STD_VER >= 14
72
73_LIBCPP_BEGIN_NAMESPACE_STD
74
75struct __empty_completion {
76  inline _LIBCPP_HIDE_FROM_ABI void operator()() noexcept {}
77};
78
79#    ifndef _LIBCPP_HAS_NO_TREE_BARRIER
80
81/*
82
83The default implementation of __barrier_base is a classic tree barrier.
84
85It looks different from literature pseudocode for two main reasons:
86 1. Threads that call into std::barrier functions do not provide indices,
87    so a numbering step is added before the actual barrier algorithm,
88    appearing as an N+1 round to the N rounds of the tree barrier.
89 2. A great deal of attention has been paid to avoid cache line thrashing
90    by flattening the tree structure into cache-line sized arrays, that
91    are indexed in an efficient way.
92
93*/
94
95using __barrier_phase_t = uint8_t;
96
97class __barrier_algorithm_base;
98
99_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __barrier_algorithm_base*
100__construct_barrier_algorithm_base(ptrdiff_t& __expected);
101
102_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI bool
103__arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, __barrier_phase_t __old_phase) noexcept;
104
105_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
106__destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier) noexcept;
107
108template <class _CompletionF>
109class __barrier_base {
110  ptrdiff_t __expected_;
111  unique_ptr<__barrier_algorithm_base, void (*)(__barrier_algorithm_base*)> __base_;
112  __atomic_base<ptrdiff_t> __expected_adjustment_;
113  _CompletionF __completion_;
114  __atomic_base<__barrier_phase_t> __phase_;
115
116public:
117  using arrival_token = __barrier_phase_t;
118
119  static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
120
121  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI
122  __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
123      : __expected_(__expected),
124        __base_(std::__construct_barrier_algorithm_base(this->__expected_), &__destroy_barrier_algorithm_base),
125        __expected_adjustment_(0),
126        __completion_(std::move(__completion)),
127        __phase_(0) {}
128  _LIBCPP_NODISCARD _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) {
129    _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
130        __update <= __expected_, "update is greater than the expected count for the current barrier phase");
131
132    auto const __old_phase = __phase_.load(memory_order_relaxed);
133    for (; __update; --__update)
134      if (__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) {
135        __completion_();
136        __expected_ += __expected_adjustment_.load(memory_order_relaxed);
137        __expected_adjustment_.store(0, memory_order_relaxed);
138        __phase_.store(__old_phase + 2, memory_order_release);
139        __phase_.notify_all();
140      }
141    return __old_phase;
142  }
143  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
144    auto const __test_fn = [this, __old_phase]() -> bool { return __phase_.load(memory_order_acquire) != __old_phase; };
145    std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
146  }
147  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
148    __expected_adjustment_.fetch_sub(1, memory_order_relaxed);
149    (void)arrive(1);
150  }
151};
152
153#    else
154
155/*
156
157The alternative implementation of __barrier_base is a central barrier.
158
159Two versions of this algorithm are provided:
160 1. A fairly straightforward implementation of the litterature for the
161    general case where the completion function is not empty.
162 2. An optimized implementation that exploits 2's complement arithmetic
163    and well-defined overflow in atomic arithmetic, to handle the phase
164    roll-over for free.
165
166*/
167
168template <class _CompletionF>
169class __barrier_base {
170  __atomic_base<ptrdiff_t> __expected;
171  __atomic_base<ptrdiff_t> __arrived;
172  _CompletionF __completion;
173  __atomic_base<bool> __phase;
174
175public:
176  using arrival_token = bool;
177
178  static constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
179
180  _LIBCPP_HIDE_FROM_ABI __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
181      : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false) {}
182  [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
183    auto const __old_phase  = __phase.load(memory_order_relaxed);
184    auto const __result     = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
185    auto const new_expected = __expected.load(memory_order_relaxed);
186
187    _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
188        update <= new_expected, "update is greater than the expected count for the current barrier phase");
189
190    if (0 == __result) {
191      __completion();
192      __arrived.store(new_expected, memory_order_relaxed);
193      __phase.store(!__old_phase, memory_order_release);
194      __phase.notify_all();
195    }
196    return __old_phase;
197  }
198  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
199    __phase.wait(__old_phase, memory_order_acquire);
200  }
201  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
202    __expected.fetch_sub(1, memory_order_relaxed);
203    (void)arrive(1);
204  }
205};
206
207template <>
208class __barrier_base<__empty_completion> {
209  static constexpr uint64_t __expected_unit = 1ull;
210  static constexpr uint64_t __arrived_unit  = 1ull << 32;
211  static constexpr uint64_t __expected_mask = __arrived_unit - 1;
212  static constexpr uint64_t __phase_bit     = 1ull << 63;
213  static constexpr uint64_t __arrived_mask  = (__phase_bit - 1) & ~__expected_mask;
214
215  __atomic_base<uint64_t> __phase_arrived_expected;
216
217  static _LIBCPP_HIDE_FROM_ABI constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT {
218    return ((uint64_t(1u << 31) - __count) << 32) | (uint64_t(1u << 31) - __count);
219  }
220
221public:
222  using arrival_token = uint64_t;
223
224  static constexpr ptrdiff_t max() noexcept { return ptrdiff_t(1u << 31) - 1; }
225
226  _LIBCPP_HIDE_FROM_ABI explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
227      : __phase_arrived_expected(__init(__count)) {}
228  [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
229    auto const __inc = __arrived_unit * update;
230    auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
231
232    _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
233        update <= __old, "update is greater than the expected count for the current barrier phase");
234
235    if ((__old ^ (__old + __inc)) & __phase_bit) {
236      __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
237      __phase_arrived_expected.notify_all();
238    }
239    return __old & __phase_bit;
240  }
241  inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
242    auto const __test_fn = [=]() -> bool {
243      uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
244      return ((__current & __phase_bit) != __phase);
245    };
246    __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
247  }
248  inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
249    __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
250    (void)arrive(1);
251  }
252};
253
254#    endif // !_LIBCPP_HAS_NO_TREE_BARRIER
255
256template <class _CompletionF = __empty_completion>
257class _LIBCPP_DEPRECATED_ATOMIC_SYNC barrier {
258  __barrier_base<_CompletionF> __b_;
259
260public:
261  using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
262
263  static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return __barrier_base<_CompletionF>::max(); }
264
265  _LIBCPP_AVAILABILITY_SYNC
266  _LIBCPP_HIDE_FROM_ABI explicit barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
267      : __b_(__count, std::move(__completion)) {
268    _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
269        __count >= 0,
270        "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with a negative value");
271    _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
272        __count <= max(),
273        "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with "
274        "a value greater than max()");
275  }
276
277  barrier(barrier const&)            = delete;
278  barrier& operator=(barrier const&) = delete;
279
280  _LIBCPP_NODISCARD _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) {
281    _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__update > 0, "barrier:arrive must be called with a value greater than 0");
282    return __b_.arrive(__update);
283  }
284  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
285    __b_.wait(std::move(__phase));
286  }
287  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_wait() { wait(arrive()); }
288  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { __b_.arrive_and_drop(); }
289};
290
291_LIBCPP_END_NAMESPACE_STD
292
293#  endif // _LIBCPP_STD_VER >= 14
294
295_LIBCPP_POP_MACROS
296
297#endif // !defined(_LIBCPP_HAS_NO_THREADS)
298
299#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
300#  include <atomic>
301#  include <concepts>
302#  include <iterator>
303#  include <memory>
304#  include <stdexcept>
305#  include <variant>
306#endif
307
308#endif //_LIBCPP_BARRIER
309