xref: /freebsd/contrib/llvm-project/libcxx/include/barrier (revision ba3c1f5972d7b90feb6e6da47905ff2757e0fe57)
1// -*- C++ -*-
2//===----------------------------------------------------------------------===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef _LIBCPP_BARRIER
11#define _LIBCPP_BARRIER
12
13/*
14    barrier synopsis
15
16namespace std
17{
18
19  template<class CompletionFunction = see below>
20  class barrier
21  {
22  public:
23    using arrival_token = see below;
24
25    static constexpr ptrdiff_t max() noexcept;
26
27    constexpr explicit barrier(ptrdiff_t phase_count,
28                               CompletionFunction f = CompletionFunction());
29    ~barrier();
30
31    barrier(const barrier&) = delete;
32    barrier& operator=(const barrier&) = delete;
33
34    [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
35    void wait(arrival_token&& arrival) const;
36
37    void arrive_and_wait();
38    void arrive_and_drop();
39
40  private:
41    CompletionFunction completion; // exposition only
42  };
43
44}
45
46*/
47
48#include <__assert> // all public C++ headers provide the assertion handler
49#include <__availability>
50#include <__config>
51#include <__memory/unique_ptr.h>
52#include <__thread/timed_backoff_policy.h>
53#include <__utility/move.h>
54#include <atomic>
55#include <limits>
56
57#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
58#  pragma GCC system_header
59#endif
60
61#ifdef _LIBCPP_HAS_NO_THREADS
62# error "<barrier> is not supported since libc++ has been configured without support for threads."
63#endif
64
65_LIBCPP_PUSH_MACROS
66#include <__undef_macros>
67
68#if _LIBCPP_STD_VER >= 14
69
70_LIBCPP_BEGIN_NAMESPACE_STD
71
72struct __empty_completion
73{
74    inline _LIBCPP_INLINE_VISIBILITY
75    void operator()() noexcept
76    {
77    }
78};
79
80#ifndef _LIBCPP_HAS_NO_TREE_BARRIER
81
82/*
83
84The default implementation of __barrier_base is a classic tree barrier.
85
86It looks different from literature pseudocode for two main reasons:
87 1. Threads that call into std::barrier functions do not provide indices,
88    so a numbering step is added before the actual barrier algorithm,
89    appearing as an N+1 round to the N rounds of the tree barrier.
90 2. A great deal of attention has been paid to avoid cache line thrashing
91    by flattening the tree structure into cache-line sized arrays, that
92    are indexed in an efficient way.
93
94*/
95
96using __barrier_phase_t = uint8_t;
97
98class __barrier_algorithm_base;
99
100_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
101__barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected);
102
103_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
104bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier,
105                                     __barrier_phase_t __old_phase);
106
107_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
108void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
109
110template<class _CompletionF>
111class __barrier_base {
112    ptrdiff_t                                               __expected_;
113    unique_ptr<__barrier_algorithm_base,
114               void (*)(__barrier_algorithm_base*)>         __base_;
115    __atomic_base<ptrdiff_t>                                __expected_adjustment_;
116    _CompletionF                                            __completion_;
117    __atomic_base<__barrier_phase_t>                        __phase_;
118
119public:
120    using arrival_token = __barrier_phase_t;
121
122    static constexpr ptrdiff_t max() noexcept {
123        return numeric_limits<ptrdiff_t>::max();
124    }
125
126    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
127    __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
128            : __expected_(__expected), __base_(std::__construct_barrier_algorithm_base(this->__expected_),
129                                               &__destroy_barrier_algorithm_base),
130              __expected_adjustment_(0), __completion_(std::move(__completion)), __phase_(0)
131    {
132    }
133    [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
134    arrival_token arrive(ptrdiff_t __update)
135    {
136        auto const __old_phase = __phase_.load(memory_order_relaxed);
137        for(; __update; --__update)
138            if(__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) {
139                __completion_();
140                __expected_ += __expected_adjustment_.load(memory_order_relaxed);
141                __expected_adjustment_.store(0, memory_order_relaxed);
142                __phase_.store(__old_phase + 2, memory_order_release);
143                __phase_.notify_all();
144            }
145        return __old_phase;
146    }
147    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
148    void wait(arrival_token&& __old_phase) const
149    {
150        auto const __test_fn = [this, __old_phase]() -> bool {
151            return __phase_.load(memory_order_acquire) != __old_phase;
152        };
153        std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
154    }
155    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
156    void arrive_and_drop()
157    {
158        __expected_adjustment_.fetch_sub(1, memory_order_relaxed);
159        (void)arrive(1);
160    }
161};
162
163#else
164
165/*
166
167The alternative implementation of __barrier_base is a central barrier.
168
169Two versions of this algorithm are provided:
170 1. A fairly straightforward implementation of the litterature for the
171    general case where the completion function is not empty.
172 2. An optimized implementation that exploits 2's complement arithmetic
173    and well-defined overflow in atomic arithmetic, to handle the phase
174    roll-over for free.
175
176*/
177
178template<class _CompletionF>
179class __barrier_base {
180
181    __atomic_base<ptrdiff_t> __expected;
182    __atomic_base<ptrdiff_t> __arrived;
183    _CompletionF             __completion;
184    __atomic_base<bool>      __phase;
185public:
186    using arrival_token = bool;
187
188    static constexpr ptrdiff_t max() noexcept {
189        return numeric_limits<ptrdiff_t>::max();
190    }
191
192    _LIBCPP_INLINE_VISIBILITY
193    __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
194        : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false)
195    {
196    }
197    [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
198    arrival_token arrive(ptrdiff_t update)
199    {
200        auto const __old_phase = __phase.load(memory_order_relaxed);
201        auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
202        auto const new_expected = __expected.load(memory_order_relaxed);
203        if(0 == __result) {
204            __completion();
205            __arrived.store(new_expected, memory_order_relaxed);
206            __phase.store(!__old_phase, memory_order_release);
207            __phase.notify_all();
208        }
209        return __old_phase;
210    }
211    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
212    void wait(arrival_token&& __old_phase) const
213    {
214        __phase.wait(__old_phase, memory_order_acquire);
215    }
216    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
217    void arrive_and_drop()
218    {
219        __expected.fetch_sub(1, memory_order_relaxed);
220        (void)arrive(1);
221    }
222};
223
224template<>
225class __barrier_base<__empty_completion> {
226
227    static constexpr uint64_t __expected_unit = 1ull;
228    static constexpr uint64_t __arrived_unit = 1ull << 32;
229    static constexpr uint64_t __expected_mask = __arrived_unit - 1;
230    static constexpr uint64_t __phase_bit = 1ull << 63;
231    static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
232
233    __atomic_base<uint64_t>   __phase_arrived_expected;
234
235    static _LIBCPP_INLINE_VISIBILITY
236    constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT
237    {
238        return ((uint64_t(1u << 31) - __count) << 32)
239              | (uint64_t(1u << 31) - __count);
240    }
241
242public:
243    using arrival_token = uint64_t;
244
245    static constexpr ptrdiff_t max() noexcept {
246        return ptrdiff_t(1u << 31) - 1;
247    }
248
249    _LIBCPP_INLINE_VISIBILITY
250    explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
251        : __phase_arrived_expected(__init(__count))
252    {
253    }
254    [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
255    arrival_token arrive(ptrdiff_t update)
256    {
257        auto const __inc = __arrived_unit * update;
258        auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
259        if((__old ^ (__old + __inc)) & __phase_bit) {
260            __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
261            __phase_arrived_expected.notify_all();
262        }
263        return __old & __phase_bit;
264    }
265    inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
266    void wait(arrival_token&& __phase) const
267    {
268        auto const __test_fn = [=]() -> bool {
269            uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
270            return ((__current & __phase_bit) != __phase);
271        };
272        __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
273    }
274    inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
275    void arrive_and_drop()
276    {
277        __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
278        (void)arrive(1);
279    }
280};
281
282#endif // !_LIBCPP_HAS_NO_TREE_BARRIER
283
284template<class _CompletionF = __empty_completion>
285class barrier {
286
287    __barrier_base<_CompletionF> __b_;
288public:
289    using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
290
291    static constexpr ptrdiff_t max() noexcept {
292        return __barrier_base<_CompletionF>::max();
293    }
294
295    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
296    barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
297        : __b_(__count, _VSTD::move(__completion)) {
298    }
299
300    barrier(barrier const&) = delete;
301    barrier& operator=(barrier const&) = delete;
302
303    [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
304    arrival_token arrive(ptrdiff_t __update = 1)
305    {
306        return __b_.arrive(__update);
307    }
308    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
309    void wait(arrival_token&& __phase) const
310    {
311        __b_.wait(_VSTD::move(__phase));
312    }
313    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
314    void arrive_and_wait()
315    {
316        wait(arrive());
317    }
318    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
319    void arrive_and_drop()
320    {
321        __b_.arrive_and_drop();
322    }
323};
324
325_LIBCPP_END_NAMESPACE_STD
326
327#endif // _LIBCPP_STD_VER >= 14
328
329_LIBCPP_POP_MACROS
330
331#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
332#  include <concepts>
333#  include <iterator>
334#  include <memory>
335#  include <stdexcept>
336#  include <variant>
337#endif
338
339#endif //_LIBCPP_BARRIER
340