xref: /freebsd/contrib/llvm-project/libcxx/include/barrier (revision 06e20d1babecec1f45ffda513f55a8db5f1c0f56)
1// -*- C++ -*-
2//===--------------------------- barrier ----------------------------------===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef _LIBCPP_BARRIER
11#define _LIBCPP_BARRIER
12
13/*
14    barrier synopsis
15
16namespace std
17{
18
19  template<class CompletionFunction = see below>
20  class barrier
21  {
22  public:
23    using arrival_token = see below;
24
25    constexpr explicit barrier(ptrdiff_t phase_count,
26                               CompletionFunction f = CompletionFunction());
27    ~barrier();
28
29    barrier(const barrier&) = delete;
30    barrier& operator=(const barrier&) = delete;
31
32    [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
33    void wait(arrival_token&& arrival) const;
34
35    void arrive_and_wait();
36    void arrive_and_drop();
37
38  private:
39    CompletionFunction completion; // exposition only
40  };
41
42}
43
44*/
45
46#include <__config>
47#include <atomic>
48#ifndef _LIBCPP_HAS_NO_TREE_BARRIER
49# include <memory>
50#endif
51
52#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
53#pragma GCC system_header
54#endif
55
56#ifdef _LIBCPP_HAS_NO_THREADS
57# error <barrier> is not supported on this single threaded system
58#endif
59
60#if _LIBCPP_STD_VER >= 14
61
62_LIBCPP_BEGIN_NAMESPACE_STD
63
64struct __empty_completion
65{
66    inline _LIBCPP_INLINE_VISIBILITY
67    void operator()() noexcept
68    {
69    }
70};
71
72#ifndef _LIBCPP_HAS_NO_TREE_BARRIER
73
74/*
75
76The default implementation of __barrier_base is a classic tree barrier.
77
78It looks different from literature pseudocode for two main reasons:
79 1. Threads that call into std::barrier functions do not provide indices,
80    so a numbering step is added before the actual barrier algorithm,
81    appearing as an N+1 round to the N rounds of the tree barrier.
82 2. A great deal of attention has been paid to avoid cache line thrashing
83    by flattening the tree structure into cache-line sized arrays, that
84    are indexed in an efficient way.
85
86*/
87
88using __barrier_phase_t = uint8_t;
89
90class __barrier_algorithm_base;
91
92_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
93__barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected);
94
95_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
96bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier,
97                                     __barrier_phase_t __old_phase);
98
99_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
100void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
101
102template<class _CompletionF>
103class __barrier_base {
104
105    ptrdiff_t                                               __expected;
106    unique_ptr<__barrier_algorithm_base,
107               void (*)(__barrier_algorithm_base*)>         __base;
108    __atomic_base<ptrdiff_t>                                __expected_adjustment;
109    _CompletionF                                            __completion;
110    __atomic_base<__barrier_phase_t>                        __phase;
111
112public:
113    using arrival_token = __barrier_phase_t;
114
115    static constexpr ptrdiff_t max() noexcept {
116        return numeric_limits<ptrdiff_t>::max();
117    }
118
119    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
120    __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
121            : __expected(__expected), __base(__construct_barrier_algorithm_base(this->__expected),
122                                             &__destroy_barrier_algorithm_base),
123              __expected_adjustment(0), __completion(move(__completion)), __phase(0)
124    {
125    }
126    [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
127    arrival_token arrive(ptrdiff_t update)
128    {
129        auto const __old_phase = __phase.load(memory_order_relaxed);
130        for(; update; --update)
131            if(__arrive_barrier_algorithm_base(__base.get(), __old_phase)) {
132                __completion();
133                __expected += __expected_adjustment.load(memory_order_relaxed);
134                __expected_adjustment.store(0, memory_order_relaxed);
135                __phase.store(__old_phase + 2, memory_order_release);
136                __phase.notify_all();
137            }
138        return __old_phase;
139    }
140    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
141    void wait(arrival_token&& __old_phase) const
142    {
143        auto const __test_fn = [=]() -> bool {
144            return __phase.load(memory_order_acquire) != __old_phase;
145        };
146        __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
147    }
148    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
149    void arrive_and_drop()
150    {
151        __expected_adjustment.fetch_sub(1, memory_order_relaxed);
152        (void)arrive(1);
153    }
154};
155
156#else
157
158/*
159
160The alternative implementation of __barrier_base is a central barrier.
161
162Two versions of this algorithm are provided:
163 1. A fairly straightforward implementation of the litterature for the
164    general case where the completion function is not empty.
165 2. An optimized implementation that exploits 2's complement arithmetic
166    and well-defined overflow in atomic arithmetic, to handle the phase
167    roll-over for free.
168
169*/
170
171template<class _CompletionF>
172class __barrier_base {
173
174    __atomic_base<ptrdiff_t> __expected;
175    __atomic_base<ptrdiff_t> __arrived;
176    _CompletionF             __completion;
177    __atomic_base<bool>      __phase;
178public:
179    using arrival_token = bool;
180
181    static constexpr ptrdiff_t max() noexcept {
182        return numeric_limits<ptrdiff_t>::max();
183    }
184
185    _LIBCPP_INLINE_VISIBILITY
186    __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
187        : __expected(__expected), __arrived(__expected), __completion(move(__completion)), __phase(false)
188    {
189    }
190    [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
191    arrival_token arrive(ptrdiff_t update)
192    {
193        auto const __old_phase = __phase.load(memory_order_relaxed);
194        auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
195        auto const new_expected = __expected.load(memory_order_relaxed);
196        if(0 == __result) {
197            __completion();
198            __arrived.store(new_expected, memory_order_relaxed);
199            __phase.store(!__old_phase, memory_order_release);
200            __phase.notify_all();
201        }
202        return __old_phase;
203    }
204    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
205    void wait(arrival_token&& __old_phase) const
206    {
207        __phase.wait(__old_phase, memory_order_acquire);
208    }
209    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
210    void arrive_and_drop()
211    {
212        __expected.fetch_sub(1, memory_order_relaxed);
213        (void)arrive(1);
214    }
215};
216
217template<>
218class __barrier_base<__empty_completion> {
219
220    static constexpr uint64_t __expected_unit = 1ull;
221    static constexpr uint64_t __arrived_unit = 1ull << 32;
222    static constexpr uint64_t __expected_mask = __arrived_unit - 1;
223    static constexpr uint64_t __phase_bit = 1ull << 63;
224    static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
225
226    __atomic_base<uint64_t>   __phase_arrived_expected;
227
228    static _LIBCPP_INLINE_VISIBILITY
229    constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT
230    {
231        return ((uint64_t(1u << 31) - __count) << 32)
232              | (uint64_t(1u << 31) - __count);
233    }
234
235public:
236    using arrival_token = uint64_t;
237
238    static constexpr ptrdiff_t max() noexcept {
239        return ptrdiff_t(1u << 31) - 1;
240    }
241
242    _LIBCPP_INLINE_VISIBILITY
243    explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
244        : __phase_arrived_expected(__init(__count))
245    {
246    }
247    [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
248    arrival_token arrive(ptrdiff_t update)
249    {
250        auto const __inc = __arrived_unit * update;
251        auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
252        if((__old ^ (__old + __inc)) & __phase_bit) {
253            __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
254            __phase_arrived_expected.notify_all();
255        }
256        return __old & __phase_bit;
257    }
258    inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
259    void wait(arrival_token&& __phase) const
260    {
261        auto const __test_fn = [=]() -> bool {
262            uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
263            return ((__current & __phase_bit) != __phase);
264        };
265        __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
266    }
267    inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
268    void arrive_and_drop()
269    {
270        __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
271        (void)arrive(1);
272    }
273};
274
275#endif //_LIBCPP_HAS_NO_TREE_BARRIER
276
277template<class _CompletionF = __empty_completion>
278class barrier {
279
280    __barrier_base<_CompletionF> __b;
281public:
282    using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
283
284    static constexpr ptrdiff_t max() noexcept {
285        return __barrier_base<_CompletionF>::max();
286    }
287
288    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
289    barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
290        : __b(__count, std::move(__completion)) {
291    }
292
293    barrier(barrier const&) = delete;
294    barrier& operator=(barrier const&) = delete;
295
296    [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
297    arrival_token arrive(ptrdiff_t update = 1)
298    {
299        return __b.arrive(update);
300    }
301    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
302    void wait(arrival_token&& __phase) const
303    {
304        __b.wait(std::move(__phase));
305    }
306	_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
307    void arrive_and_wait()
308    {
309        wait(arrive());
310	}
311    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
312    void arrive_and_drop()
313    {
314        __b.arrive_and_drop();
315    }
316};
317
318_LIBCPP_END_NAMESPACE_STD
319
320#endif // _LIBCPP_STD_VER >= 14
321
322#endif //_LIBCPP_BARRIER
323