1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef _LIBCPP___PSTL_CPU_ALGOS_FIND_IF_H
10 #define _LIBCPP___PSTL_CPU_ALGOS_FIND_IF_H
11
12 #include <__algorithm/find_if.h>
13 #include <__assert>
14 #include <__atomic/atomic.h>
15 #include <__config>
16 #include <__functional/operations.h>
17 #include <__iterator/concepts.h>
18 #include <__iterator/iterator_traits.h>
19 #include <__pstl/backend_fwd.h>
20 #include <__pstl/cpu_algos/cpu_traits.h>
21 #include <__type_traits/is_execution_policy.h>
22 #include <__utility/move.h>
23 #include <__utility/pair.h>
24 #include <cstddef>
25 #include <optional>
26
27 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
28 # pragma GCC system_header
29 #endif
30
31 _LIBCPP_PUSH_MACROS
32 #include <__undef_macros>
33
34 _LIBCPP_BEGIN_NAMESPACE_STD
35 namespace __pstl {
36
37 template <class _Backend, class _Index, class _Brick, class _Compare>
38 _LIBCPP_HIDE_FROM_ABI optional<_Index>
__parallel_find(_Index __first,_Index __last,_Brick __f,_Compare __comp,bool __b_first)39 __parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool __b_first) {
40 typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType;
41 const _DifferenceType __n = __last - __first;
42 _DifferenceType __initial_dist = __b_first ? __n : -1;
43 std::atomic<_DifferenceType> __extremum(__initial_dist);
44 // TODO: find out what is better here: parallel_for or parallel_reduce
45 auto __res =
46 __cpu_traits<_Backend>::__for_each(__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
47 // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of
48 // why using a shared variable scales fairly well in this situation.
49 if (__comp(__i - __first, __extremum)) {
50 _Index __result = __f(__i, __j);
51 // If not '__last' returned then we found what we want so put this to extremum
52 if (__result != __j) {
53 const _DifferenceType __k = __result - __first;
54 for (_DifferenceType __old = __extremum; __comp(__k, __old); __old = __extremum) {
55 __extremum.compare_exchange_weak(__old, __k);
56 }
57 }
58 }
59 });
60 if (!__res)
61 return nullopt;
62 return __extremum.load() != __initial_dist ? __first + __extremum.load() : __last;
63 }
64
65 template <class _Backend, class _Index, class _DifferenceType, class _Compare>
66 _LIBCPP_HIDE_FROM_ABI _Index
__simd_first(_Index __first,_DifferenceType __begin,_DifferenceType __end,_Compare __comp)67 __simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept {
68 // Experiments show good block sizes like this
69 const _DifferenceType __block_size = 8;
70 alignas(__cpu_traits<_Backend>::__lane_size) _DifferenceType __lane[__block_size] = {0};
71 while (__end - __begin >= __block_size) {
72 _DifferenceType __found = 0;
73 _PSTL_PRAGMA_SIMD_REDUCTION(| : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; ++__i) {
74 const _DifferenceType __t = __comp(__first, __i);
75 __lane[__i - __begin] = __t;
76 __found |= __t;
77 }
78 if (__found) {
79 _DifferenceType __i;
80 // This will vectorize
81 for (__i = 0; __i < __block_size; ++__i) {
82 if (__lane[__i]) {
83 break;
84 }
85 }
86 return __first + __begin + __i;
87 }
88 __begin += __block_size;
89 }
90
91 // Keep remainder scalar
92 while (__begin != __end) {
93 if (__comp(__first, __begin)) {
94 return __first + __begin;
95 }
96 ++__begin;
97 }
98 return __first + __end;
99 }
100
101 template <class _Backend, class _RawExecutionPolicy>
102 struct __cpu_parallel_find_if {
103 template <class _Policy, class _ForwardIterator, class _Predicate>
104 _LIBCPP_HIDE_FROM_ABI optional<_ForwardIterator>
operator__cpu_parallel_find_if105 operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) const noexcept {
106 if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
107 __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
108 return __pstl::__parallel_find<_Backend>(
109 __first,
110 __last,
111 [&__policy, &__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
112 using _FindIfUnseq = __pstl::__find_if<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
113 auto __res = _FindIfUnseq()(std::__remove_parallel_policy(__policy), __brick_first, __brick_last, __pred);
114 _LIBCPP_ASSERT_INTERNAL(__res, "unseq/seq should never try to allocate!");
115 return *std::move(__res);
116 },
117 less<>{},
118 true);
119 } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
120 __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
121 using __diff_t = __iter_diff_t<_ForwardIterator>;
122 return __pstl::__simd_first<_Backend>(
123 __first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) {
124 return __pred(__iter[__i]);
125 });
126 } else {
127 return std::find_if(__first, __last, __pred);
128 }
129 }
130 };
131
132 } // namespace __pstl
133 _LIBCPP_END_NAMESPACE_STD
134
135 _LIBCPP_POP_MACROS
136
137 #endif // _LIBCPP___PSTL_CPU_ALGOS_FIND_IF_H
138