xref: /freebsd/contrib/llvm-project/libcxx/include/__pstl/cpu_algos/transform_reduce.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_REDUCE_H
10 #define _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_REDUCE_H
11 
12 #include <__assert>
13 #include <__config>
14 #include <__iterator/concepts.h>
15 #include <__iterator/iterator_traits.h>
16 #include <__numeric/transform_reduce.h>
17 #include <__pstl/backend_fwd.h>
18 #include <__pstl/cpu_algos/cpu_traits.h>
19 #include <__type_traits/desugars_to.h>
20 #include <__type_traits/is_arithmetic.h>
21 #include <__type_traits/is_execution_policy.h>
22 #include <__utility/move.h>
23 #include <cstddef>
24 #include <new>
25 #include <optional>
26 
27 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
28 #  pragma GCC system_header
29 #endif
30 
31 _LIBCPP_PUSH_MACROS
32 #include <__undef_macros>
33 
34 _LIBCPP_BEGIN_NAMESPACE_STD
35 namespace __pstl {
36 
37 template <typename _Backend,
38           typename _DifferenceType,
39           typename _Tp,
40           typename _BinaryOperation,
41           typename _UnaryOperation,
42           typename _UnaryResult = invoke_result_t<_UnaryOperation, _DifferenceType>,
43           __enable_if_t<__desugars_to_v<__plus_tag, _BinaryOperation, _Tp, _UnaryResult> && is_arithmetic_v<_Tp> &&
44                             is_arithmetic_v<_UnaryResult>,
45                         int>    = 0>
46 _LIBCPP_HIDE_FROM_ABI _Tp
__simd_transform_reduce(_DifferenceType __n,_Tp __init,_BinaryOperation,_UnaryOperation __f)47 __simd_transform_reduce(_DifferenceType __n, _Tp __init, _BinaryOperation, _UnaryOperation __f) noexcept {
48   _PSTL_PRAGMA_SIMD_REDUCTION(+ : __init)
49   for (_DifferenceType __i = 0; __i < __n; ++__i)
50     __init += __f(__i);
51   return __init;
52 }
53 
54 template <typename _Backend,
55           typename _Size,
56           typename _Tp,
57           typename _BinaryOperation,
58           typename _UnaryOperation,
59           typename _UnaryResult = invoke_result_t<_UnaryOperation, _Size>,
60           __enable_if_t<!(__desugars_to_v<__plus_tag, _BinaryOperation, _Tp, _UnaryResult> && is_arithmetic_v<_Tp> &&
61                           is_arithmetic_v<_UnaryResult>),
62                         int>    = 0>
63 _LIBCPP_HIDE_FROM_ABI _Tp
__simd_transform_reduce(_Size __n,_Tp __init,_BinaryOperation __binary_op,_UnaryOperation __f)64 __simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __f) noexcept {
65   constexpr size_t __lane_size = __cpu_traits<_Backend>::__lane_size;
66   const _Size __block_size     = __lane_size / sizeof(_Tp);
67   if (__n > 2 * __block_size && __block_size > 1) {
68     alignas(__lane_size) char __lane_buffer[__lane_size];
69     _Tp* __lane = reinterpret_cast<_Tp*>(__lane_buffer);
70 
71     // initializer
72     _PSTL_PRAGMA_SIMD
73     for (_Size __i = 0; __i < __block_size; ++__i) {
74       ::new (__lane + __i) _Tp(__binary_op(__f(__i), __f(__block_size + __i)));
75     }
76     // main loop
77     _Size __i                    = 2 * __block_size;
78     const _Size __last_iteration = __block_size * (__n / __block_size);
79     for (; __i < __last_iteration; __i += __block_size) {
80       _PSTL_PRAGMA_SIMD
81       for (_Size __j = 0; __j < __block_size; ++__j) {
82         __lane[__j] = __binary_op(std::move(__lane[__j]), __f(__i + __j));
83       }
84     }
85     // remainder
86     _PSTL_PRAGMA_SIMD
87     for (_Size __j = 0; __j < __n - __last_iteration; ++__j) {
88       __lane[__j] = __binary_op(std::move(__lane[__j]), __f(__last_iteration + __j));
89     }
90     // combiner
91     for (_Size __j = 0; __j < __block_size; ++__j) {
92       __init = __binary_op(std::move(__init), std::move(__lane[__j]));
93     }
94     // destroyer
95     _PSTL_PRAGMA_SIMD
96     for (_Size __j = 0; __j < __block_size; ++__j) {
97       __lane[__j].~_Tp();
98     }
99   } else {
100     for (_Size __i = 0; __i < __n; ++__i) {
101       __init = __binary_op(std::move(__init), __f(__i));
102     }
103   }
104   return __init;
105 }
106 
107 template <class _Backend, class _RawExecutionPolicy>
108 struct __cpu_parallel_transform_reduce_binary {
109   template <class _Policy,
110             class _ForwardIterator1,
111             class _ForwardIterator2,
112             class _Tp,
113             class _BinaryOperation1,
114             class _BinaryOperation2>
operator__cpu_parallel_transform_reduce_binary115   _LIBCPP_HIDE_FROM_ABI optional<_Tp> operator()(
116       _Policy&& __policy,
117       _ForwardIterator1 __first1,
118       _ForwardIterator1 __last1,
119       _ForwardIterator2 __first2,
120       _Tp __init,
121       _BinaryOperation1 __reduce,
122       _BinaryOperation2 __transform) const noexcept {
123     if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
124                   __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
125                   __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
126       return __cpu_traits<_Backend>::__transform_reduce(
127           __first1,
128           std::move(__last1),
129           [__first1, __first2, __transform](_ForwardIterator1 __iter) {
130             return __transform(*__iter, *(__first2 + (__iter - __first1)));
131           },
132           std::move(__init),
133           std::move(__reduce),
134           [&__policy, __first1, __first2, __reduce, __transform](
135               _ForwardIterator1 __brick_first, _ForwardIterator1 __brick_last, _Tp __brick_init) {
136             using _TransformReduceBinaryUnseq =
137                 __pstl::__transform_reduce_binary<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
138             return *_TransformReduceBinaryUnseq()(
139                 std::__remove_parallel_policy(__policy),
140                 __brick_first,
141                 std::move(__brick_last),
142                 __first2 + (__brick_first - __first1),
143                 std::move(__brick_init),
144                 std::move(__reduce),
145                 std::move(__transform));
146           });
147     } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
148                          __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
149                          __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
150       return __pstl::__simd_transform_reduce<_Backend>(
151           __last1 - __first1, std::move(__init), std::move(__reduce), [&](__iter_diff_t<_ForwardIterator1> __i) {
152             return __transform(__first1[__i], __first2[__i]);
153           });
154     } else {
155       return std::transform_reduce(
156           std::move(__first1),
157           std::move(__last1),
158           std::move(__first2),
159           std::move(__init),
160           std::move(__reduce),
161           std::move(__transform));
162     }
163   }
164 };
165 
166 template <class _Backend, class _RawExecutionPolicy>
167 struct __cpu_parallel_transform_reduce {
168   template <class _Policy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation>
169   _LIBCPP_HIDE_FROM_ABI optional<_Tp>
operator__cpu_parallel_transform_reduce170   operator()(_Policy&& __policy,
171              _ForwardIterator __first,
172              _ForwardIterator __last,
173              _Tp __init,
174              _BinaryOperation __reduce,
175              _UnaryOperation __transform) const noexcept {
176     if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
177                   __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
178       return __cpu_traits<_Backend>::__transform_reduce(
179           std::move(__first),
180           std::move(__last),
181           [__transform](_ForwardIterator __iter) { return __transform(*__iter); },
182           std::move(__init),
183           __reduce,
184           [&__policy, __transform, __reduce](auto __brick_first, auto __brick_last, _Tp __brick_init) {
185             using _TransformReduceUnseq =
186                 __pstl::__transform_reduce<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
187             auto __res = _TransformReduceUnseq()(
188                 std::__remove_parallel_policy(__policy),
189                 std::move(__brick_first),
190                 std::move(__brick_last),
191                 std::move(__brick_init),
192                 std::move(__reduce),
193                 std::move(__transform));
194             _LIBCPP_ASSERT_INTERNAL(__res, "unseq/seq should never try to allocate!");
195             return *std::move(__res);
196           });
197     } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
198                          __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
199       return __pstl::__simd_transform_reduce<_Backend>(
200           __last - __first,
201           std::move(__init),
202           std::move(__reduce),
203           [=, &__transform](__iter_diff_t<_ForwardIterator> __i) { return __transform(__first[__i]); });
204     } else {
205       return std::transform_reduce(
206           std::move(__first), std::move(__last), std::move(__init), std::move(__reduce), std::move(__transform));
207     }
208   }
209 };
210 
211 } // namespace __pstl
212 _LIBCPP_END_NAMESPACE_STD
213 
214 _LIBCPP_POP_MACROS
215 
216 #endif // _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_REDUCE_H
217