xref: /freebsd/contrib/llvm-project/libc/shared/rpc_util.h (revision 8f6978f83cc64a2e644d9bdf380a6996d3acdc4b)
1*8f6978f8SDimitry Andric //===-- Shared memory RPC client / server utilities -------------*- C++ -*-===//
2*8f6978f8SDimitry Andric //
3*8f6978f8SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*8f6978f8SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*8f6978f8SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*8f6978f8SDimitry Andric //
7*8f6978f8SDimitry Andric //===----------------------------------------------------------------------===//
8*8f6978f8SDimitry Andric 
9*8f6978f8SDimitry Andric #ifndef LLVM_LIBC_SHARED_RPC_UTIL_H
10*8f6978f8SDimitry Andric #define LLVM_LIBC_SHARED_RPC_UTIL_H
11*8f6978f8SDimitry Andric 
12*8f6978f8SDimitry Andric #include <stddef.h>
13*8f6978f8SDimitry Andric #include <stdint.h>
14*8f6978f8SDimitry Andric 
15*8f6978f8SDimitry Andric #if (defined(__NVPTX__) || defined(__AMDGPU__)) &&                             \
16*8f6978f8SDimitry Andric     !((defined(__CUDA__) && !defined(__CUDA_ARCH__)) ||                        \
17*8f6978f8SDimitry Andric       (defined(__HIP__) && !defined(__HIP_DEVICE_COMPILE__)))
18*8f6978f8SDimitry Andric #include <gpuintrin.h>
19*8f6978f8SDimitry Andric #define RPC_TARGET_IS_GPU
20*8f6978f8SDimitry Andric #endif
21*8f6978f8SDimitry Andric 
22*8f6978f8SDimitry Andric // Workaround for missing __has_builtin in < GCC 10.
23*8f6978f8SDimitry Andric #ifndef __has_builtin
24*8f6978f8SDimitry Andric #define __has_builtin(x) 0
25*8f6978f8SDimitry Andric #endif
26*8f6978f8SDimitry Andric 
27*8f6978f8SDimitry Andric #ifndef RPC_ATTRS
28*8f6978f8SDimitry Andric #if defined(__CUDA__) || defined(__HIP__)
29*8f6978f8SDimitry Andric #define RPC_ATTRS __attribute__((host, device)) inline
30*8f6978f8SDimitry Andric #else
31*8f6978f8SDimitry Andric #define RPC_ATTRS inline
32*8f6978f8SDimitry Andric #endif
33*8f6978f8SDimitry Andric #endif
34*8f6978f8SDimitry Andric 
35*8f6978f8SDimitry Andric namespace rpc {
36*8f6978f8SDimitry Andric 
/// Identity metafunction: exposes its template argument unchanged through the
/// nested `type` alias. Used as the common base of the traits below.
template <typename T> struct type_identity {
  typedef T type;
};
40*8f6978f8SDimitry Andric 
/// Wraps the compile-time constant \p v of type \p T as the static member
/// `value`.
template <class T, T v> struct type_constant {
  // A constexpr static data member is implicitly inline in C++17.
  static constexpr T value = v;
};
44*8f6978f8SDimitry Andric 
45*8f6978f8SDimitry Andric template <class T> struct remove_reference : type_identity<T> {};
46*8f6978f8SDimitry Andric template <class T> struct remove_reference<T &> : type_identity<T> {};
47*8f6978f8SDimitry Andric template <class T> struct remove_reference<T &&> : type_identity<T> {};
48*8f6978f8SDimitry Andric 
49*8f6978f8SDimitry Andric template <class T> struct is_const : type_constant<bool, false> {};
50*8f6978f8SDimitry Andric template <class T> struct is_const<const T> : type_constant<bool, true> {};
51*8f6978f8SDimitry Andric 
52*8f6978f8SDimitry Andric /// Freestanding implementation of std::move.
53*8f6978f8SDimitry Andric template <class T>
54*8f6978f8SDimitry Andric RPC_ATTRS constexpr typename remove_reference<T>::type &&move(T &&t) {
55*8f6978f8SDimitry Andric   return static_cast<typename remove_reference<T>::type &&>(t);
56*8f6978f8SDimitry Andric }
57*8f6978f8SDimitry Andric 
58*8f6978f8SDimitry Andric /// Freestanding implementation of std::forward.
59*8f6978f8SDimitry Andric template <typename T>
60*8f6978f8SDimitry Andric RPC_ATTRS constexpr T &&forward(typename remove_reference<T>::type &value) {
61*8f6978f8SDimitry Andric   return static_cast<T &&>(value);
62*8f6978f8SDimitry Andric }
63*8f6978f8SDimitry Andric template <typename T>
64*8f6978f8SDimitry Andric RPC_ATTRS constexpr T &&forward(typename remove_reference<T>::type &&value) {
65*8f6978f8SDimitry Andric   return static_cast<T &&>(value);
66*8f6978f8SDimitry Andric }
67*8f6978f8SDimitry Andric 
68*8f6978f8SDimitry Andric struct in_place_t {
69*8f6978f8SDimitry Andric   RPC_ATTRS explicit in_place_t() = default;
70*8f6978f8SDimitry Andric };
71*8f6978f8SDimitry Andric 
72*8f6978f8SDimitry Andric struct nullopt_t {
73*8f6978f8SDimitry Andric   RPC_ATTRS constexpr explicit nullopt_t() = default;
74*8f6978f8SDimitry Andric };
75*8f6978f8SDimitry Andric 
76*8f6978f8SDimitry Andric constexpr inline in_place_t in_place{};
77*8f6978f8SDimitry Andric constexpr inline nullopt_t nullopt{};
78*8f6978f8SDimitry Andric 
79*8f6978f8SDimitry Andric /// Freestanding and minimal implementation of std::optional.
80*8f6978f8SDimitry Andric template <typename T> class optional {
81*8f6978f8SDimitry Andric   template <typename U> struct OptionalStorage {
82*8f6978f8SDimitry Andric     union {
83*8f6978f8SDimitry Andric       char empty;
84*8f6978f8SDimitry Andric       U stored_value;
85*8f6978f8SDimitry Andric     };
86*8f6978f8SDimitry Andric 
87*8f6978f8SDimitry Andric     bool in_use = false;
88*8f6978f8SDimitry Andric 
89*8f6978f8SDimitry Andric     RPC_ATTRS ~OptionalStorage() { reset(); }
90*8f6978f8SDimitry Andric 
91*8f6978f8SDimitry Andric     RPC_ATTRS constexpr OptionalStorage() : empty() {}
92*8f6978f8SDimitry Andric 
93*8f6978f8SDimitry Andric     template <typename... Args>
94*8f6978f8SDimitry Andric     RPC_ATTRS constexpr explicit OptionalStorage(in_place_t, Args &&...args)
95*8f6978f8SDimitry Andric         : stored_value(forward<Args>(args)...) {}
96*8f6978f8SDimitry Andric 
97*8f6978f8SDimitry Andric     RPC_ATTRS constexpr void reset() {
98*8f6978f8SDimitry Andric       if (in_use)
99*8f6978f8SDimitry Andric         stored_value.~U();
100*8f6978f8SDimitry Andric       in_use = false;
101*8f6978f8SDimitry Andric     }
102*8f6978f8SDimitry Andric   };
103*8f6978f8SDimitry Andric 
104*8f6978f8SDimitry Andric   OptionalStorage<T> storage;
105*8f6978f8SDimitry Andric 
106*8f6978f8SDimitry Andric public:
107*8f6978f8SDimitry Andric   RPC_ATTRS constexpr optional() = default;
108*8f6978f8SDimitry Andric   RPC_ATTRS constexpr optional(nullopt_t) {}
109*8f6978f8SDimitry Andric 
110*8f6978f8SDimitry Andric   RPC_ATTRS constexpr optional(const T &t) : storage(in_place, t) {
111*8f6978f8SDimitry Andric     storage.in_use = true;
112*8f6978f8SDimitry Andric   }
113*8f6978f8SDimitry Andric   RPC_ATTRS constexpr optional(const optional &) = default;
114*8f6978f8SDimitry Andric 
115*8f6978f8SDimitry Andric   RPC_ATTRS constexpr optional(T &&t) : storage(in_place, move(t)) {
116*8f6978f8SDimitry Andric     storage.in_use = true;
117*8f6978f8SDimitry Andric   }
118*8f6978f8SDimitry Andric   RPC_ATTRS constexpr optional(optional &&O) = default;
119*8f6978f8SDimitry Andric 
120*8f6978f8SDimitry Andric   RPC_ATTRS constexpr optional &operator=(T &&t) {
121*8f6978f8SDimitry Andric     storage = move(t);
122*8f6978f8SDimitry Andric     return *this;
123*8f6978f8SDimitry Andric   }
124*8f6978f8SDimitry Andric   RPC_ATTRS constexpr optional &operator=(optional &&) = default;
125*8f6978f8SDimitry Andric 
126*8f6978f8SDimitry Andric   RPC_ATTRS constexpr optional &operator=(const T &t) {
127*8f6978f8SDimitry Andric     storage = t;
128*8f6978f8SDimitry Andric     return *this;
129*8f6978f8SDimitry Andric   }
130*8f6978f8SDimitry Andric   RPC_ATTRS constexpr optional &operator=(const optional &) = default;
131*8f6978f8SDimitry Andric 
132*8f6978f8SDimitry Andric   RPC_ATTRS constexpr void reset() { storage.reset(); }
133*8f6978f8SDimitry Andric 
134*8f6978f8SDimitry Andric   RPC_ATTRS constexpr const T &value() const & { return storage.stored_value; }
135*8f6978f8SDimitry Andric 
136*8f6978f8SDimitry Andric   RPC_ATTRS constexpr T &value() & { return storage.stored_value; }
137*8f6978f8SDimitry Andric 
138*8f6978f8SDimitry Andric   RPC_ATTRS constexpr explicit operator bool() const { return storage.in_use; }
139*8f6978f8SDimitry Andric   RPC_ATTRS constexpr bool has_value() const { return storage.in_use; }
140*8f6978f8SDimitry Andric   RPC_ATTRS constexpr const T *operator->() const {
141*8f6978f8SDimitry Andric     return &storage.stored_value;
142*8f6978f8SDimitry Andric   }
143*8f6978f8SDimitry Andric   RPC_ATTRS constexpr T *operator->() { return &storage.stored_value; }
144*8f6978f8SDimitry Andric   RPC_ATTRS constexpr const T &operator*() const & {
145*8f6978f8SDimitry Andric     return storage.stored_value;
146*8f6978f8SDimitry Andric   }
147*8f6978f8SDimitry Andric   RPC_ATTRS constexpr T &operator*() & { return storage.stored_value; }
148*8f6978f8SDimitry Andric 
149*8f6978f8SDimitry Andric   RPC_ATTRS constexpr T &&value() && { return move(storage.stored_value); }
150*8f6978f8SDimitry Andric   RPC_ATTRS constexpr T &&operator*() && { return move(storage.stored_value); }
151*8f6978f8SDimitry Andric };
152*8f6978f8SDimitry Andric 
153*8f6978f8SDimitry Andric /// Suspend the thread briefly to assist the thread scheduler during busy loops.
154*8f6978f8SDimitry Andric RPC_ATTRS void sleep_briefly() {
155*8f6978f8SDimitry Andric #if __has_builtin(__nvvm_reflect)
156*8f6978f8SDimitry Andric   if (__nvvm_reflect("__CUDA_ARCH") >= 700)
157*8f6978f8SDimitry Andric     asm("nanosleep.u32 64;" ::: "memory");
158*8f6978f8SDimitry Andric #elif __has_builtin(__builtin_amdgcn_s_sleep)
159*8f6978f8SDimitry Andric   __builtin_amdgcn_s_sleep(2);
160*8f6978f8SDimitry Andric #elif __has_builtin(__builtin_ia32_pause)
161*8f6978f8SDimitry Andric   __builtin_ia32_pause();
162*8f6978f8SDimitry Andric #elif __has_builtin(__builtin_arm_isb)
163*8f6978f8SDimitry Andric   __builtin_arm_isb(0xf);
164*8f6978f8SDimitry Andric #else
165*8f6978f8SDimitry Andric   // Simply do nothing if sleeping isn't supported on this platform.
166*8f6978f8SDimitry Andric #endif
167*8f6978f8SDimitry Andric }
168*8f6978f8SDimitry Andric 
169*8f6978f8SDimitry Andric /// Conditional to indicate if this process is running on the GPU.
170*8f6978f8SDimitry Andric RPC_ATTRS constexpr bool is_process_gpu() {
171*8f6978f8SDimitry Andric #ifdef RPC_TARGET_IS_GPU
172*8f6978f8SDimitry Andric   return true;
173*8f6978f8SDimitry Andric #else
174*8f6978f8SDimitry Andric   return false;
175*8f6978f8SDimitry Andric #endif
176*8f6978f8SDimitry Andric }
177*8f6978f8SDimitry Andric 
178*8f6978f8SDimitry Andric /// Wait for all lanes in the group to complete.
179*8f6978f8SDimitry Andric RPC_ATTRS void sync_lane([[maybe_unused]] uint64_t lane_mask) {
180*8f6978f8SDimitry Andric #ifdef RPC_TARGET_IS_GPU
181*8f6978f8SDimitry Andric   return __gpu_sync_lane(lane_mask);
182*8f6978f8SDimitry Andric #endif
183*8f6978f8SDimitry Andric }
184*8f6978f8SDimitry Andric 
185*8f6978f8SDimitry Andric /// Copies the value from the first active thread to the rest.
186*8f6978f8SDimitry Andric RPC_ATTRS uint32_t broadcast_value([[maybe_unused]] uint64_t lane_mask,
187*8f6978f8SDimitry Andric                                    uint32_t x) {
188*8f6978f8SDimitry Andric #ifdef RPC_TARGET_IS_GPU
189*8f6978f8SDimitry Andric   return __gpu_read_first_lane_u32(lane_mask, x);
190*8f6978f8SDimitry Andric #else
191*8f6978f8SDimitry Andric   return x;
192*8f6978f8SDimitry Andric #endif
193*8f6978f8SDimitry Andric }
194*8f6978f8SDimitry Andric 
195*8f6978f8SDimitry Andric /// Returns the number lanes that participate in the RPC interface.
196*8f6978f8SDimitry Andric RPC_ATTRS uint32_t get_num_lanes() {
197*8f6978f8SDimitry Andric #ifdef RPC_TARGET_IS_GPU
198*8f6978f8SDimitry Andric   return __gpu_num_lanes();
199*8f6978f8SDimitry Andric #else
200*8f6978f8SDimitry Andric   return 1;
201*8f6978f8SDimitry Andric #endif
202*8f6978f8SDimitry Andric }
203*8f6978f8SDimitry Andric 
204*8f6978f8SDimitry Andric /// Returns the id of the thread inside of an AMD wavefront executing together.
205*8f6978f8SDimitry Andric RPC_ATTRS uint64_t get_lane_mask() {
206*8f6978f8SDimitry Andric #ifdef RPC_TARGET_IS_GPU
207*8f6978f8SDimitry Andric   return __gpu_lane_mask();
208*8f6978f8SDimitry Andric #else
209*8f6978f8SDimitry Andric   return 1;
210*8f6978f8SDimitry Andric #endif
211*8f6978f8SDimitry Andric }
212*8f6978f8SDimitry Andric 
213*8f6978f8SDimitry Andric /// Returns the id of the thread inside of an AMD wavefront executing together.
214*8f6978f8SDimitry Andric RPC_ATTRS uint32_t get_lane_id() {
215*8f6978f8SDimitry Andric #ifdef RPC_TARGET_IS_GPU
216*8f6978f8SDimitry Andric   return __gpu_lane_id();
217*8f6978f8SDimitry Andric #else
218*8f6978f8SDimitry Andric   return 0;
219*8f6978f8SDimitry Andric #endif
220*8f6978f8SDimitry Andric }
221*8f6978f8SDimitry Andric 
222*8f6978f8SDimitry Andric /// Conditional that is only true for a single thread in a lane.
223*8f6978f8SDimitry Andric RPC_ATTRS bool is_first_lane([[maybe_unused]] uint64_t lane_mask) {
224*8f6978f8SDimitry Andric #ifdef RPC_TARGET_IS_GPU
225*8f6978f8SDimitry Andric   return __gpu_is_first_in_lane(lane_mask);
226*8f6978f8SDimitry Andric #else
227*8f6978f8SDimitry Andric   return true;
228*8f6978f8SDimitry Andric #endif
229*8f6978f8SDimitry Andric }
230*8f6978f8SDimitry Andric 
231*8f6978f8SDimitry Andric /// Returns a bitmask of threads in the current lane for which \p x is true.
232*8f6978f8SDimitry Andric RPC_ATTRS uint64_t ballot([[maybe_unused]] uint64_t lane_mask, bool x) {
233*8f6978f8SDimitry Andric #ifdef RPC_TARGET_IS_GPU
234*8f6978f8SDimitry Andric   return __gpu_ballot(lane_mask, x);
235*8f6978f8SDimitry Andric #else
236*8f6978f8SDimitry Andric   return x;
237*8f6978f8SDimitry Andric #endif
238*8f6978f8SDimitry Andric }
239*8f6978f8SDimitry Andric 
240*8f6978f8SDimitry Andric /// Return \p val aligned "upwards" according to \p align.
241*8f6978f8SDimitry Andric template <typename V, typename A>
242*8f6978f8SDimitry Andric RPC_ATTRS constexpr V align_up(V val, A align) {
243*8f6978f8SDimitry Andric   return ((val + V(align) - 1) / V(align)) * V(align);
244*8f6978f8SDimitry Andric }
245*8f6978f8SDimitry Andric 
246*8f6978f8SDimitry Andric /// Utility to provide a unified interface between the CPU and GPU's memory
247*8f6978f8SDimitry Andric /// model. On the GPU stack variables are always private to a lane so we can
248*8f6978f8SDimitry Andric /// simply use the variable passed in. On the CPU we need to allocate enough
249*8f6978f8SDimitry Andric /// space for the whole lane and index into it.
250*8f6978f8SDimitry Andric template <typename V> RPC_ATTRS V &lane_value(V *val, uint32_t id) {
251*8f6978f8SDimitry Andric   if constexpr (is_process_gpu())
252*8f6978f8SDimitry Andric     return *val;
253*8f6978f8SDimitry Andric   return val[id];
254*8f6978f8SDimitry Andric }
255*8f6978f8SDimitry Andric 
256*8f6978f8SDimitry Andric /// Advance the \p p by \p bytes.
257*8f6978f8SDimitry Andric template <typename T, typename U> RPC_ATTRS T *advance(T *ptr, U bytes) {
258*8f6978f8SDimitry Andric   if constexpr (is_const<T>::value)
259*8f6978f8SDimitry Andric     return reinterpret_cast<T *>(reinterpret_cast<const uint8_t *>(ptr) +
260*8f6978f8SDimitry Andric                                  bytes);
261*8f6978f8SDimitry Andric   else
262*8f6978f8SDimitry Andric     return reinterpret_cast<T *>(reinterpret_cast<uint8_t *>(ptr) + bytes);
263*8f6978f8SDimitry Andric }
264*8f6978f8SDimitry Andric 
265*8f6978f8SDimitry Andric /// Wrapper around the optimal memory copy implementation for the target.
266*8f6978f8SDimitry Andric RPC_ATTRS void rpc_memcpy(void *dst, const void *src, size_t count) {
267*8f6978f8SDimitry Andric   __builtin_memcpy(dst, src, count);
268*8f6978f8SDimitry Andric }
269*8f6978f8SDimitry Andric 
270*8f6978f8SDimitry Andric template <class T> RPC_ATTRS constexpr const T &max(const T &a, const T &b) {
271*8f6978f8SDimitry Andric   return (a < b) ? b : a;
272*8f6978f8SDimitry Andric }
273*8f6978f8SDimitry Andric 
274*8f6978f8SDimitry Andric } // namespace rpc
275*8f6978f8SDimitry Andric 
276*8f6978f8SDimitry Andric #endif // LLVM_LIBC_SHARED_RPC_UTIL_H
277