//===-- Shared memory RPC client / server utilities -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SHARED_RPC_UTIL_H
#define LLVM_LIBC_SHARED_RPC_UTIL_H

#include <stddef.h>
#include <stdint.h>

// RPC_TARGET_IS_GPU is defined only when compiling for an NVPTX or AMDGPU
// target and this is not the host pass of a CUDA or HIP compilation.
#if (defined(__NVPTX__) || defined(__AMDGPU__)) &&                             \
    !((defined(__CUDA__) && !defined(__CUDA_ARCH__)) ||                        \
      (defined(__HIP__) && !defined(__HIP_DEVICE_COMPILE__)))
#include <gpuintrin.h>
#define RPC_TARGET_IS_GPU
#endif

// Workaround for missing __has_builtin in < GCC 10.
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif

// Attributes applied to every function in this header. Users may predefine
// RPC_ATTRS to override the default.
#ifndef RPC_ATTRS
#if defined(__CUDA__) || defined(__HIP__)
#define RPC_ATTRS __attribute__((host, device)) inline
#else
#define RPC_ATTRS inline
#endif
#endif

namespace rpc {

/// Freestanding implementation of std::type_identity.
template <typename T> struct type_identity {
  using type = T;
};

/// Freestanding equivalent of std::integral_constant.
template <class T, T v> struct type_constant {
  static inline constexpr T value = v;
};

/// Freestanding implementation of std::remove_reference.
template <class T> struct remove_reference : type_identity<T> {};
template <class T> struct remove_reference<T &> : type_identity<T> {};
template <class T> struct remove_reference<T &&> : type_identity<T> {};

/// Freestanding implementation of std::is_const.
template <class T> struct is_const : type_constant<bool, false> {};
template <class T> struct is_const<const T> : type_constant<bool, true> {};

/// Freestanding implementation of std::move.
template <class T>
RPC_ATTRS constexpr typename remove_reference<T>::type &&move(T &&t) {
  return static_cast<typename remove_reference<T>::type &&>(t);
}

/// Freestanding implementation of std::forward.
template <typename T>
RPC_ATTRS constexpr T &&forward(typename remove_reference<T>::type &value) {
  return static_cast<T &&>(value);
}
template <typename T>
RPC_ATTRS constexpr T &&forward(typename remove_reference<T>::type &&value) {
  return static_cast<T &&>(value);
}

/// Tag type selecting in-place construction of the contained value.
struct in_place_t {
  RPC_ATTRS explicit in_place_t() = default;
};

/// Tag type denoting an empty optional.
struct nullopt_t {
  RPC_ATTRS constexpr explicit nullopt_t() = default;
};

constexpr inline in_place_t in_place{};
constexpr inline nullopt_t nullopt{};

/// Freestanding and minimal implementation of std::optional.
///
/// The accessors (value(), operator*, operator->) do not check whether a value
/// is present; callers are expected to test has_value() first.
template <typename T> class optional {
  /// Raw storage for the value plus the flag recording whether it is alive.
  template <typename U> struct OptionalStorage {
    union {
      char empty;
      U stored_value;
    };

    bool in_use = false;

    RPC_ATTRS ~OptionalStorage() { reset(); }

    RPC_ATTRS constexpr OptionalStorage() : empty() {}

    // Constructs the value but leaves `in_use` untouched; the enclosing
    // optional sets the flag once construction has finished. The call to
    // forward is qualified so that argument-dependent lookup cannot make it
    // ambiguous with std::forward for argument types from namespace std.
    template <typename... Args>
    RPC_ATTRS constexpr explicit OptionalStorage(in_place_t, Args &&...args)
        : stored_value(rpc::forward<Args>(args)...) {}

    /// Destroys the stored value, if any, and marks the storage as empty.
    RPC_ATTRS constexpr void reset() {
      if (in_use)
        stored_value.~U();
      in_use = false;
    }
  };

  OptionalStorage<T> storage;

public:
  RPC_ATTRS constexpr optional() = default;
  RPC_ATTRS constexpr optional(nullopt_t) {}

  RPC_ATTRS constexpr optional(const T &t) : storage(in_place, t) {
    storage.in_use = true;
  }
  RPC_ATTRS constexpr optional(const optional &) = default;

  // rpc::move is qualified for the same reason as rpc::forward above.
  RPC_ATTRS constexpr optional(T &&t) : storage(in_place, rpc::move(t)) {
    storage.in_use = true;
  }
  RPC_ATTRS constexpr optional(optional &&O) = default;

  /// Replaces the contents with \p t. The storage type cannot be assigned
  /// from a bare value, so a temporary optional is built from \p t and handed
  /// to the defaulted optional assignment. This is therefore usable for the
  /// same element types as that assignment.
  RPC_ATTRS constexpr optional &operator=(T &&t) {
    return *this = optional(rpc::move(t));
  }
  RPC_ATTRS constexpr optional &operator=(optional &&) = default;

  /// Replaces the contents with a copy of \p t; see the overload taking an
  /// rvalue for how the assignment is carried out.
  RPC_ATTRS constexpr optional &operator=(const T &t) {
    return *this = optional(t);
  }
  RPC_ATTRS constexpr optional &operator=(const optional &) = default;

  /// Destroys the contained value, if any, leaving the optional empty.
  RPC_ATTRS constexpr void reset() { storage.reset(); }

  RPC_ATTRS constexpr const T &value() const & { return storage.stored_value; }

  RPC_ATTRS constexpr T &value() & { return storage.stored_value; }

  RPC_ATTRS constexpr explicit operator bool() const { return storage.in_use; }
  RPC_ATTRS constexpr bool has_value() const { return storage.in_use; }
  RPC_ATTRS constexpr const T *operator->() const {
    return &storage.stored_value;
  }
  RPC_ATTRS constexpr T *operator->() { return &storage.stored_value; }
  RPC_ATTRS constexpr const T &operator*() const & {
    return storage.stored_value;
  }
  RPC_ATTRS constexpr T &operator*() & { return storage.stored_value; }

  RPC_ATTRS constexpr T &&value() && {
    return rpc::move(storage.stored_value);
  }
  RPC_ATTRS constexpr T &&operator*() && {
    return rpc::move(storage.stored_value);
  }
};

/// Suspend the thread briefly to assist the thread scheduler during busy loops.
RPC_ATTRS void sleep_briefly() {
#if __has_builtin(__nvvm_reflect)
  if (__nvvm_reflect("__CUDA_ARCH") >= 700)
    asm("nanosleep.u32 64;" ::: "memory");
#elif __has_builtin(__builtin_amdgcn_s_sleep)
  __builtin_amdgcn_s_sleep(2);
#elif __has_builtin(__builtin_ia32_pause)
  __builtin_ia32_pause();
#elif __has_builtin(__builtin_arm_isb)
  __builtin_arm_isb(0xf);
#else
  // Simply do nothing if sleeping isn't supported on this platform.
#endif
}

/// Conditional to indicate if this process is running on the GPU.
RPC_ATTRS constexpr bool is_process_gpu() {
#ifdef RPC_TARGET_IS_GPU
  return true;
#else
  return false;
#endif
}

/// Wait for all lanes in the group to complete. Does nothing when not
/// compiled for the GPU.
RPC_ATTRS void sync_lane([[maybe_unused]] uint64_t lane_mask) {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_sync_lane(lane_mask);
#endif
}

/// Copies the value from the first active thread to the rest. Returns \p x
/// unchanged when not compiled for the GPU.
RPC_ATTRS uint32_t broadcast_value([[maybe_unused]] uint64_t lane_mask,
                                   uint32_t x) {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_read_first_lane_u32(lane_mask, x);
#else
  return x;
#endif
}

/// Returns the number of lanes that participate in the RPC interface. This is
/// 1 when not compiled for the GPU.
RPC_ATTRS uint32_t get_num_lanes() {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_num_lanes();
#else
  return 1;
#endif
}

/// Returns the bitmask of the lanes that are currently executing together.
/// When not compiled for the GPU there is a single lane, so the mask is 1.
RPC_ATTRS uint64_t get_lane_mask() {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_lane_mask();
#else
  return 1;
#endif
}

/// Returns the id of the calling lane within the group of lanes executing
/// together. This is 0 when not compiled for the GPU.
RPC_ATTRS uint32_t get_lane_id() {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_lane_id();
#else
  return 0;
#endif
}

/// Conditional that is only true for a single lane out of those in
/// \p lane_mask. Always true when not compiled for the GPU.
RPC_ATTRS bool is_first_lane([[maybe_unused]] uint64_t lane_mask) {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_is_first_in_lane(lane_mask);
#else
  return true;
#endif
}

/// Returns a bitmask of the lanes in \p lane_mask for which \p x is true. When
/// not compiled for the GPU this is simply \p x converted to an integer.
RPC_ATTRS uint64_t ballot([[maybe_unused]] uint64_t lane_mask, bool x) {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_ballot(lane_mask, x);
#else
  return x;
#endif
}

/// Return \p val aligned "upwards" according to \p align.
template <typename V, typename A>
RPC_ATTRS constexpr V align_up(V val, A align) {
  return ((val + V(align) - 1) / V(align)) * V(align);
}

/// Utility to provide a unified interface between the CPU and GPU's memory
/// model. On the GPU stack variables are always private to a lane so we can
/// simply use the variable passed in. On the CPU we need to allocate enough
/// space for the whole lane and index into it.
template <typename V> RPC_ATTRS V &lane_value(V *val, uint32_t id) {
  if constexpr (is_process_gpu())
    return *val;
  return val[id];
}

/// Advance the pointer \p ptr by \p bytes.
template <typename T, typename U> RPC_ATTRS T *advance(T *ptr, U bytes) {
  // The byte pointer has to carry the same constness as T because
  // reinterpret_cast is not allowed to cast away const.
  if constexpr (is_const<T>::value)
    return reinterpret_cast<T *>(reinterpret_cast<const uint8_t *>(ptr) +
                                 bytes);
  else
    return reinterpret_cast<T *>(reinterpret_cast<uint8_t *>(ptr) + bytes);
}

/// Wrapper around the optimal memory copy implementation for the target.
RPC_ATTRS void rpc_memcpy(void *dst, const void *src, size_t count) {
  __builtin_memcpy(dst, src, count);
}

/// Freestanding implementation of std::max. Returns \p a when the two
/// arguments compare equal.
template <class T> RPC_ATTRS constexpr const T &max(const T &a, const T &b) {
  return (a < b) ? b : a;
}

} // namespace rpc

#endif // LLVM_LIBC_SHARED_RPC_UTIL_H