1 //===---------------- Implementation of GPU utils ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H 10 #define LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H 11 12 #include "src/__support/macros/attributes.h" 13 #include "src/__support/macros/config.h" 14 #include "src/__support/macros/properties/architectures.h" 15 16 #if !__has_include(<gpuintrin.h>) 17 #error "Unsupported compiler" 18 #endif 19 20 #include <gpuintrin.h> 21 22 namespace LIBC_NAMESPACE_DECL { 23 namespace gpu { 24 25 template <typename T> using Private = __gpu_private T; 26 template <typename T> using Constant = __gpu_constant T; 27 template <typename T> using Local = __gpu_local T; 28 template <typename T> using Global = __gpu_local T; 29 30 LIBC_INLINE uint32_t get_num_blocks_x() { return __gpu_num_blocks(0); } 31 32 LIBC_INLINE uint32_t get_num_blocks_y() { return __gpu_num_blocks(1); } 33 34 LIBC_INLINE uint32_t get_num_blocks_z() { return __gpu_num_blocks(2); } 35 36 LIBC_INLINE uint64_t get_num_blocks() { 37 return get_num_blocks_x() * get_num_blocks_y() * get_num_blocks_z(); 38 } 39 40 LIBC_INLINE uint32_t get_block_id_x() { return __gpu_block_id(0); } 41 42 LIBC_INLINE uint32_t get_block_id_y() { return __gpu_block_id(1); } 43 44 LIBC_INLINE uint32_t get_block_id_z() { return __gpu_block_id(2); } 45 46 LIBC_INLINE uint64_t get_block_id() { 47 return get_block_id_x() + get_num_blocks_x() * get_block_id_y() + 48 get_num_blocks_x() * get_num_blocks_y() * get_block_id_z(); 49 } 50 51 LIBC_INLINE uint32_t get_num_threads_x() { return __gpu_num_threads(0); } 52 53 LIBC_INLINE uint32_t get_num_threads_y() { return __gpu_num_threads(1); } 54 55 LIBC_INLINE uint32_t get_num_threads_z() { return __gpu_num_threads(2); } 56 57 LIBC_INLINE uint64_t get_num_threads() { 58 return get_num_threads_x() * get_num_threads_y() * get_num_threads_z(); 59 } 60 61 LIBC_INLINE uint32_t get_thread_id_x() { return __gpu_thread_id(0); } 62 63 LIBC_INLINE uint32_t get_thread_id_y() { return __gpu_thread_id(1); } 64 65 LIBC_INLINE uint32_t get_thread_id_z() { return __gpu_thread_id(2); } 66 67 LIBC_INLINE uint64_t get_thread_id() { 68 return get_thread_id_x() + get_num_threads_x() * get_thread_id_y() + 69 get_num_threads_x() * get_num_threads_y() * get_thread_id_z(); 70 } 71 72 LIBC_INLINE uint32_t get_lane_size() { return __gpu_num_lanes(); } 73 74 LIBC_INLINE uint32_t get_lane_id() { return __gpu_lane_id(); } 75 76 LIBC_INLINE uint64_t get_lane_mask() { return __gpu_lane_mask(); } 77 78 LIBC_INLINE uint32_t broadcast_value(uint64_t lane_mask, uint32_t x) { 79 return __gpu_read_first_lane_u32(lane_mask, x); 80 } 81 82 LIBC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) { 83 return __gpu_ballot(lane_mask, x); 84 } 85 86 LIBC_INLINE void sync_threads() { __gpu_sync_threads(); } 87 88 LIBC_INLINE void sync_lane(uint64_t lane_mask) { __gpu_sync_lane(lane_mask); } 89 90 LIBC_INLINE uint32_t shuffle(uint64_t lane_mask, uint32_t idx, uint32_t x, 91 uint32_t width = __gpu_num_lanes()) { 92 return __gpu_shuffle_idx_u32(lane_mask, idx, x, width); 93 } 94 95 LIBC_INLINE uint64_t shuffle(uint64_t lane_mask, uint32_t idx, uint64_t x, 96 uint32_t width = __gpu_num_lanes()) { 97 return __gpu_shuffle_idx_u64(lane_mask, idx, x, width); 98 } 99 100 template <typename T> 101 LIBC_INLINE T *shuffle(uint64_t lane_mask, uint32_t idx, T *x, 102 uint32_t width = __gpu_num_lanes()) { 103 return reinterpret_cast<T *>(__gpu_shuffle_idx_u64( 104 lane_mask, idx, reinterpret_cast<uintptr_t>(x), width)); 105 } 106 107 LIBC_INLINE uint64_t match_any(uint64_t lane_mask, uint32_t x) { 108 return __gpu_match_any_u32(lane_mask, x); 109 } 110 111 LIBC_INLINE uint64_t match_all(uint64_t lane_mask, uint32_t x) { 112 return __gpu_match_all_u32(lane_mask, x); 113 } 114 115 [[noreturn]] LIBC_INLINE void end_program() { __gpu_exit(); } 116 117 LIBC_INLINE bool is_first_lane(uint64_t lane_mask) { 118 return __gpu_is_first_in_lane(lane_mask); 119 } 120 121 LIBC_INLINE uint32_t reduce(uint64_t lane_mask, uint32_t x) { 122 return __gpu_lane_sum_u32(lane_mask, x); 123 } 124 125 LIBC_INLINE uint32_t scan(uint64_t lane_mask, uint32_t x) { 126 return __gpu_lane_scan_u32(lane_mask, x); 127 } 128 129 LIBC_INLINE uint64_t fixed_frequency_clock() { 130 return __builtin_readsteadycounter(); 131 } 132 133 LIBC_INLINE uint64_t processor_clock() { return __builtin_readcyclecounter(); } 134 135 } // namespace gpu 136 } // namespace LIBC_NAMESPACE_DECL 137 138 #endif // LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H 139