//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef __OMPX_H
#define __OMPX_H

/// Default `width` used by the C++ `ompx::shfl_down_sync` wrappers below:
/// `__AMDGCN_WAVEFRONT_SIZE` when the compiler defines it, 32 otherwise.
#ifdef __AMDGCN_WAVEFRONT_SIZE
#define __WARP_SIZE __AMDGCN_WAVEFRONT_SIZE
#else
#define __WARP_SIZE 32
#endif

/// 64-bit unsigned type used for lane masks. Prefer the compiler-provided
/// `__UINT64_TYPE__` so the typedef is 64 bits wide regardless of the data
/// model and normally names the same type <stdint.h> uses; fall back to the
/// historical `unsigned long` if the macro is unavailable.
#ifdef __UINT64_TYPE__
typedef __UINT64_TYPE__ uint64_t;
#else
typedef unsigned long uint64_t;
#endif

#ifdef __cplusplus
extern "C" {
#endif

/// OpenMP runtime entry points used by the host fallbacks below.
int omp_get_ancestor_thread_num(int);
int omp_get_team_size(int);

#ifdef __cplusplus
}
#endif

/// Target kernel language extensions
///
/// These extensions exist for the host to allow fallback implementations,
/// however, they cannot be arbitrarily composed with OpenMP. If the rules of
/// the kernel language are followed, the host fallbacks should behave as
/// expected since the kernel is represented as three sequential outer loops,
/// one for each grid dimension, and three (nested) parallel loops, one for
/// each block dimension. This fallback is not supposed to be optimal and
/// should be configurable by the user.
///
///{

#ifdef __cplusplus
extern "C" {
#endif

/// Memory orderings accepted by the synchronization functions; the values are
/// the compiler's `__ATOMIC_*` constants.
enum {
  ompx_relaxed = __ATOMIC_RELAXED,
  ompx_aquire = __ATOMIC_ACQUIRE, // Misspelled name, kept for compatibility.
  ompx_acquire = __ATOMIC_ACQUIRE,
  ompx_release = __ATOMIC_RELEASE,
  ompx_acq_rel = __ATOMIC_ACQ_REL,
  ompx_seq_cst = __ATOMIC_SEQ_CST,
};

/// Dimension selectors passed as the `Dim` argument of the grid queries.
enum {
  ompx_dim_x = 0,
  ompx_dim_y = 1,
  ompx_dim_z = 2,
};

// TODO: The following implementation is for host fallback. We need to disable
// generation of host fallback in kernel language mode.
#pragma omp begin declare variant match(device = {kind(cpu)})

/// ompx_{thread_id,block_dim,block_id,grid_dim}
///
/// Host fallbacks: the thread id and block dimension are taken from the
/// OpenMP nesting level `Dim + 1`, the block id is always 0 and the grid
/// dimension is always 1.
///{
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(NAME, VALUE)                     \
  static inline int ompx_##NAME(int Dim) { return VALUE; }

_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(thread_id,
                                      omp_get_ancestor_thread_num(Dim + 1))
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(block_dim, omp_get_team_size(Dim + 1))
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(block_id, 0)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(grid_dim, 1)
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C
///}

/// ompx_sync_block{,_acq_rel,_divergent}
///
/// Host fallbacks: `ompx_sync_block` is an `omp barrier` (the ordering
/// argument is not used); the other two forward to it.
///{
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(RETTY, NAME, ARGS, BODY)         \
  static inline RETTY ompx_##NAME(ARGS) { BODY; }

_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block, int Ordering,
                                      _Pragma("omp barrier"))
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_acq_rel, void,
                                      ompx_sync_block(ompx_acq_rel))
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_divergent, int Ordering,
                                      ompx_sync_block(Ordering))
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C
///}

/// There is no host implementation of the ballot; calling it traps.
static inline uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
  __builtin_trap();
}

/// ompx_shfl_down_sync_{i,f,l,d}
///
/// There is no host implementation of the shuffles; calling them traps.
///{
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(TYPE, TY)                \
  static inline TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var,         \
                                              unsigned delta, int width) {     \
    __builtin_trap();                                                          \
  }

_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(int, i)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(float, f)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(long, l)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(double, d)

#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL
///}

#pragma omp end declare variant

/// ompx_sync_block{,_acq_rel,_divergent}
///{
#define _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(RETTY, NAME, ARGS)                    \
  RETTY ompx_##NAME(ARGS);

_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block, int Ordering)
_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_acq_rel, void)
_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_divergent, int Ordering)
#undef _TGT_KERNEL_LANGUAGE_DECL_SYNC_C
///}

/// ompx_{thread_id,block_dim,block_id,grid_dim}{,_x,_y,_z}
///
/// Declares the generic query taking a dimension and defines the per-dimension
/// conveniences in terms of it. `(void)` keeps these proper prototypes in C.
///{
#define _TGT_KERNEL_LANGUAGE_DECL_GRID_C(NAME)                                 \
  int ompx_##NAME(int Dim);                                                    \
  static inline int ompx_##NAME##_x(void) { return ompx_##NAME(ompx_dim_x); }  \
  static inline int ompx_##NAME##_y(void) { return ompx_##NAME(ompx_dim_y); }  \
  static inline int ompx_##NAME##_z(void) { return ompx_##NAME(ompx_dim_z); }

_TGT_KERNEL_LANGUAGE_DECL_GRID_C(thread_id)
_TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_dim)
_TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_id)
_TGT_KERNEL_LANGUAGE_DECL_GRID_C(grid_dim)
#undef _TGT_KERNEL_LANGUAGE_DECL_GRID_C
///}

uint64_t ompx_ballot_sync(uint64_t mask, int pred);

/// ompx_shfl_down_sync_{i,f,l,d}
///{
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY)                          \
  TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var, unsigned delta,       \
                                int width);

_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)

#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
///}

#ifdef __cplusplus
}
#endif

#ifdef __cplusplus

/// C++ spellings of the C interface above; every function forwards to its
/// `ompx_`-prefixed counterpart.
namespace ompx {

enum {
  dim_x = ompx_dim_x,
  dim_y = ompx_dim_y,
  dim_z = ompx_dim_z,
};

enum {
  relaxed = ompx_relaxed,
  aquire = ompx_aquire, // Misspelled name, kept for compatibility.
  acquire = ompx_aquire,
  release = ompx_release,
  acc_rel = ompx_acq_rel, // Misspelled name, kept for compatibility.
  acq_rel = ompx_acq_rel,
  seq_cst = ompx_seq_cst,
};

/// ompx::{thread_id,block_dim,block_id,grid_dim}{,_x,_y,_z}
///{
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(NAME)                          \
  static inline int NAME(int Dim) noexcept { return ompx_##NAME(Dim); }        \
  static inline int NAME##_x() noexcept { return NAME(ompx_dim_x); }           \
  static inline int NAME##_y() noexcept { return NAME(ompx_dim_y); }           \
  static inline int NAME##_z() noexcept { return NAME(ompx_dim_z); }

_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(thread_id)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_dim)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_id)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(grid_dim)
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX
///}

/// ompx::sync_block{,_divergent}
///
/// The ordering defaults to acquire-release.
///{
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(RETTY, NAME, ARGS, CALL_ARGS)  \
  static inline RETTY NAME(ARGS) {                                             \
    return ompx_##NAME(CALL_ARGS);                                             \
  }

_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block,
                                        int Ordering = acq_rel, Ordering)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block_divergent,
                                        int Ordering = acq_rel, Ordering)
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX
///}

static inline uint64_t ballot_sync(uint64_t mask, int pred) {
  return ompx_ballot_sync(mask, pred);
}

/// ompx::shfl_down_sync
///
/// Overloaded on the value type; `width` defaults to `__WARP_SIZE`.
///{
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY)                          \
  static inline TYPE shfl_down_sync(uint64_t mask, TYPE var, unsigned delta,   \
                                    int width = __WARP_SIZE) {                 \
    return ompx_shfl_down_sync_##TY(mask, var, delta, width);                  \
  }

_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)

#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
///}

} // namespace ompx
#endif

///}

#endif /* __OMPX_H */