xref: /freebsd/contrib/llvm-project/openmp/runtime/src/include/ompx.h.var (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef __OMPX_H
10#define __OMPX_H
11
// Default `width` used by the C++ shfl_down_sync wrappers: taken from
// __AMDGCN_WAVEFRONT_SIZE when the compiler defines it, otherwise 32.
#ifndef __AMDGCN_WAVEFRONT_SIZE
#define __WARP_SIZE 32
#else
#define __WARP_SIZE __AMDGCN_WAVEFRONT_SIZE
#endif
17
// 64-bit unsigned integer used for lane masks. This header does not include
// <stdint.h>, so the type is spelled via the compiler-provided
// __UINT64_TYPE__ macro: it is exactly 64 bits wide on every target (plain
// `unsigned long` is only 32 bits on LLP64 platforms) and it is the same type
// <stdint.h> uses, so the typedef stays a compatible redeclaration when both
// headers are included.
#ifdef __UINT64_TYPE__
typedef __UINT64_TYPE__ uint64_t;
#else
// No compiler-provided spelling available: keep the historical definition.
typedef unsigned long uint64_t;
#endif
19
// OpenMP runtime entry points used by the host fallback implementations of
// ompx_thread_id and ompx_block_dim further down in this header. They are
// declared here directly, with C linkage, rather than pulled in via <omp.h>.
#if defined(__cplusplus)
extern "C" {
#endif

int omp_get_ancestor_thread_num(int level);
int omp_get_team_size(int level);

#if defined(__cplusplus)
} // extern "C"
#endif
30
31/// Target kernel language extensions
32///
33/// These extensions exist for the host to allow fallback implementations,
34/// however, they cannot be arbitrarily composed with OpenMP. If the rules of
35/// the kernel language are followed, the host fallbacks should behave as
36/// expected since the kernel is represented as 3 sequential outer loops, one
37/// for each grid dimension, and three (nested) parallel loops, one for each
38/// block dimension. This fallback is not supposed to be optimal and should be
39/// configurable by the user.
40///
41///{
42
43#ifdef __cplusplus
44extern "C" {
45#endif
46
/// Memory orderings accepted by the ompx synchronization functions. Each
/// constant has the value of the corresponding compiler __ATOMIC_* macro.
enum {
  ompx_relaxed = __ATOMIC_RELAXED,
  ompx_acquire = __ATOMIC_ACQUIRE,
  /// Historical misspelling of ompx_acquire, kept so existing code compiles.
  ompx_aquire = ompx_acquire,
  ompx_release = __ATOMIC_RELEASE,
  ompx_acq_rel = __ATOMIC_ACQ_REL,
  ompx_seq_cst = __ATOMIC_SEQ_CST,
};
54
/// Dimension selectors passed as the `Dim` argument of the
/// ompx_{thread_id,block_dim,block_id,grid_dim} functions.
enum {
  ompx_dim_x = 0, // enumerators below continue counting from here
  ompx_dim_y,
  ompx_dim_z,
};
60
61// TODO: The following implementation is for host fallback. We need to disable
62// generation of host fallback in kernel language mode.
63#pragma omp begin declare variant match(device = {kind(cpu)})
64
/// Host fallbacks for ompx_{thread_id,block_dim,block_id,grid_dim}(Dim).
///
/// Thread id and block size are taken from the OpenMP runtime at nesting
/// level `Dim + 1`; the fallback always reports block id 0 and grid size 1.
///{
static inline int ompx_thread_id(int Dim) {
  return omp_get_ancestor_thread_num(Dim + 1);
}

static inline int ompx_block_dim(int Dim) {
  return omp_get_team_size(Dim + 1);
}

static inline int ompx_block_id(int Dim) {
  (void)Dim; // same answer for every dimension
  return 0;
}

static inline int ompx_grid_dim(int Dim) {
  (void)Dim; // same answer for every dimension
  return 1;
}
///}
77
78/// ompx_{sync_block}_{,divergent}
79///{
80#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(RETTY, NAME, ARGS, BODY)         \
81  static inline RETTY ompx_##NAME(ARGS) { BODY; }
82
83_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block, int Ordering,
84                                      _Pragma("omp barrier"))
85_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_acq_rel, void,
86                                      ompx_sync_block(ompx_acq_rel))
87_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_divergent, int Ordering,
88                                      ompx_sync_block(Ordering))
89#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C
90///}
91
/// Host fallback for ompx_ballot_sync: there is no host implementation, so any
/// call traps. Control never reaches the end of the function, hence no return.
static inline uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
  (void)mask;
  (void)pred;
  __builtin_trap();
}
95
/// Host fallbacks for ompx_shfl_down_sync_{i,f,l,d}.
///
/// One function is stamped out per value type (int, float, long, double).
/// None of them has a host implementation: every call traps.
///{
#define _TGT_KERNEL_LANGUAGE_HOST_TRAP_SHFL_DOWN_SYNC(SUFFIX, VALUE_TYPE)      \
  static inline VALUE_TYPE ompx_shfl_down_sync_##SUFFIX(                       \
      uint64_t mask, VALUE_TYPE var, unsigned delta, int width) {              \
    __builtin_trap();                                                          \
  }

_TGT_KERNEL_LANGUAGE_HOST_TRAP_SHFL_DOWN_SYNC(i, int)
_TGT_KERNEL_LANGUAGE_HOST_TRAP_SHFL_DOWN_SYNC(f, float)
_TGT_KERNEL_LANGUAGE_HOST_TRAP_SHFL_DOWN_SYNC(l, long)
_TGT_KERNEL_LANGUAGE_HOST_TRAP_SHFL_DOWN_SYNC(d, double)

#undef _TGT_KERNEL_LANGUAGE_HOST_TRAP_SHFL_DOWN_SYNC
///}
111
112#pragma omp end declare variant
113
/// Declarations of ompx_sync_block{,_acq_rel,_divergent}.
///{
void ompx_sync_block(int Ordering);
void ompx_sync_block_acq_rel(void);
void ompx_sync_block_divergent(int Ordering);
///}
124
125/// ompx_{thread,block}_{id,dim}_{x,y,z}
126///{
127#define _TGT_KERNEL_LANGUAGE_DECL_GRID_C(NAME)                                 \
128  int ompx_##NAME(int Dim);                                                    \
129  static inline int ompx_##NAME##_x() { return ompx_##NAME(ompx_dim_x); }      \
130  static inline int ompx_##NAME##_y() { return ompx_##NAME(ompx_dim_y); }      \
131  static inline int ompx_##NAME##_z() { return ompx_##NAME(ompx_dim_z); }
132
133_TGT_KERNEL_LANGUAGE_DECL_GRID_C(thread_id)
134_TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_dim)
135_TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_id)
136_TGT_KERNEL_LANGUAGE_DECL_GRID_C(grid_dim)
137#undef _TGT_KERNEL_LANGUAGE_DECL_GRID_C
138///}
139
/// Declaration of ompx_ballot_sync.
uint64_t ompx_ballot_sync(uint64_t mask, int pred);

/// Declarations of ompx_shfl_down_sync_{i,f,l,d}, one per value type
/// (int, float, long, double).
///{
int ompx_shfl_down_sync_i(uint64_t mask, int var, unsigned delta, int width);
float ompx_shfl_down_sync_f(uint64_t mask, float var, unsigned delta,
                            int width);
long ompx_shfl_down_sync_l(uint64_t mask, long var, unsigned delta, int width);
double ompx_shfl_down_sync_d(uint64_t mask, double var, unsigned delta,
                             int width);
///}
155
156#ifdef __cplusplus
157}
158#endif
159
160#ifdef __cplusplus
161
162namespace ompx {
163
164enum {
165  dim_x = ompx_dim_x,
166  dim_y = ompx_dim_y,
167  dim_z = ompx_dim_z,
168};
169
170enum {
171  relaxed = ompx_relaxed ,
172  aquire = ompx_aquire,
173  release = ompx_release,
174  acc_rel = ompx_acq_rel,
175  seq_cst = ompx_seq_cst,
176};
177
178/// ompx::{thread,block}_{id,dim}_{,x,y,z}
179///{
180#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(NAME)                          \
181  static inline int NAME(int Dim) noexcept { return ompx_##NAME(Dim); }        \
182  static inline int NAME##_x() noexcept { return NAME(ompx_dim_x); }           \
183  static inline int NAME##_y() noexcept { return NAME(ompx_dim_y); }           \
184  static inline int NAME##_z() noexcept { return NAME(ompx_dim_z); }
185
186_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(thread_id)
187_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_dim)
188_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_id)
189_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(grid_dim)
190#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX
191///}
192
193/// ompx_{sync_block}_{,divergent}
194///{
195#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(RETTY, NAME, ARGS, CALL_ARGS)  \
196  static inline RETTY NAME(ARGS) {               \
197    return ompx_##NAME(CALL_ARGS);                                             \
198  }
199
200_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block, int Ordering = acc_rel,
201                                        Ordering)
202_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block_divergent,
203                                        int Ordering = acc_rel, Ordering)
204#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX
205///}
206
207static inline uint64_t ballot_sync(uint64_t mask, int pred) {
208  return ompx_ballot_sync(mask, pred);
209}
210
211/// shfl_down_sync
212///{
213#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY)                          \
214  static inline TYPE shfl_down_sync(uint64_t mask, TYPE var, unsigned delta,   \
215                                    int width = __WARP_SIZE) {                 \
216    return ompx_shfl_down_sync_##TY(mask, var, delta, width);                  \
217  }
218
219_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
220_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
221_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
222_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)
223
224#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
225///}
226
227} // namespace ompx
228#endif
229
230///}
231
232#endif /* __OMPX_H */
233