xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===------ CGOpenMPRuntimeGPU.h - Interface to OpenMP GPU Runtimes ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a generalized class for OpenMP runtime code generation
10 // specialized by GPU targets NVPTX and AMDGCN.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H
15 #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H
16 
17 #include "CGOpenMPRuntime.h"
18 #include "CodeGenFunction.h"
19 #include "clang/AST/StmtOpenMP.h"
20 
21 namespace clang {
22 namespace CodeGen {
23 
24 class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
25 public:
26   /// Defines the execution mode.
27   enum ExecutionMode {
28     /// SPMD execution mode (all threads are worker threads).
29     EM_SPMD,
30     /// Non-SPMD execution mode (1 master thread, others are workers).
31     EM_NonSPMD,
32     /// Unknown execution mode (orphaned directive).
33     EM_Unknown,
34   };
35 
36   /// Target codegen is specialized based on two data-sharing modes: CUDA, in
37   /// which the local variables are actually global threadlocal, and Generic, in
38   /// which the local variables are placed in global memory if they may escape
39   /// their declaration context.
40   enum DataSharingMode {
41     /// CUDA data sharing mode.
42     DS_CUDA,
43     /// Generic data-sharing mode.
44     DS_Generic,
45   };
46 
47 private:
48   /// Parallel outlined function work for workers to execute.
49   llvm::SmallVector<llvm::Function *, 16> Work;
50 
51   struct EntryFunctionState {
52     SourceLocation Loc;
53   };
54 
55   ExecutionMode getExecutionMode() const;
56 
57   DataSharingMode getDataSharingMode() const;
58 
59   /// Get barrier to synchronize all threads in a block.
60   void syncCTAThreads(CodeGenFunction &CGF);
61 
62   /// Helper for target directive initialization.
63   void emitKernelInit(const OMPExecutableDirective &D, CodeGenFunction &CGF,
64                       EntryFunctionState &EST, bool IsSPMD);
65 
66   /// Helper for target directive finalization.
67   void emitKernelDeinit(CodeGenFunction &CGF, EntryFunctionState &EST,
68                         bool IsSPMD);
69 
70   /// Helper for generic variables globalization prolog.
71   void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc);
72 
73   /// Helper for generic variables globalization epilog.
74   void emitGenericVarsEpilog(CodeGenFunction &CGF);
75 
76   //
77   // Base class overrides.
78   //
79 
80   /// Emit outlined function specialized for the Fork-Join
81   /// programming model for applicable target directives on the NVPTX device.
82   /// \param D Directive to emit.
83   /// \param ParentName Name of the function that encloses the target region.
84   /// \param OutlinedFn Outlined function value to be defined by this call.
85   /// \param OutlinedFnID Outlined function ID value to be defined by this call.
86   /// \param IsOffloadEntry True if the outlined function is an offload entry.
87   /// An outlined function may not be an entry if, e.g. the if clause always
88   /// evaluates to false.
89   void emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
90                          llvm::Function *&OutlinedFn,
91                          llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
92                          const RegionCodeGenTy &CodeGen);
93 
94   /// Emit outlined function specialized for the Single Program
95   /// Multiple Data programming model for applicable target directives on the
96   /// NVPTX device.
97   /// \param D Directive to emit.
98   /// \param ParentName Name of the function that encloses the target region.
99   /// \param OutlinedFn Outlined function value to be defined by this call.
100   /// \param OutlinedFnID Outlined function ID value to be defined by this call.
101   /// \param IsOffloadEntry True if the outlined function is an offload entry.
102   /// \param CodeGen Object containing the target statements.
103   /// An outlined function may not be an entry if, e.g. the if clause always
104   /// evaluates to false.
105   void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
106                       llvm::Function *&OutlinedFn,
107                       llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
108                       const RegionCodeGenTy &CodeGen);
109 
110   /// Emit outlined function for 'target' directive on the NVPTX
111   /// device.
112   /// \param D Directive to emit.
113   /// \param ParentName Name of the function that encloses the target region.
114   /// \param OutlinedFn Outlined function value to be defined by this call.
115   /// \param OutlinedFnID Outlined function ID value to be defined by this call.
116   /// \param IsOffloadEntry True if the outlined function is an offload entry.
117   /// An outlined function may not be an entry if, e.g. the if clause always
118   /// evaluates to false.
119   void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
120                                   StringRef ParentName,
121                                   llvm::Function *&OutlinedFn,
122                                   llvm::Constant *&OutlinedFnID,
123                                   bool IsOffloadEntry,
124                                   const RegionCodeGenTy &CodeGen) override;
125 
126 protected:
127   /// Check if the default location must be constant.
128   /// Constant for NVPTX for better optimization.
isDefaultLocationConstant()129   bool isDefaultLocationConstant() const override { return true; }
130 
131 public:
132   explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM);
133 
isGPU()134   bool isGPU() const override { return true; };
135 
136   /// Declare generalized virtual functions which need to be defined
137   /// by all specializations of OpenMPGPURuntime Targets like AMDGCN
138   /// and NVPTX.
139 
140   /// Check if the variable length declaration is delayed:
141   bool isDelayedVariableLengthDecl(CodeGenFunction &CGF,
142                                    const VarDecl *VD) const override;
143 
144   /// Get call to __kmpc_alloc_shared
145   std::pair<llvm::Value *, llvm::Value *>
146   getKmpcAllocShared(CodeGenFunction &CGF, const VarDecl *VD) override;
147 
148   /// Get call to __kmpc_free_shared
149   void getKmpcFreeShared(
150       CodeGenFunction &CGF,
151       const std::pair<llvm::Value *, llvm::Value *> &AddrSizePair) override;
152 
153   /// Get the id of the current thread on the GPU.
154   llvm::Value *getGPUThreadID(CodeGenFunction &CGF);
155 
156   /// Get the maximum number of threads in a block of the GPU.
157   llvm::Value *getGPUNumThreads(CodeGenFunction &CGF);
158 
159   /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
160   /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
161   void emitProcBindClause(CodeGenFunction &CGF,
162                           llvm::omp::ProcBindKind ProcBind,
163                           SourceLocation Loc) override;
164 
165   /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
166   /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
167   /// clause.
168   /// \param NumThreads An integer value of threads.
169   void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads,
170                             SourceLocation Loc) override;
171 
172   /// This function ought to emit, in the general case, a call to
173   // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed
174   // as these numbers are obtained through the PTX grid and block configuration.
175   /// \param NumTeams An integer expression of teams.
176   /// \param ThreadLimit An integer expression of threads.
177   void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
178                           const Expr *ThreadLimit, SourceLocation Loc) override;
179 
180   /// Emits inlined function for the specified OpenMP parallel
181   //  directive.
182   /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
183   /// kmp_int32 BoundID, struct context_vars*).
184   /// \param CGF Reference to current CodeGenFunction.
185   /// \param D OpenMP directive.
186   /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
187   /// \param InnermostKind Kind of innermost directive (for simple directives it
188   /// is a directive itself, for combined - its innermost directive).
189   /// \param CodeGen Code generation sequence for the \a D directive.
190   llvm::Function *emitParallelOutlinedFunction(
191       CodeGenFunction &CGF, const OMPExecutableDirective &D,
192       const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
193       const RegionCodeGenTy &CodeGen) override;
194 
195   /// Emits inlined function for the specified OpenMP teams
196   //  directive.
197   /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
198   /// kmp_int32 BoundID, struct context_vars*).
199   /// \param CGF Reference to current CodeGenFunction.
200   /// \param D OpenMP directive.
201   /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
202   /// \param InnermostKind Kind of innermost directive (for simple directives it
203   /// is a directive itself, for combined - its innermost directive).
204   /// \param CodeGen Code generation sequence for the \a D directive.
205   llvm::Function *emitTeamsOutlinedFunction(
206       CodeGenFunction &CGF, const OMPExecutableDirective &D,
207       const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
208       const RegionCodeGenTy &CodeGen) override;
209 
210   /// Emits code for teams call of the \a OutlinedFn with
211   /// variables captured in a record which address is stored in \a
212   /// CapturedStruct.
213   /// \param OutlinedFn Outlined function to be run by team masters. Type of
214   /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
215   /// \param CapturedVars A pointer to the record with the references to
216   /// variables used in \a OutlinedFn function.
217   ///
218   void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
219                      SourceLocation Loc, llvm::Function *OutlinedFn,
220                      ArrayRef<llvm::Value *> CapturedVars) override;
221 
222   /// Emits code for parallel or serial call of the \a OutlinedFn with
223   /// variables captured in a record which address is stored in \a
224   /// CapturedStruct.
225   /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
226   /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
227   /// \param CapturedVars A pointer to the record with the references to
228   /// variables used in \a OutlinedFn function.
229   /// \param IfCond Condition in the associated 'if' clause, if it was
230   /// specified, nullptr otherwise.
231   /// \param NumThreads The value corresponding to the num_threads clause, if
232   /// any,
233   ///                   or nullptr.
234   void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
235                         llvm::Function *OutlinedFn,
236                         ArrayRef<llvm::Value *> CapturedVars,
237                         const Expr *IfCond, llvm::Value *NumThreads) override;
238 
239   /// Emit an implicit/explicit barrier for OpenMP threads.
240   /// \param Kind Directive for which this implicit barrier call must be
241   /// generated. Must be OMPD_barrier for explicit barrier generation.
242   /// \param EmitChecks true if need to emit checks for cancellation barriers.
243   /// \param ForceSimpleCall true simple barrier call must be emitted, false if
244   /// runtime class decides which one to emit (simple or with cancellation
245   /// checks).
246   ///
247   void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
248                        OpenMPDirectiveKind Kind, bool EmitChecks = true,
249                        bool ForceSimpleCall = false) override;
250 
251   /// Emits a critical region.
252   /// \param CriticalName Name of the critical region.
253   /// \param CriticalOpGen Generator for the statement associated with the given
254   /// critical region.
255   /// \param Hint Value of the 'hint' clause (optional).
256   void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName,
257                           const RegionCodeGenTy &CriticalOpGen,
258                           SourceLocation Loc,
259                           const Expr *Hint = nullptr) override;
260 
261   /// Emit a code for reduction clause.
262   ///
263   /// \param Privates List of private copies for original reduction arguments.
264   /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
265   /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
266   /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
267   /// or 'operator binop(LHS, RHS)'.
268   /// \param Options List of options for reduction codegen:
269   ///     WithNowait true if parent directive has also nowait clause, false
270   ///     otherwise.
271   ///     SimpleReduction Emit reduction operation only. Used for omp simd
272   ///     directive on the host.
273   ///     ReductionKind The kind of reduction to perform.
274   void emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
275                      ArrayRef<const Expr *> Privates,
276                      ArrayRef<const Expr *> LHSExprs,
277                      ArrayRef<const Expr *> RHSExprs,
278                      ArrayRef<const Expr *> ReductionOps,
279                      ReductionOptionsTy Options) override;
280 
281   /// Translates the native parameter of outlined function if this is required
282   /// for target.
283   /// \param FD Field decl from captured record for the parameter.
284   /// \param NativeParam Parameter itself.
285   const VarDecl *translateParameter(const FieldDecl *FD,
286                                     const VarDecl *NativeParam) const override;
287 
288   /// Gets the address of the native argument basing on the address of the
289   /// target-specific parameter.
290   /// \param NativeParam Parameter itself.
291   /// \param TargetParam Corresponding target-specific parameter.
292   Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam,
293                               const VarDecl *TargetParam) const override;
294 
295   /// Emits call of the outlined function with the provided arguments,
296   /// translating these arguments to correct target-specific arguments.
297   void emitOutlinedFunctionCall(
298       CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
299       ArrayRef<llvm::Value *> Args = std::nullopt) const override;
300 
301   /// Emits OpenMP-specific function prolog.
302   /// Required for device constructs.
303   void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override;
304 
305   /// Gets the OpenMP-specific address of the local variable.
306   Address getAddressOfLocalVariable(CodeGenFunction &CGF,
307                                     const VarDecl *VD) override;
308 
309   /// Cleans up references to the objects in finished function.
310   ///
311   void functionFinished(CodeGenFunction &CGF) override;
312 
313   /// Choose a default value for the dist_schedule clause.
314   void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF,
315       const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
316       llvm::Value *&Chunk) const override;
317 
318   /// Choose a default value for the schedule clause.
319   void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
320       const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
321       const Expr *&ChunkExpr) const override;
322 
323   /// Adjust some parameters for the target-based directives, like addresses of
324   /// the variables captured by reference in lambdas.
325   void adjustTargetSpecificDataForLambdas(
326       CodeGenFunction &CGF, const OMPExecutableDirective &D) const override;
327 
328   /// Perform check on requires decl to ensure that target architecture
329   /// supports unified addressing
330   void processRequiresDirective(const OMPRequiresDecl *D) override;
331 
332   /// Checks if the variable has associated OMPAllocateDeclAttr attribute with
333   /// the predefined allocator and translates it into the corresponding address
334   /// space.
335   bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override;
336 
337 private:
338   /// Track the execution mode when codegening directives within a target
339   /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
340   /// target region and used by containing directives such as 'parallel'
341   /// to emit optimized code.
342   ExecutionMode CurrentExecutionMode = EM_Unknown;
343 
344   /// Track the data sharing mode when codegening directives within a target
345   /// region.
346   DataSharingMode CurrentDataSharingMode = DataSharingMode::DS_Generic;
347 
348   /// true if currently emitting code for target/teams/distribute region, false
349   /// - otherwise.
350   bool IsInTTDRegion = false;
351 
352   /// Map between an outlined function and its wrapper.
353   llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap;
354 
355   /// Emit function which wraps the outline parallel region
356   /// and controls the parameters which are passed to this function.
357   /// The wrapper ensures that the outlined function is called
358   /// with the correct arguments when data is shared.
359   llvm::Function *createParallelDataSharingWrapper(
360       llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D);
361 
362   /// The data for the single globalized variable.
363   struct MappedVarData {
364     /// Corresponding field in the global record.
365     llvm::Value *GlobalizedVal = nullptr;
366     /// Corresponding address.
367     Address PrivateAddr = Address::invalid();
368   };
369   /// The map of local variables to their addresses in the global memory.
370   using DeclToAddrMapTy = llvm::MapVector<const Decl *, MappedVarData>;
371   /// Set of the parameters passed by value escaping OpenMP context.
372   using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
373   struct FunctionData {
374     DeclToAddrMapTy LocalVarData;
375     EscapedParamsTy EscapedParameters;
376     llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls;
377     llvm::SmallVector<const ValueDecl *, 4> DelayedVariableLengthDecls;
378     llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4>
379         EscapedVariableLengthDeclsAddrs;
380     std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
381   };
382   /// Maps the function to the list of the globalized variables with their
383   /// addresses.
384   llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls;
385   /// List of the records with the list of fields for the reductions across the
386   /// teams. Used to build the intermediate buffer for the fast teams
387   /// reductions.
388   /// All the records are gathered into a union `union.type` is created.
389   llvm::SmallVector<const RecordDecl *, 4> TeamsReductions;
390   /// Pair of the Non-SPMD team and all reductions variables in this team
391   /// region.
392   std::pair<const Decl *, llvm::SmallVector<const ValueDecl *, 4>>
393       TeamAndReductions;
394 };
395 
396 } // CodeGen namespace.
397 } // clang namespace.
398 
399 #endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H
400