xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h (revision 38a52bd3b5cac3da6f7f6eef3dd050e6aa08ebb3)
1 //===------ CGOpenMPRuntimeGPU.h - Interface to OpenMP GPU Runtimes ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a generalized class for OpenMP runtime code generation
10 // specialized by GPU targets NVPTX and AMDGCN.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H
15 #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H
16 
17 #include "CGOpenMPRuntime.h"
18 #include "CodeGenFunction.h"
19 #include "clang/AST/StmtOpenMP.h"
20 
21 namespace clang {
22 namespace CodeGen {
23 
24 class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
25 public:
26   /// Defines the execution mode.
27   enum ExecutionMode {
28     /// SPMD execution mode (all threads are worker threads).
29     EM_SPMD,
30     /// Non-SPMD execution mode (1 master thread, others are workers).
31     EM_NonSPMD,
32     /// Unknown execution mode (orphaned directive).
33     EM_Unknown,
34   };
35 private:
36   /// Parallel outlined function work for workers to execute.
37   llvm::SmallVector<llvm::Function *, 16> Work;
38 
39   struct EntryFunctionState {
40     SourceLocation Loc;
41   };
42 
43   ExecutionMode getExecutionMode() const;
44 
45   bool requiresFullRuntime() const { return RequiresFullRuntime; }
46 
47   /// Get barrier to synchronize all threads in a block.
48   void syncCTAThreads(CodeGenFunction &CGF);
49 
50   /// Helper for target directive initialization.
51   void emitKernelInit(CodeGenFunction &CGF, EntryFunctionState &EST,
52                       bool IsSPMD);
53 
54   /// Helper for target directive finalization.
55   void emitKernelDeinit(CodeGenFunction &CGF, EntryFunctionState &EST,
56                         bool IsSPMD);
57 
58   /// Helper for generic variables globalization prolog.
59   void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc,
60                              bool WithSPMDCheck = false);
61 
62   /// Helper for generic variables globalization epilog.
63   void emitGenericVarsEpilog(CodeGenFunction &CGF, bool WithSPMDCheck = false);
64 
65   //
66   // Base class overrides.
67   //
68 
69   /// Creates offloading entry for the provided entry ID \a ID,
70   /// address \a Addr, size \a Size, and flags \a Flags.
71   void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
72                           uint64_t Size, int32_t Flags,
73                           llvm::GlobalValue::LinkageTypes Linkage) override;
74 
75   /// Emit outlined function specialized for the Fork-Join
76   /// programming model for applicable target directives on the NVPTX device.
77   /// \param D Directive to emit.
78   /// \param ParentName Name of the function that encloses the target region.
79   /// \param OutlinedFn Outlined function value to be defined by this call.
80   /// \param OutlinedFnID Outlined function ID value to be defined by this call.
81   /// \param IsOffloadEntry True if the outlined function is an offload entry.
82   /// An outlined function may not be an entry if, e.g. the if clause always
83   /// evaluates to false.
84   void emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
85                          llvm::Function *&OutlinedFn,
86                          llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
87                          const RegionCodeGenTy &CodeGen);
88 
89   /// Emit outlined function specialized for the Single Program
90   /// Multiple Data programming model for applicable target directives on the
91   /// NVPTX device.
92   /// \param D Directive to emit.
93   /// \param ParentName Name of the function that encloses the target region.
94   /// \param OutlinedFn Outlined function value to be defined by this call.
95   /// \param OutlinedFnID Outlined function ID value to be defined by this call.
96   /// \param IsOffloadEntry True if the outlined function is an offload entry.
97   /// \param CodeGen Object containing the target statements.
98   /// An outlined function may not be an entry if, e.g. the if clause always
99   /// evaluates to false.
100   void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
101                       llvm::Function *&OutlinedFn,
102                       llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
103                       const RegionCodeGenTy &CodeGen);
104 
105   /// Emit outlined function for 'target' directive on the NVPTX
106   /// device.
107   /// \param D Directive to emit.
108   /// \param ParentName Name of the function that encloses the target region.
109   /// \param OutlinedFn Outlined function value to be defined by this call.
110   /// \param OutlinedFnID Outlined function ID value to be defined by this call.
111   /// \param IsOffloadEntry True if the outlined function is an offload entry.
112   /// An outlined function may not be an entry if, e.g. the if clause always
113   /// evaluates to false.
114   void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
115                                   StringRef ParentName,
116                                   llvm::Function *&OutlinedFn,
117                                   llvm::Constant *&OutlinedFnID,
118                                   bool IsOffloadEntry,
119                                   const RegionCodeGenTy &CodeGen) override;
120 
121   /// Emits code for parallel or serial call of the \a OutlinedFn with
122   /// variables captured in a record which address is stored in \a
123   /// CapturedStruct.
124   /// This call is for the Non-SPMD Execution Mode.
125   /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
126   /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
127   /// \param CapturedVars A pointer to the record with the references to
128   /// variables used in \a OutlinedFn function.
129   /// \param IfCond Condition in the associated 'if' clause, if it was
130   /// specified, nullptr otherwise.
131   void emitNonSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
132                                llvm::Value *OutlinedFn,
133                                ArrayRef<llvm::Value *> CapturedVars,
134                                const Expr *IfCond);
135 
136   /// Emits code for parallel or serial call of the \a OutlinedFn with
137   /// variables captured in a record which address is stored in \a
138   /// CapturedStruct.
139   /// This call is for a parallel directive within an SPMD target directive.
140   /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
141   /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
142   /// \param CapturedVars A pointer to the record with the references to
143   /// variables used in \a OutlinedFn function.
144   /// \param IfCond Condition in the associated 'if' clause, if it was
145   /// specified, nullptr otherwise.
146   ///
147   void emitSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
148                             llvm::Function *OutlinedFn,
149                             ArrayRef<llvm::Value *> CapturedVars,
150                             const Expr *IfCond);
151 
152 protected:
153   /// Get the function name of an outlined region.
154   //  The name can be customized depending on the target.
155   //
156   StringRef getOutlinedHelperName() const override {
157     return "__omp_outlined__";
158   }
159 
160   /// Check if the default location must be constant.
161   /// Constant for NVPTX for better optimization.
162   bool isDefaultLocationConstant() const override { return true; }
163 
164   /// Returns additional flags that can be stored in reserved_2 field of the
165   /// default location.
166   /// For NVPTX target contains data about SPMD/Non-SPMD execution mode +
167   /// Full/Lightweight runtime mode. Used for better optimization.
168   unsigned getDefaultLocationReserved2Flags() const override;
169 
170 public:
171   explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM);
172   void clear() override;
173 
174   /// Declare generalized virtual functions which need to be defined
175   /// by all specializations of OpenMPGPURuntime Targets like AMDGCN
176   /// and NVPTX.
177 
178   /// Get the GPU warp size.
179   llvm::Value *getGPUWarpSize(CodeGenFunction &CGF);
180 
181   /// Get the id of the current thread on the GPU.
182   llvm::Value *getGPUThreadID(CodeGenFunction &CGF);
183 
184   /// Get the maximum number of threads in a block of the GPU.
185   llvm::Value *getGPUNumThreads(CodeGenFunction &CGF);
186 
187   /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
188   /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
189   virtual void emitProcBindClause(CodeGenFunction &CGF,
190                                   llvm::omp::ProcBindKind ProcBind,
191                                   SourceLocation Loc) override;
192 
193   /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
194   /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
195   /// clause.
196   /// \param NumThreads An integer value of threads.
197   virtual void emitNumThreadsClause(CodeGenFunction &CGF,
198                                     llvm::Value *NumThreads,
199                                     SourceLocation Loc) override;
200 
201   /// This function ought to emit, in the general case, a call to
202   // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed
203   // as these numbers are obtained through the PTX grid and block configuration.
204   /// \param NumTeams An integer expression of teams.
205   /// \param ThreadLimit An integer expression of threads.
206   void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
207                           const Expr *ThreadLimit, SourceLocation Loc) override;
208 
209   /// Emits inlined function for the specified OpenMP parallel
210   //  directive.
211   /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
212   /// kmp_int32 BoundID, struct context_vars*).
213   /// \param D OpenMP directive.
214   /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
215   /// \param InnermostKind Kind of innermost directive (for simple directives it
216   /// is a directive itself, for combined - its innermost directive).
217   /// \param CodeGen Code generation sequence for the \a D directive.
218   llvm::Function *
219   emitParallelOutlinedFunction(const OMPExecutableDirective &D,
220                                const VarDecl *ThreadIDVar,
221                                OpenMPDirectiveKind InnermostKind,
222                                const RegionCodeGenTy &CodeGen) override;
223 
224   /// Emits inlined function for the specified OpenMP teams
225   //  directive.
226   /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
227   /// kmp_int32 BoundID, struct context_vars*).
228   /// \param D OpenMP directive.
229   /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
230   /// \param InnermostKind Kind of innermost directive (for simple directives it
231   /// is a directive itself, for combined - its innermost directive).
232   /// \param CodeGen Code generation sequence for the \a D directive.
233   llvm::Function *
234   emitTeamsOutlinedFunction(const OMPExecutableDirective &D,
235                             const VarDecl *ThreadIDVar,
236                             OpenMPDirectiveKind InnermostKind,
237                             const RegionCodeGenTy &CodeGen) override;
238 
239   /// Emits code for teams call of the \a OutlinedFn with
240   /// variables captured in a record which address is stored in \a
241   /// CapturedStruct.
242   /// \param OutlinedFn Outlined function to be run by team masters. Type of
243   /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
244   /// \param CapturedVars A pointer to the record with the references to
245   /// variables used in \a OutlinedFn function.
246   ///
247   void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
248                      SourceLocation Loc, llvm::Function *OutlinedFn,
249                      ArrayRef<llvm::Value *> CapturedVars) override;
250 
251   /// Emits code for parallel or serial call of the \a OutlinedFn with
252   /// variables captured in a record which address is stored in \a
253   /// CapturedStruct.
254   /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
255   /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
256   /// \param CapturedVars A pointer to the record with the references to
257   /// variables used in \a OutlinedFn function.
258   /// \param IfCond Condition in the associated 'if' clause, if it was
259   /// specified, nullptr otherwise.
260   /// \param NumThreads The value corresponding to the num_threads clause, if
261   /// any,
262   ///                   or nullptr.
263   void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
264                         llvm::Function *OutlinedFn,
265                         ArrayRef<llvm::Value *> CapturedVars,
266                         const Expr *IfCond, llvm::Value *NumThreads) override;
267 
268   /// Emit an implicit/explicit barrier for OpenMP threads.
269   /// \param Kind Directive for which this implicit barrier call must be
270   /// generated. Must be OMPD_barrier for explicit barrier generation.
271   /// \param EmitChecks true if need to emit checks for cancellation barriers.
272   /// \param ForceSimpleCall true simple barrier call must be emitted, false if
273   /// runtime class decides which one to emit (simple or with cancellation
274   /// checks).
275   ///
276   void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
277                        OpenMPDirectiveKind Kind, bool EmitChecks = true,
278                        bool ForceSimpleCall = false) override;
279 
280   /// Emits a critical region.
281   /// \param CriticalName Name of the critical region.
282   /// \param CriticalOpGen Generator for the statement associated with the given
283   /// critical region.
284   /// \param Hint Value of the 'hint' clause (optional).
285   void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName,
286                           const RegionCodeGenTy &CriticalOpGen,
287                           SourceLocation Loc,
288                           const Expr *Hint = nullptr) override;
289 
290   /// Emit a code for reduction clause.
291   ///
292   /// \param Privates List of private copies for original reduction arguments.
293   /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
294   /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
295   /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
296   /// or 'operator binop(LHS, RHS)'.
297   /// \param Options List of options for reduction codegen:
298   ///     WithNowait true if parent directive has also nowait clause, false
299   ///     otherwise.
300   ///     SimpleReduction Emit reduction operation only. Used for omp simd
301   ///     directive on the host.
302   ///     ReductionKind The kind of reduction to perform.
303   virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
304                              ArrayRef<const Expr *> Privates,
305                              ArrayRef<const Expr *> LHSExprs,
306                              ArrayRef<const Expr *> RHSExprs,
307                              ArrayRef<const Expr *> ReductionOps,
308                              ReductionOptionsTy Options) override;
309 
310   /// Returns specified OpenMP runtime function for the current OpenMP
311   /// implementation.  Specialized for the NVPTX device.
312   /// \param Function OpenMP runtime function.
313   /// \return Specified function.
314   llvm::FunctionCallee createNVPTXRuntimeFunction(unsigned Function);
315 
316   /// Translates the native parameter of outlined function if this is required
317   /// for target.
318   /// \param FD Field decl from captured record for the parameter.
319   /// \param NativeParam Parameter itself.
320   const VarDecl *translateParameter(const FieldDecl *FD,
321                                     const VarDecl *NativeParam) const override;
322 
323   /// Gets the address of the native argument basing on the address of the
324   /// target-specific parameter.
325   /// \param NativeParam Parameter itself.
326   /// \param TargetParam Corresponding target-specific parameter.
327   Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam,
328                               const VarDecl *TargetParam) const override;
329 
330   /// Emits call of the outlined function with the provided arguments,
331   /// translating these arguments to correct target-specific arguments.
332   void emitOutlinedFunctionCall(
333       CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
334       ArrayRef<llvm::Value *> Args = llvm::None) const override;
335 
336   /// Emits OpenMP-specific function prolog.
337   /// Required for device constructs.
338   void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override;
339 
340   /// Gets the OpenMP-specific address of the local variable.
341   Address getAddressOfLocalVariable(CodeGenFunction &CGF,
342                                     const VarDecl *VD) override;
343 
344   /// Target codegen is specialized based on two data-sharing modes: CUDA, in
345   /// which the local variables are actually global threadlocal, and Generic, in
346   /// which the local variables are placed in global memory if they may escape
347   /// their declaration context.
348   enum DataSharingMode {
349     /// CUDA data sharing mode.
350     CUDA,
351     /// Generic data-sharing mode.
352     Generic,
353   };
354 
355   /// Cleans up references to the objects in finished function.
356   ///
357   void functionFinished(CodeGenFunction &CGF) override;
358 
359   /// Choose a default value for the dist_schedule clause.
360   void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF,
361       const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
362       llvm::Value *&Chunk) const override;
363 
364   /// Choose a default value for the schedule clause.
365   void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
366       const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
367       const Expr *&ChunkExpr) const override;
368 
369   /// Adjust some parameters for the target-based directives, like addresses of
370   /// the variables captured by reference in lambdas.
371   void adjustTargetSpecificDataForLambdas(
372       CodeGenFunction &CGF, const OMPExecutableDirective &D) const override;
373 
374   /// Perform check on requires decl to ensure that target architecture
375   /// supports unified addressing
376   void processRequiresDirective(const OMPRequiresDecl *D) override;
377 
378   /// Checks if the variable has associated OMPAllocateDeclAttr attribute with
379   /// the predefined allocator and translates it into the corresponding address
380   /// space.
381   bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override;
382 
383 private:
384   /// Track the execution mode when codegening directives within a target
385   /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
386   /// target region and used by containing directives such as 'parallel'
387   /// to emit optimized code.
388   ExecutionMode CurrentExecutionMode = EM_Unknown;
389 
390   /// Check if the full runtime is required (default - yes).
391   bool RequiresFullRuntime = true;
392 
393   /// true if we're emitting the code for the target region and next parallel
394   /// region is L0 for sure.
395   bool IsInTargetMasterThreadRegion = false;
396   /// true if currently emitting code for target/teams/distribute region, false
397   /// - otherwise.
398   bool IsInTTDRegion = false;
399   /// true if we're definitely in the parallel region.
400   bool IsInParallelRegion = false;
401 
402   /// Map between an outlined function and its wrapper.
403   llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap;
404 
405   /// Emit function which wraps the outline parallel region
406   /// and controls the parameters which are passed to this function.
407   /// The wrapper ensures that the outlined function is called
408   /// with the correct arguments when data is shared.
409   llvm::Function *createParallelDataSharingWrapper(
410       llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D);
411 
412   /// The data for the single globalized variable.
413   struct MappedVarData {
414     /// Corresponding field in the global record.
415     llvm::Value *GlobalizedVal = nullptr;
416     /// Corresponding address.
417     Address PrivateAddr = Address::invalid();
418   };
419   /// The map of local variables to their addresses in the global memory.
420   using DeclToAddrMapTy = llvm::MapVector<const Decl *, MappedVarData>;
421   /// Set of the parameters passed by value escaping OpenMP context.
422   using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
423   struct FunctionData {
424     DeclToAddrMapTy LocalVarData;
425     llvm::Optional<DeclToAddrMapTy> SecondaryLocalVarData = llvm::None;
426     EscapedParamsTy EscapedParameters;
427     llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls;
428     llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4>
429         EscapedVariableLengthDeclsAddrs;
430     llvm::Value *IsInSPMDModeFlag = nullptr;
431     std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
432   };
433   /// Maps the function to the list of the globalized variables with their
434   /// addresses.
435   llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls;
436   llvm::GlobalVariable *KernelTeamsReductionPtr = nullptr;
437   /// List of the records with the list of fields for the reductions across the
438   /// teams. Used to build the intermediate buffer for the fast teams
439   /// reductions.
440   /// All the records are gathered into a union `union.type` is created.
441   llvm::SmallVector<const RecordDecl *, 4> TeamsReductions;
442   /// Shared pointer for the global memory in the global memory buffer used for
443   /// the given kernel.
444   llvm::GlobalVariable *KernelStaticGlobalized = nullptr;
445   /// Pair of the Non-SPMD team and all reductions variables in this team
446   /// region.
447   std::pair<const Decl *, llvm::SmallVector<const ValueDecl *, 4>>
448       TeamAndReductions;
449 };
450 
451 } // CodeGen namespace.
452 } // clang namespace.
453 
454 #endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H
455