xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h (revision ec0ea6efa1ad229d75c394c1a9b9cac33af2b1d3)
1 //===------ CGOpenMPRuntimeGPU.h - Interface to OpenMP GPU Runtimes ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a generalized class for OpenMP runtime code generation
10 // specialized by GPU targets NVPTX and AMDGCN.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H
15 #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H
16 
17 #include "CGOpenMPRuntime.h"
18 #include "CodeGenFunction.h"
19 #include "clang/AST/StmtOpenMP.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 
22 namespace clang {
23 namespace CodeGen {
24 
/// OpenMP runtime code generation shared by GPU targets. Extends
/// CGOpenMPRuntime with GPU-specific overrides; target subclasses supply the
/// pure virtual getGPUWarpSize/getGPUThreadID/getGPUNumThreads hooks.
25 class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
26 public:
27   /// Defines the execution mode.
28   enum ExecutionMode {
29     /// SPMD execution mode (all threads are worker threads).
30     EM_SPMD,
31     /// Non-SPMD execution mode (1 master thread, others are workers).
32     EM_NonSPMD,
33     /// Unknown execution mode (orphaned directive).
34     EM_Unknown,
35   };
36 private:
37   /// Parallel outlined function work for workers to execute.
38   llvm::SmallVector<llvm::Function *, 16> Work;
39 
  /// State handed to emitKernelInit()/emitKernelDeinit(); currently holds
  /// only a source location.
40   struct EntryFunctionState {
41     SourceLocation Loc;
42   };
43 
  /// Returns the execution mode.
  /// NOTE(review): defined out of line; presumably reports
  /// CurrentExecutionMode - confirm against the implementation.
44   ExecutionMode getExecutionMode() const;
45 
  /// Returns the RequiresFullRuntime flag.
46   bool requiresFullRuntime() const { return RequiresFullRuntime; }
47 
48   /// Get barrier to synchronize all threads in a block.
49   void syncCTAThreads(CodeGenFunction &CGF);
50 
51   /// Helper for target directive initialization.
52   void emitKernelInit(CodeGenFunction &CGF, EntryFunctionState &EST,
53                       bool IsSPMD);
54 
55   /// Helper for target directive finalization.
56   void emitKernelDeinit(CodeGenFunction &CGF, EntryFunctionState &EST,
57                         bool IsSPMD);
58 
59   /// Helper for generic variables globalization prolog.
60   void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc,
61                              bool WithSPMDCheck = false);
62 
63   /// Helper for generic variables globalization epilog.
64   void emitGenericVarsEpilog(CodeGenFunction &CGF, bool WithSPMDCheck = false);
65 
66   //
67   // Base class overrides.
68   //
69 
70   /// Creates offloading entry for the provided entry ID \a ID,
71   /// address \a Addr, size \a Size, flags \a Flags, and linkage \a Linkage.
72   void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
73                           uint64_t Size, int32_t Flags,
74                           llvm::GlobalValue::LinkageTypes Linkage) override;
75 
76   /// Emit outlined function specialized for the Fork-Join
77   /// programming model for applicable target directives on the NVPTX device.
78   /// \param D Directive to emit.
79   /// \param ParentName Name of the function that encloses the target region.
80   /// \param OutlinedFn Outlined function value to be defined by this call.
81   /// \param OutlinedFnID Outlined function ID value to be defined by this call.
82   /// \param IsOffloadEntry True if the outlined function is an offload entry.
83   /// An outlined function may not be an entry if, e.g. the if clause always
84   /// evaluates to false.
  /// \param CodeGen Object containing the target statements.
85   void emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
86                          llvm::Function *&OutlinedFn,
87                          llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
88                          const RegionCodeGenTy &CodeGen);
89 
90   /// Emit outlined function specialized for the Single Program
91   /// Multiple Data programming model for applicable target directives on the
92   /// NVPTX device.
93   /// \param D Directive to emit.
94   /// \param ParentName Name of the function that encloses the target region.
95   /// \param OutlinedFn Outlined function value to be defined by this call.
96   /// \param OutlinedFnID Outlined function ID value to be defined by this call.
97   /// \param IsOffloadEntry True if the outlined function is an offload entry.
98   /// An outlined function may not be an entry if, e.g. the if clause always
99   /// evaluates to false.
100   /// \param CodeGen Object containing the target statements.
101   void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
102                       llvm::Function *&OutlinedFn,
103                       llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
104                       const RegionCodeGenTy &CodeGen);
105 
106   /// Emit outlined function for 'target' directive on the NVPTX
107   /// device.
108   /// \param D Directive to emit.
109   /// \param ParentName Name of the function that encloses the target region.
110   /// \param OutlinedFn Outlined function value to be defined by this call.
111   /// \param OutlinedFnID Outlined function ID value to be defined by this call.
112   /// \param IsOffloadEntry True if the outlined function is an offload entry.
113   /// An outlined function may not be an entry if, e.g. the if clause always
114   /// evaluates to false.
  /// \param CodeGen Object containing the target statements.
115   void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
116                                   StringRef ParentName,
117                                   llvm::Function *&OutlinedFn,
118                                   llvm::Constant *&OutlinedFnID,
119                                   bool IsOffloadEntry,
120                                   const RegionCodeGenTy &CodeGen) override;
121 
122   /// Emits code for parallel or serial call of the \a OutlinedFn with
123   /// the captured variables supplied in \a CapturedVars.
124   ///
125   /// This call is for the Non-SPMD Execution Mode.
126   /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
127   /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
128   /// \param CapturedVars Captured variables passed as an array of values; these
129   /// are the variables used in \a OutlinedFn function.
130   /// \param IfCond Condition in the associated 'if' clause, if it was
131   /// specified, nullptr otherwise.
132   void emitNonSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
133                                llvm::Value *OutlinedFn,
134                                ArrayRef<llvm::Value *> CapturedVars,
135                                const Expr *IfCond);
136 
137   /// Emits code for parallel or serial call of the \a OutlinedFn with
138   /// the captured variables supplied in \a CapturedVars.
139   ///
140   /// This call is for a parallel directive within an SPMD target directive.
141   /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
142   /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
143   /// \param CapturedVars Captured variables passed as an array of values; these
144   /// are the variables used in \a OutlinedFn function.
145   /// \param IfCond Condition in the associated 'if' clause, if it was
146   /// specified, nullptr otherwise.
147   ///
148   void emitSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
149                             llvm::Function *OutlinedFn,
150                             ArrayRef<llvm::Value *> CapturedVars,
151                             const Expr *IfCond);
152 
153 protected:
154   /// Get the function name of an outlined region.
155   /// The name can be customized depending on the target.
156   ///
157   StringRef getOutlinedHelperName() const override {
158     return "__omp_outlined__";
159   }
160 
161   /// Check if the default location must be constant.
162   /// Constant for NVPTX for better optimization.
163   bool isDefaultLocationConstant() const override { return true; }
164 
165   /// Returns additional flags that can be stored in reserved_2 field of the
166   /// default location.
167   /// For NVPTX target contains data about SPMD/Non-SPMD execution mode +
168   /// Full/Lightweight runtime mode. Used for better optimization.
169   unsigned getDefaultLocationReserved2Flags() const override;
170 
171 public:
  /// Constructs the runtime from the given CodeGenModule.
172   explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM);
  /// Overrides the base-class clear().
  /// NOTE(review): defined out of line; what state it resets is not visible
  /// from this header - confirm against the implementation.
173   void clear() override;
174 
175   /// The three pure virtual functions below must be defined by every
176   /// target specialization of CGOpenMPRuntimeGPU, such as AMDGCN
177   /// and NVPTX.
178 
179   /// Get the GPU warp size.
180   virtual llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) = 0;
181 
182   /// Get the id of the current thread on the GPU.
183   virtual llvm::Value *getGPUThreadID(CodeGenFunction &CGF) = 0;
184 
185   /// Get the maximum number of threads in a block of the GPU.
186   virtual llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) = 0;
187 
188   /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
189   /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
190   virtual void emitProcBindClause(CodeGenFunction &CGF,
191                                   llvm::omp::ProcBindKind ProcBind,
192                                   SourceLocation Loc) override;
193 
194   /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
195   /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
196   /// clause.
197   /// \param NumThreads An integer value of threads.
198   virtual void emitNumThreadsClause(CodeGenFunction &CGF,
199                                     llvm::Value *NumThreads,
200                                     SourceLocation Loc) override;
201 
202   /// This function ought to emit, in the general case, a call to the
203   /// OpenMP runtime kmpc_push_num_teams. In NVPTX backend it is not needed as
204   /// these numbers are obtained through the PTX grid and block configuration.
205   /// \param NumTeams An integer expression of teams.
206   /// \param ThreadLimit An integer expression of threads.
207   void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
208                           const Expr *ThreadLimit, SourceLocation Loc) override;
209 
210   /// Emits outlined function for the specified OpenMP parallel directive
211   /// \a D.
212   /// This outlined function has type void(*)(kmp_int32 *ThreadID,
213   /// kmp_int32 BoundID, struct context_vars*).
214   /// \param D OpenMP directive.
215   /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
216   /// \param InnermostKind Kind of innermost directive (for simple directives it
217   /// is a directive itself, for combined - its innermost directive).
218   /// \param CodeGen Code generation sequence for the \a D directive.
219   llvm::Function *
220   emitParallelOutlinedFunction(const OMPExecutableDirective &D,
221                                const VarDecl *ThreadIDVar,
222                                OpenMPDirectiveKind InnermostKind,
223                                const RegionCodeGenTy &CodeGen) override;
224 
225   /// Emits outlined function for the specified OpenMP teams directive
226   /// \a D.
227   /// This outlined function has type void(*)(kmp_int32 *ThreadID,
228   /// kmp_int32 BoundID, struct context_vars*).
229   /// \param D OpenMP directive.
230   /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
231   /// \param InnermostKind Kind of innermost directive (for simple directives it
232   /// is a directive itself, for combined - its innermost directive).
233   /// \param CodeGen Code generation sequence for the \a D directive.
234   llvm::Function *
235   emitTeamsOutlinedFunction(const OMPExecutableDirective &D,
236                             const VarDecl *ThreadIDVar,
237                             OpenMPDirectiveKind InnermostKind,
238                             const RegionCodeGenTy &CodeGen) override;
239 
240   /// Emits code for teams call of the \a OutlinedFn with
241   /// the captured variables supplied in \a CapturedVars.
242   ///
243   /// \param OutlinedFn Outlined function to be run by team masters. Type of
244   /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
245   /// \param CapturedVars Captured variables passed as an array of values; these
246   /// are the variables used in \a OutlinedFn function.
247   ///
248   void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
249                      SourceLocation Loc, llvm::Function *OutlinedFn,
250                      ArrayRef<llvm::Value *> CapturedVars) override;
251 
252   /// Emits code for parallel or serial call of the \a OutlinedFn with
253   /// the captured variables supplied in \a CapturedVars.
254   ///
255   /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
256   /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
257   /// \param CapturedVars Captured variables passed as an array of values; these
258   /// are the variables used in \a OutlinedFn function.
259   /// \param IfCond Condition in the associated 'if' clause, if it was
260   /// specified, nullptr otherwise.
261   void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
262                         llvm::Function *OutlinedFn,
263                         ArrayRef<llvm::Value *> CapturedVars,
264                         const Expr *IfCond) override;
265 
266   /// Emit an implicit/explicit barrier for OpenMP threads.
267   /// \param Kind Directive for which this implicit barrier call must be
268   /// generated. Must be OMPD_barrier for explicit barrier generation.
269   /// \param EmitChecks true if need to emit checks for cancellation barriers.
270   /// \param ForceSimpleCall true if simple barrier call must be emitted, false
271   /// if runtime class decides which one to emit (simple or with cancellation
272   /// checks).
273   ///
274   void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
275                        OpenMPDirectiveKind Kind, bool EmitChecks = true,
276                        bool ForceSimpleCall = false) override;
277 
278   /// Emits a critical region.
279   /// \param CriticalName Name of the critical region.
280   /// \param CriticalOpGen Generator for the statement associated with the given
281   /// critical region.
282   /// \param Hint Value of the 'hint' clause (optional).
283   void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName,
284                           const RegionCodeGenTy &CriticalOpGen,
285                           SourceLocation Loc,
286                           const Expr *Hint = nullptr) override;
287 
288   /// Emit code for the reduction clause.
289   ///
290   /// \param Privates List of private copies for original reduction arguments.
291   /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
292   /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
293   /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
294   /// or 'operator binop(LHS, RHS)'.
295   /// \param Options List of options for reduction codegen:
296   ///     WithNowait true if parent directive has also nowait clause, false
297   ///     otherwise.
298   ///     SimpleReduction Emit reduction operation only. Used for omp simd
299   ///     directive on the host.
300   ///     ReductionKind The kind of reduction to perform.
301   virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
302                              ArrayRef<const Expr *> Privates,
303                              ArrayRef<const Expr *> LHSExprs,
304                              ArrayRef<const Expr *> RHSExprs,
305                              ArrayRef<const Expr *> ReductionOps,
306                              ReductionOptionsTy Options) override;
307 
308   /// Returns specified OpenMP runtime function for the current OpenMP
309   /// implementation.  Specialized for the NVPTX device.
310   /// \param Function OpenMP runtime function.
311   /// \return Specified function.
312   llvm::FunctionCallee createNVPTXRuntimeFunction(unsigned Function);
313 
314   /// Translates the native parameter of outlined function if this is required
315   /// for target.
316   /// \param FD Field decl from captured record for the parameter.
317   /// \param NativeParam Parameter itself.
318   const VarDecl *translateParameter(const FieldDecl *FD,
319                                     const VarDecl *NativeParam) const override;
320 
321   /// Gets the address of the native argument based on the address of the
322   /// target-specific parameter.
323   /// \param NativeParam Parameter itself.
324   /// \param TargetParam Corresponding target-specific parameter.
325   Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam,
326                               const VarDecl *TargetParam) const override;
327 
328   /// Emits call of the outlined function with the provided arguments,
329   /// translating these arguments to correct target-specific arguments.
330   void emitOutlinedFunctionCall(
331       CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
332       ArrayRef<llvm::Value *> Args = llvm::None) const override;
333 
334   /// Emits OpenMP-specific function prolog.
335   /// Required for device constructs.
336   void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override;
337 
338   /// Gets the OpenMP-specific address of the local variable.
339   Address getAddressOfLocalVariable(CodeGenFunction &CGF,
340                                     const VarDecl *VD) override;
341 
342   /// Target codegen is specialized based on two data-sharing modes: CUDA, in
343   /// which the local variables are actually global threadlocal, and Generic, in
344   /// which the local variables are placed in global memory if they may escape
345   /// their declaration context.
346   enum DataSharingMode {
347     /// CUDA data sharing mode.
348     CUDA,
349     /// Generic data-sharing mode.
350     Generic,
351   };
352 
353   /// Cleans up references to the objects in finished function.
354   ///
355   void functionFinished(CodeGenFunction &CGF) override;
356 
357   /// Choose a default value for the dist_schedule clause.
358   void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF,
359       const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
360       llvm::Value *&Chunk) const override;
361 
362   /// Choose a default value for the schedule clause.
363   void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
364       const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
365       const Expr *&ChunkExpr) const override;
366 
367   /// Adjust some parameters for the target-based directives, like addresses of
368   /// the variables captured by reference in lambdas.
369   void adjustTargetSpecificDataForLambdas(
370       CodeGenFunction &CGF, const OMPExecutableDirective &D) const override;
371 
372   /// Perform check on requires decl to ensure that target architecture
373   /// supports unified addressing.
374   void processRequiresDirective(const OMPRequiresDecl *D) override;
375 
376   /// Checks if the variable has associated OMPAllocateDeclAttr attribute with
377   /// the predefined allocator and translates it into the corresponding address
378   /// space.
379   bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override;
380 
381 private:
382   /// Track the execution mode when generating code for directives within a
383   /// target region. The appropriate mode (SPMD/Non-SPMD) is set on entry to
384   /// the target region and used by containing directives such as 'parallel'
385   /// to emit optimized code.
386   ExecutionMode CurrentExecutionMode = EM_Unknown;
387 
388   /// Check if the full runtime is required (default - yes).
389   bool RequiresFullRuntime = true;
390 
391   /// true if we're emitting the code for the target region and next parallel
392   /// region is L0 for sure.
393   bool IsInTargetMasterThreadRegion = false;
394   /// true if currently emitting code for target/teams/distribute region, false
395   /// otherwise.
396   bool IsInTTDRegion = false;
397   /// true if we're definitely in the parallel region.
398   bool IsInParallelRegion = false;
399 
400   /// Map between an outlined function and its wrapper.
401   llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap;
402 
403   /// Emit function which wraps the outlined parallel region
404   /// and controls the parameters which are passed to this function.
405   /// The wrapper ensures that the outlined function is called
406   /// with the correct arguments when data is shared.
407   llvm::Function *createParallelDataSharingWrapper(
408       llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D);
409 
410   /// The data for the single globalized variable.
411   struct MappedVarData {
412     /// Corresponding field in the global record.
413     llvm::Value *GlobalizedVal = nullptr;
414     /// Corresponding address.
415     Address PrivateAddr = Address::invalid();
416   };
417   /// The map of local variables to their addresses in the global memory.
418   using DeclToAddrMapTy = llvm::MapVector<const Decl *, MappedVarData>;
419   /// Set of the parameters passed by value escaping OpenMP context.
420   using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
  /// Per-function globalization data; the value type of
  /// FunctionGlobalizedDecls.
421   struct FunctionData {
422     DeclToAddrMapTy LocalVarData;
423     llvm::Optional<DeclToAddrMapTy> SecondaryLocalVarData = llvm::None;
424     EscapedParamsTy EscapedParameters;
425     llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls;
426     llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4>
427         EscapedVariableLengthDeclsAddrs;
428     llvm::Value *IsInSPMDModeFlag = nullptr;
429     std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
430   };
431   /// Maps the function to the list of the globalized variables with their
432   /// addresses.
433   llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls;
  /// NOTE(review): undocumented in the original; presumably the global
  /// holding the pointer to the teams reduction buffer - confirm.
434   llvm::GlobalVariable *KernelTeamsReductionPtr = nullptr;
435   /// List of the records with the list of fields for the reductions across the
436   /// teams. Used to build the intermediate buffer for the fast teams
437   /// reductions.
438   /// All the records are gathered into a union (`union.type` is created).
439   llvm::SmallVector<const RecordDecl *, 4> TeamsReductions;
440   /// Shared pointer for the global memory in the global memory buffer used for
441   /// the given kernel.
442   llvm::GlobalVariable *KernelStaticGlobalized = nullptr;
443   /// Pair of the Non-SPMD team and all reduction variables in this team
444   /// region.
445   std::pair<const Decl *, llvm::SmallVector<const ValueDecl *, 4>>
446       TeamAndReductions;
447 };
448 
449 } // CodeGen namespace.
450 } // clang namespace.
451 
452 #endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H
453