1 //===------ CGOpenMPRuntimeGPU.h - Interface to OpenMP GPU Runtimes ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a generalized class for OpenMP runtime code generation 10 // specialized by GPU targets NVPTX and AMDGCN. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H 15 #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H 16 17 #include "CGOpenMPRuntime.h" 18 #include "CodeGenFunction.h" 19 #include "clang/AST/StmtOpenMP.h" 20 21 namespace clang { 22 namespace CodeGen { 23 24 class CGOpenMPRuntimeGPU : public CGOpenMPRuntime { 25 public: 26 /// Defines the execution mode. 27 enum ExecutionMode { 28 /// SPMD execution mode (all threads are worker threads). 29 EM_SPMD, 30 /// Non-SPMD execution mode (1 master thread, others are workers). 31 EM_NonSPMD, 32 /// Unknown execution mode (orphaned directive). 33 EM_Unknown, 34 }; 35 private: 36 /// Parallel outlined function work for workers to execute. 37 llvm::SmallVector<llvm::Function *, 16> Work; 38 39 struct EntryFunctionState { 40 SourceLocation Loc; 41 }; 42 43 ExecutionMode getExecutionMode() const; 44 45 /// Get barrier to synchronize all threads in a block. 46 void syncCTAThreads(CodeGenFunction &CGF); 47 48 /// Helper for target directive initialization. 49 void emitKernelInit(CodeGenFunction &CGF, EntryFunctionState &EST, 50 bool IsSPMD); 51 52 /// Helper for target directive finalization. 53 void emitKernelDeinit(CodeGenFunction &CGF, EntryFunctionState &EST, 54 bool IsSPMD); 55 56 /// Helper for generic variables globalization prolog. 57 void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc, 58 bool WithSPMDCheck = false); 59 60 /// Helper for generic variables globalization epilog. 61 void emitGenericVarsEpilog(CodeGenFunction &CGF, bool WithSPMDCheck = false); 62 63 // 64 // Base class overrides. 65 // 66 67 /// Emit outlined function specialized for the Fork-Join 68 /// programming model for applicable target directives on the NVPTX device. 69 /// \param D Directive to emit. 70 /// \param ParentName Name of the function that encloses the target region. 71 /// \param OutlinedFn Outlined function value to be defined by this call. 72 /// \param OutlinedFnID Outlined function ID value to be defined by this call. 73 /// \param IsOffloadEntry True if the outlined function is an offload entry. 74 /// An outlined function may not be an entry if, e.g. the if clause always 75 /// evaluates to false. 76 void emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName, 77 llvm::Function *&OutlinedFn, 78 llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, 79 const RegionCodeGenTy &CodeGen); 80 81 /// Emit outlined function specialized for the Single Program 82 /// Multiple Data programming model for applicable target directives on the 83 /// NVPTX device. 84 /// \param D Directive to emit. 85 /// \param ParentName Name of the function that encloses the target region. 86 /// \param OutlinedFn Outlined function value to be defined by this call. 87 /// \param OutlinedFnID Outlined function ID value to be defined by this call. 88 /// \param IsOffloadEntry True if the outlined function is an offload entry. 89 /// \param CodeGen Object containing the target statements. 90 /// An outlined function may not be an entry if, e.g. the if clause always 91 /// evaluates to false. 92 void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName, 93 llvm::Function *&OutlinedFn, 94 llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, 95 const RegionCodeGenTy &CodeGen); 96 97 /// Emit outlined function for 'target' directive on the NVPTX 98 /// device. 99 /// \param D Directive to emit. 100 /// \param ParentName Name of the function that encloses the target region. 101 /// \param OutlinedFn Outlined function value to be defined by this call. 102 /// \param OutlinedFnID Outlined function ID value to be defined by this call. 103 /// \param IsOffloadEntry True if the outlined function is an offload entry. 104 /// An outlined function may not be an entry if, e.g. the if clause always 105 /// evaluates to false. 106 void emitTargetOutlinedFunction(const OMPExecutableDirective &D, 107 StringRef ParentName, 108 llvm::Function *&OutlinedFn, 109 llvm::Constant *&OutlinedFnID, 110 bool IsOffloadEntry, 111 const RegionCodeGenTy &CodeGen) override; 112 113 /// Emits code for parallel or serial call of the \a OutlinedFn with 114 /// variables captured in a record which address is stored in \a 115 /// CapturedStruct. 116 /// This call is for the Non-SPMD Execution Mode. 117 /// \param OutlinedFn Outlined function to be run in parallel threads. Type of 118 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). 119 /// \param CapturedVars A pointer to the record with the references to 120 /// variables used in \a OutlinedFn function. 121 /// \param IfCond Condition in the associated 'if' clause, if it was 122 /// specified, nullptr otherwise. 123 void emitNonSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 124 llvm::Value *OutlinedFn, 125 ArrayRef<llvm::Value *> CapturedVars, 126 const Expr *IfCond); 127 128 /// Emits code for parallel or serial call of the \a OutlinedFn with 129 /// variables captured in a record which address is stored in \a 130 /// CapturedStruct. 131 /// This call is for a parallel directive within an SPMD target directive. 132 /// \param OutlinedFn Outlined function to be run in parallel threads. Type of 133 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). 134 /// \param CapturedVars A pointer to the record with the references to 135 /// variables used in \a OutlinedFn function. 136 /// \param IfCond Condition in the associated 'if' clause, if it was 137 /// specified, nullptr otherwise. 138 /// 139 void emitSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 140 llvm::Function *OutlinedFn, 141 ArrayRef<llvm::Value *> CapturedVars, 142 const Expr *IfCond); 143 144 protected: 145 /// Get the function name of an outlined region. 146 // The name can be customized depending on the target. 147 // 148 StringRef getOutlinedHelperName() const override { 149 return "__omp_outlined__"; 150 } 151 152 /// Check if the default location must be constant. 153 /// Constant for NVPTX for better optimization. 154 bool isDefaultLocationConstant() const override { return true; } 155 156 public: 157 explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM); 158 void clear() override; 159 160 bool isTargetCodegen() const override { return true; }; 161 162 /// Declare generalized virtual functions which need to be defined 163 /// by all specializations of OpenMPGPURuntime Targets like AMDGCN 164 /// and NVPTX. 165 166 /// Get the GPU warp size. 167 llvm::Value *getGPUWarpSize(CodeGenFunction &CGF); 168 169 /// Get the id of the current thread on the GPU. 170 llvm::Value *getGPUThreadID(CodeGenFunction &CGF); 171 172 /// Get the maximum number of threads in a block of the GPU. 173 llvm::Value *getGPUNumThreads(CodeGenFunction &CGF); 174 175 /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 176 /// global_tid, int proc_bind) to generate code for 'proc_bind' clause. 177 void emitProcBindClause(CodeGenFunction &CGF, 178 llvm::omp::ProcBindKind ProcBind, 179 SourceLocation Loc) override; 180 181 /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 182 /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads' 183 /// clause. 184 /// \param NumThreads An integer value of threads. 185 void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, 186 SourceLocation Loc) override; 187 188 /// This function ought to emit, in the general case, a call to 189 // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed 190 // as these numbers are obtained through the PTX grid and block configuration. 191 /// \param NumTeams An integer expression of teams. 192 /// \param ThreadLimit An integer expression of threads. 193 void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, 194 const Expr *ThreadLimit, SourceLocation Loc) override; 195 196 /// Emits inlined function for the specified OpenMP parallel 197 // directive. 198 /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, 199 /// kmp_int32 BoundID, struct context_vars*). 200 /// \param D OpenMP directive. 201 /// \param ThreadIDVar Variable for thread id in the current OpenMP region. 202 /// \param InnermostKind Kind of innermost directive (for simple directives it 203 /// is a directive itself, for combined - its innermost directive). 204 /// \param CodeGen Code generation sequence for the \a D directive. 205 llvm::Function * 206 emitParallelOutlinedFunction(const OMPExecutableDirective &D, 207 const VarDecl *ThreadIDVar, 208 OpenMPDirectiveKind InnermostKind, 209 const RegionCodeGenTy &CodeGen) override; 210 211 /// Emits inlined function for the specified OpenMP teams 212 // directive. 213 /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, 214 /// kmp_int32 BoundID, struct context_vars*). 215 /// \param D OpenMP directive. 216 /// \param ThreadIDVar Variable for thread id in the current OpenMP region. 217 /// \param InnermostKind Kind of innermost directive (for simple directives it 218 /// is a directive itself, for combined - its innermost directive). 219 /// \param CodeGen Code generation sequence for the \a D directive. 220 llvm::Function * 221 emitTeamsOutlinedFunction(const OMPExecutableDirective &D, 222 const VarDecl *ThreadIDVar, 223 OpenMPDirectiveKind InnermostKind, 224 const RegionCodeGenTy &CodeGen) override; 225 226 /// Emits code for teams call of the \a OutlinedFn with 227 /// variables captured in a record which address is stored in \a 228 /// CapturedStruct. 229 /// \param OutlinedFn Outlined function to be run by team masters. Type of 230 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). 231 /// \param CapturedVars A pointer to the record with the references to 232 /// variables used in \a OutlinedFn function. 233 /// 234 void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, 235 SourceLocation Loc, llvm::Function *OutlinedFn, 236 ArrayRef<llvm::Value *> CapturedVars) override; 237 238 /// Emits code for parallel or serial call of the \a OutlinedFn with 239 /// variables captured in a record which address is stored in \a 240 /// CapturedStruct. 241 /// \param OutlinedFn Outlined function to be run in parallel threads. Type of 242 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). 243 /// \param CapturedVars A pointer to the record with the references to 244 /// variables used in \a OutlinedFn function. 245 /// \param IfCond Condition in the associated 'if' clause, if it was 246 /// specified, nullptr otherwise. 247 /// \param NumThreads The value corresponding to the num_threads clause, if 248 /// any, 249 /// or nullptr. 250 void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 251 llvm::Function *OutlinedFn, 252 ArrayRef<llvm::Value *> CapturedVars, 253 const Expr *IfCond, llvm::Value *NumThreads) override; 254 255 /// Emit an implicit/explicit barrier for OpenMP threads. 256 /// \param Kind Directive for which this implicit barrier call must be 257 /// generated. Must be OMPD_barrier for explicit barrier generation. 258 /// \param EmitChecks true if need to emit checks for cancellation barriers. 259 /// \param ForceSimpleCall true simple barrier call must be emitted, false if 260 /// runtime class decides which one to emit (simple or with cancellation 261 /// checks). 262 /// 263 void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 264 OpenMPDirectiveKind Kind, bool EmitChecks = true, 265 bool ForceSimpleCall = false) override; 266 267 /// Emits a critical region. 268 /// \param CriticalName Name of the critical region. 269 /// \param CriticalOpGen Generator for the statement associated with the given 270 /// critical region. 271 /// \param Hint Value of the 'hint' clause (optional). 272 void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, 273 const RegionCodeGenTy &CriticalOpGen, 274 SourceLocation Loc, 275 const Expr *Hint = nullptr) override; 276 277 /// Emit a code for reduction clause. 278 /// 279 /// \param Privates List of private copies for original reduction arguments. 280 /// \param LHSExprs List of LHS in \a ReductionOps reduction operations. 281 /// \param RHSExprs List of RHS in \a ReductionOps reduction operations. 282 /// \param ReductionOps List of reduction operations in form 'LHS binop RHS' 283 /// or 'operator binop(LHS, RHS)'. 284 /// \param Options List of options for reduction codegen: 285 /// WithNowait true if parent directive has also nowait clause, false 286 /// otherwise. 287 /// SimpleReduction Emit reduction operation only. Used for omp simd 288 /// directive on the host. 289 /// ReductionKind The kind of reduction to perform. 290 void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 291 ArrayRef<const Expr *> Privates, 292 ArrayRef<const Expr *> LHSExprs, 293 ArrayRef<const Expr *> RHSExprs, 294 ArrayRef<const Expr *> ReductionOps, 295 ReductionOptionsTy Options) override; 296 297 /// Returns specified OpenMP runtime function for the current OpenMP 298 /// implementation. Specialized for the NVPTX device. 299 /// \param Function OpenMP runtime function. 300 /// \return Specified function. 301 llvm::FunctionCallee createNVPTXRuntimeFunction(unsigned Function); 302 303 /// Translates the native parameter of outlined function if this is required 304 /// for target. 305 /// \param FD Field decl from captured record for the parameter. 306 /// \param NativeParam Parameter itself. 307 const VarDecl *translateParameter(const FieldDecl *FD, 308 const VarDecl *NativeParam) const override; 309 310 /// Gets the address of the native argument basing on the address of the 311 /// target-specific parameter. 312 /// \param NativeParam Parameter itself. 313 /// \param TargetParam Corresponding target-specific parameter. 314 Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, 315 const VarDecl *TargetParam) const override; 316 317 /// Emits call of the outlined function with the provided arguments, 318 /// translating these arguments to correct target-specific arguments. 319 void emitOutlinedFunctionCall( 320 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 321 ArrayRef<llvm::Value *> Args = std::nullopt) const override; 322 323 /// Emits OpenMP-specific function prolog. 324 /// Required for device constructs. 325 void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override; 326 327 /// Gets the OpenMP-specific address of the local variable. 328 Address getAddressOfLocalVariable(CodeGenFunction &CGF, 329 const VarDecl *VD) override; 330 331 /// Target codegen is specialized based on two data-sharing modes: CUDA, in 332 /// which the local variables are actually global threadlocal, and Generic, in 333 /// which the local variables are placed in global memory if they may escape 334 /// their declaration context. 335 enum DataSharingMode { 336 /// CUDA data sharing mode. 337 CUDA, 338 /// Generic data-sharing mode. 339 Generic, 340 }; 341 342 /// Cleans up references to the objects in finished function. 343 /// 344 void functionFinished(CodeGenFunction &CGF) override; 345 346 /// Choose a default value for the dist_schedule clause. 347 void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, 348 const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, 349 llvm::Value *&Chunk) const override; 350 351 /// Choose a default value for the schedule clause. 352 void getDefaultScheduleAndChunk(CodeGenFunction &CGF, 353 const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, 354 const Expr *&ChunkExpr) const override; 355 356 /// Adjust some parameters for the target-based directives, like addresses of 357 /// the variables captured by reference in lambdas. 358 void adjustTargetSpecificDataForLambdas( 359 CodeGenFunction &CGF, const OMPExecutableDirective &D) const override; 360 361 /// Perform check on requires decl to ensure that target architecture 362 /// supports unified addressing 363 void processRequiresDirective(const OMPRequiresDecl *D) override; 364 365 /// Checks if the variable has associated OMPAllocateDeclAttr attribute with 366 /// the predefined allocator and translates it into the corresponding address 367 /// space. 368 bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override; 369 370 private: 371 /// Track the execution mode when codegening directives within a target 372 /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the 373 /// target region and used by containing directives such as 'parallel' 374 /// to emit optimized code. 375 ExecutionMode CurrentExecutionMode = EM_Unknown; 376 377 /// true if currently emitting code for target/teams/distribute region, false 378 /// - otherwise. 379 bool IsInTTDRegion = false; 380 381 /// Map between an outlined function and its wrapper. 382 llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap; 383 384 /// Emit function which wraps the outline parallel region 385 /// and controls the parameters which are passed to this function. 386 /// The wrapper ensures that the outlined function is called 387 /// with the correct arguments when data is shared. 388 llvm::Function *createParallelDataSharingWrapper( 389 llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D); 390 391 /// The data for the single globalized variable. 392 struct MappedVarData { 393 /// Corresponding field in the global record. 394 llvm::Value *GlobalizedVal = nullptr; 395 /// Corresponding address. 396 Address PrivateAddr = Address::invalid(); 397 }; 398 /// The map of local variables to their addresses in the global memory. 399 using DeclToAddrMapTy = llvm::MapVector<const Decl *, MappedVarData>; 400 /// Set of the parameters passed by value escaping OpenMP context. 401 using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>; 402 struct FunctionData { 403 DeclToAddrMapTy LocalVarData; 404 EscapedParamsTy EscapedParameters; 405 llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls; 406 llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4> 407 EscapedVariableLengthDeclsAddrs; 408 std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams; 409 }; 410 /// Maps the function to the list of the globalized variables with their 411 /// addresses. 412 llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls; 413 llvm::GlobalVariable *KernelTeamsReductionPtr = nullptr; 414 /// List of the records with the list of fields for the reductions across the 415 /// teams. Used to build the intermediate buffer for the fast teams 416 /// reductions. 417 /// All the records are gathered into a union `union.type` is created. 418 llvm::SmallVector<const RecordDecl *, 4> TeamsReductions; 419 /// Pair of the Non-SPMD team and all reductions variables in this team 420 /// region. 421 std::pair<const Decl *, llvm::SmallVector<const ValueDecl *, 4>> 422 TeamAndReductions; 423 }; 424 425 } // CodeGen namespace. 426 } // clang namespace. 427 428 #endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H 429