1 //===------ CGOpenMPRuntimeGPU.h - Interface to OpenMP GPU Runtimes ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a generalized class for OpenMP runtime code generation 10 // specialized by GPU targets NVPTX and AMDGCN. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H 15 #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H 16 17 #include "CGOpenMPRuntime.h" 18 #include "CodeGenFunction.h" 19 #include "clang/AST/StmtOpenMP.h" 20 #include "llvm/Frontend/OpenMP/OMPGridValues.h" 21 22 namespace clang { 23 namespace CodeGen { 24 25 class CGOpenMPRuntimeGPU : public CGOpenMPRuntime { 26 public: 27 /// Defines the execution mode. 28 enum ExecutionMode { 29 /// SPMD execution mode (all threads are worker threads). 30 EM_SPMD, 31 /// Non-SPMD execution mode (1 master thread, others are workers). 32 EM_NonSPMD, 33 /// Unknown execution mode (orphaned directive). 34 EM_Unknown, 35 }; 36 private: 37 /// Parallel outlined function work for workers to execute. 38 llvm::SmallVector<llvm::Function *, 16> Work; 39 40 struct EntryFunctionState { 41 llvm::BasicBlock *ExitBB = nullptr; 42 }; 43 44 class WorkerFunctionState { 45 public: 46 llvm::Function *WorkerFn; 47 const CGFunctionInfo &CGFI; 48 SourceLocation Loc; 49 50 WorkerFunctionState(CodeGenModule &CGM, SourceLocation Loc); 51 52 private: 53 void createWorkerFunction(CodeGenModule &CGM); 54 }; 55 56 ExecutionMode getExecutionMode() const; 57 58 bool requiresFullRuntime() const { return RequiresFullRuntime; } 59 60 /// Get barrier to synchronize all threads in a block. 61 void syncCTAThreads(CodeGenFunction &CGF); 62 63 /// Emit the worker function for the current target region. 64 void emitWorkerFunction(WorkerFunctionState &WST); 65 66 /// Helper for worker function. Emit body of worker loop. 67 void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST); 68 69 /// Helper for non-SPMD target entry function. Guide the master and 70 /// worker threads to their respective locations. 71 void emitNonSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, 72 WorkerFunctionState &WST); 73 74 /// Signal termination of OMP execution for non-SPMD target entry 75 /// function. 76 void emitNonSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); 77 78 /// Helper for generic variables globalization prolog. 79 void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc, 80 bool WithSPMDCheck = false); 81 82 /// Helper for generic variables globalization epilog. 83 void emitGenericVarsEpilog(CodeGenFunction &CGF, bool WithSPMDCheck = false); 84 85 /// Helper for SPMD mode target directive's entry function. 86 void emitSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, 87 const OMPExecutableDirective &D); 88 89 /// Signal termination of SPMD mode execution. 90 void emitSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); 91 92 // 93 // Base class overrides. 94 // 95 96 /// Creates offloading entry for the provided entry ID \a ID, 97 /// address \a Addr, size \a Size, and flags \a Flags. 98 void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, 99 uint64_t Size, int32_t Flags, 100 llvm::GlobalValue::LinkageTypes Linkage) override; 101 102 /// Emit outlined function specialized for the Fork-Join 103 /// programming model for applicable target directives on the NVPTX device. 104 /// \param D Directive to emit. 105 /// \param ParentName Name of the function that encloses the target region. 106 /// \param OutlinedFn Outlined function value to be defined by this call. 107 /// \param OutlinedFnID Outlined function ID value to be defined by this call. 108 /// \param IsOffloadEntry True if the outlined function is an offload entry. 109 /// An outlined function may not be an entry if, e.g. the if clause always 110 /// evaluates to false. 111 void emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName, 112 llvm::Function *&OutlinedFn, 113 llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, 114 const RegionCodeGenTy &CodeGen); 115 116 /// Emit outlined function specialized for the Single Program 117 /// Multiple Data programming model for applicable target directives on the 118 /// NVPTX device. 119 /// \param D Directive to emit. 120 /// \param ParentName Name of the function that encloses the target region. 121 /// \param OutlinedFn Outlined function value to be defined by this call. 122 /// \param OutlinedFnID Outlined function ID value to be defined by this call. 123 /// \param IsOffloadEntry True if the outlined function is an offload entry. 124 /// \param CodeGen Object containing the target statements. 125 /// An outlined function may not be an entry if, e.g. the if clause always 126 /// evaluates to false. 127 void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName, 128 llvm::Function *&OutlinedFn, 129 llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, 130 const RegionCodeGenTy &CodeGen); 131 132 /// Emit outlined function for 'target' directive on the NVPTX 133 /// device. 134 /// \param D Directive to emit. 135 /// \param ParentName Name of the function that encloses the target region. 136 /// \param OutlinedFn Outlined function value to be defined by this call. 137 /// \param OutlinedFnID Outlined function ID value to be defined by this call. 138 /// \param IsOffloadEntry True if the outlined function is an offload entry. 139 /// An outlined function may not be an entry if, e.g. the if clause always 140 /// evaluates to false. 141 void emitTargetOutlinedFunction(const OMPExecutableDirective &D, 142 StringRef ParentName, 143 llvm::Function *&OutlinedFn, 144 llvm::Constant *&OutlinedFnID, 145 bool IsOffloadEntry, 146 const RegionCodeGenTy &CodeGen) override; 147 148 /// Emits code for parallel or serial call of the \a OutlinedFn with 149 /// variables captured in a record which address is stored in \a 150 /// CapturedStruct. 151 /// This call is for the Non-SPMD Execution Mode. 152 /// \param OutlinedFn Outlined function to be run in parallel threads. Type of 153 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). 154 /// \param CapturedVars A pointer to the record with the references to 155 /// variables used in \a OutlinedFn function. 156 /// \param IfCond Condition in the associated 'if' clause, if it was 157 /// specified, nullptr otherwise. 158 void emitNonSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 159 llvm::Value *OutlinedFn, 160 ArrayRef<llvm::Value *> CapturedVars, 161 const Expr *IfCond); 162 163 /// Emits code for parallel or serial call of the \a OutlinedFn with 164 /// variables captured in a record which address is stored in \a 165 /// CapturedStruct. 166 /// This call is for a parallel directive within an SPMD target directive. 167 /// \param OutlinedFn Outlined function to be run in parallel threads. Type of 168 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). 169 /// \param CapturedVars A pointer to the record with the references to 170 /// variables used in \a OutlinedFn function. 171 /// \param IfCond Condition in the associated 'if' clause, if it was 172 /// specified, nullptr otherwise. 173 /// 174 void emitSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 175 llvm::Function *OutlinedFn, 176 ArrayRef<llvm::Value *> CapturedVars, 177 const Expr *IfCond); 178 179 protected: 180 /// Get the function name of an outlined region. 181 // The name can be customized depending on the target. 182 // 183 StringRef getOutlinedHelperName() const override { 184 return "__omp_outlined__"; 185 } 186 187 /// Check if the default location must be constant. 188 /// Constant for NVPTX for better optimization. 189 bool isDefaultLocationConstant() const override { return true; } 190 191 /// Returns additional flags that can be stored in reserved_2 field of the 192 /// default location. 193 /// For NVPTX target contains data about SPMD/Non-SPMD execution mode + 194 /// Full/Lightweight runtime mode. Used for better optimization. 195 unsigned getDefaultLocationReserved2Flags() const override; 196 197 public: 198 explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM); 199 void clear() override; 200 201 /// Declare generalized virtual functions which need to be defined 202 /// by all specializations of OpenMPGPURuntime Targets like AMDGCN 203 /// and NVPTX. 204 205 /// Get the GPU warp size. 206 virtual llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) = 0; 207 208 /// Get the id of the current thread on the GPU. 209 virtual llvm::Value *getGPUThreadID(CodeGenFunction &CGF) = 0; 210 211 /// Get the maximum number of threads in a block of the GPU. 212 virtual llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) = 0; 213 214 /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 215 /// global_tid, int proc_bind) to generate code for 'proc_bind' clause. 216 virtual void emitProcBindClause(CodeGenFunction &CGF, 217 llvm::omp::ProcBindKind ProcBind, 218 SourceLocation Loc) override; 219 220 /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 221 /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads' 222 /// clause. 223 /// \param NumThreads An integer value of threads. 224 virtual void emitNumThreadsClause(CodeGenFunction &CGF, 225 llvm::Value *NumThreads, 226 SourceLocation Loc) override; 227 228 /// This function ought to emit, in the general case, a call to 229 // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed 230 // as these numbers are obtained through the PTX grid and block configuration. 231 /// \param NumTeams An integer expression of teams. 232 /// \param ThreadLimit An integer expression of threads. 233 void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, 234 const Expr *ThreadLimit, SourceLocation Loc) override; 235 236 /// Emits inlined function for the specified OpenMP parallel 237 // directive. 238 /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, 239 /// kmp_int32 BoundID, struct context_vars*). 240 /// \param D OpenMP directive. 241 /// \param ThreadIDVar Variable for thread id in the current OpenMP region. 242 /// \param InnermostKind Kind of innermost directive (for simple directives it 243 /// is a directive itself, for combined - its innermost directive). 244 /// \param CodeGen Code generation sequence for the \a D directive. 245 llvm::Function * 246 emitParallelOutlinedFunction(const OMPExecutableDirective &D, 247 const VarDecl *ThreadIDVar, 248 OpenMPDirectiveKind InnermostKind, 249 const RegionCodeGenTy &CodeGen) override; 250 251 /// Emits inlined function for the specified OpenMP teams 252 // directive. 253 /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, 254 /// kmp_int32 BoundID, struct context_vars*). 255 /// \param D OpenMP directive. 256 /// \param ThreadIDVar Variable for thread id in the current OpenMP region. 257 /// \param InnermostKind Kind of innermost directive (for simple directives it 258 /// is a directive itself, for combined - its innermost directive). 259 /// \param CodeGen Code generation sequence for the \a D directive. 260 llvm::Function * 261 emitTeamsOutlinedFunction(const OMPExecutableDirective &D, 262 const VarDecl *ThreadIDVar, 263 OpenMPDirectiveKind InnermostKind, 264 const RegionCodeGenTy &CodeGen) override; 265 266 /// Emits code for teams call of the \a OutlinedFn with 267 /// variables captured in a record which address is stored in \a 268 /// CapturedStruct. 269 /// \param OutlinedFn Outlined function to be run by team masters. Type of 270 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). 271 /// \param CapturedVars A pointer to the record with the references to 272 /// variables used in \a OutlinedFn function. 273 /// 274 void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, 275 SourceLocation Loc, llvm::Function *OutlinedFn, 276 ArrayRef<llvm::Value *> CapturedVars) override; 277 278 /// Emits code for parallel or serial call of the \a OutlinedFn with 279 /// variables captured in a record which address is stored in \a 280 /// CapturedStruct. 281 /// \param OutlinedFn Outlined function to be run in parallel threads. Type of 282 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). 283 /// \param CapturedVars A pointer to the record with the references to 284 /// variables used in \a OutlinedFn function. 285 /// \param IfCond Condition in the associated 'if' clause, if it was 286 /// specified, nullptr otherwise. 287 void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 288 llvm::Function *OutlinedFn, 289 ArrayRef<llvm::Value *> CapturedVars, 290 const Expr *IfCond) override; 291 292 /// Emit an implicit/explicit barrier for OpenMP threads. 293 /// \param Kind Directive for which this implicit barrier call must be 294 /// generated. Must be OMPD_barrier for explicit barrier generation. 295 /// \param EmitChecks true if need to emit checks for cancellation barriers. 296 /// \param ForceSimpleCall true simple barrier call must be emitted, false if 297 /// runtime class decides which one to emit (simple or with cancellation 298 /// checks). 299 /// 300 void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 301 OpenMPDirectiveKind Kind, bool EmitChecks = true, 302 bool ForceSimpleCall = false) override; 303 304 /// Emits a critical region. 305 /// \param CriticalName Name of the critical region. 306 /// \param CriticalOpGen Generator for the statement associated with the given 307 /// critical region. 308 /// \param Hint Value of the 'hint' clause (optional). 309 void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, 310 const RegionCodeGenTy &CriticalOpGen, 311 SourceLocation Loc, 312 const Expr *Hint = nullptr) override; 313 314 /// Emit a code for reduction clause. 315 /// 316 /// \param Privates List of private copies for original reduction arguments. 317 /// \param LHSExprs List of LHS in \a ReductionOps reduction operations. 318 /// \param RHSExprs List of RHS in \a ReductionOps reduction operations. 319 /// \param ReductionOps List of reduction operations in form 'LHS binop RHS' 320 /// or 'operator binop(LHS, RHS)'. 321 /// \param Options List of options for reduction codegen: 322 /// WithNowait true if parent directive has also nowait clause, false 323 /// otherwise. 324 /// SimpleReduction Emit reduction operation only. Used for omp simd 325 /// directive on the host. 326 /// ReductionKind The kind of reduction to perform. 327 virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 328 ArrayRef<const Expr *> Privates, 329 ArrayRef<const Expr *> LHSExprs, 330 ArrayRef<const Expr *> RHSExprs, 331 ArrayRef<const Expr *> ReductionOps, 332 ReductionOptionsTy Options) override; 333 334 /// Returns specified OpenMP runtime function for the current OpenMP 335 /// implementation. Specialized for the NVPTX device. 336 /// \param Function OpenMP runtime function. 337 /// \return Specified function. 338 llvm::FunctionCallee createNVPTXRuntimeFunction(unsigned Function); 339 340 /// Translates the native parameter of outlined function if this is required 341 /// for target. 342 /// \param FD Field decl from captured record for the parameter. 343 /// \param NativeParam Parameter itself. 344 const VarDecl *translateParameter(const FieldDecl *FD, 345 const VarDecl *NativeParam) const override; 346 347 /// Gets the address of the native argument basing on the address of the 348 /// target-specific parameter. 349 /// \param NativeParam Parameter itself. 350 /// \param TargetParam Corresponding target-specific parameter. 351 Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, 352 const VarDecl *TargetParam) const override; 353 354 /// Emits call of the outlined function with the provided arguments, 355 /// translating these arguments to correct target-specific arguments. 356 void emitOutlinedFunctionCall( 357 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 358 ArrayRef<llvm::Value *> Args = llvm::None) const override; 359 360 /// Emits OpenMP-specific function prolog. 361 /// Required for device constructs. 362 void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override; 363 364 /// Gets the OpenMP-specific address of the local variable. 365 Address getAddressOfLocalVariable(CodeGenFunction &CGF, 366 const VarDecl *VD) override; 367 368 /// Target codegen is specialized based on two data-sharing modes: CUDA, in 369 /// which the local variables are actually global threadlocal, and Generic, in 370 /// which the local variables are placed in global memory if they may escape 371 /// their declaration context. 372 enum DataSharingMode { 373 /// CUDA data sharing mode. 374 CUDA, 375 /// Generic data-sharing mode. 376 Generic, 377 }; 378 379 /// Cleans up references to the objects in finished function. 380 /// 381 void functionFinished(CodeGenFunction &CGF) override; 382 383 /// Choose a default value for the dist_schedule clause. 384 void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, 385 const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, 386 llvm::Value *&Chunk) const override; 387 388 /// Choose a default value for the schedule clause. 389 void getDefaultScheduleAndChunk(CodeGenFunction &CGF, 390 const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, 391 const Expr *&ChunkExpr) const override; 392 393 /// Adjust some parameters for the target-based directives, like addresses of 394 /// the variables captured by reference in lambdas. 395 void adjustTargetSpecificDataForLambdas( 396 CodeGenFunction &CGF, const OMPExecutableDirective &D) const override; 397 398 /// Perform check on requires decl to ensure that target architecture 399 /// supports unified addressing 400 void processRequiresDirective(const OMPRequiresDecl *D) override; 401 402 /// Returns default address space for the constant firstprivates, __constant__ 403 /// address space by default. 404 unsigned getDefaultFirstprivateAddressSpace() const override; 405 406 /// Checks if the variable has associated OMPAllocateDeclAttr attribute with 407 /// the predefined allocator and translates it into the corresponding address 408 /// space. 409 bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override; 410 411 private: 412 /// Track the execution mode when codegening directives within a target 413 /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the 414 /// target region and used by containing directives such as 'parallel' 415 /// to emit optimized code. 416 ExecutionMode CurrentExecutionMode = EM_Unknown; 417 418 /// Check if the full runtime is required (default - yes). 419 bool RequiresFullRuntime = true; 420 421 /// true if we're emitting the code for the target region and next parallel 422 /// region is L0 for sure. 423 bool IsInTargetMasterThreadRegion = false; 424 /// true if currently emitting code for target/teams/distribute region, false 425 /// - otherwise. 426 bool IsInTTDRegion = false; 427 /// true if we're definitely in the parallel region. 428 bool IsInParallelRegion = false; 429 430 /// Map between an outlined function and its wrapper. 431 llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap; 432 433 /// Emit function which wraps the outline parallel region 434 /// and controls the parameters which are passed to this function. 435 /// The wrapper ensures that the outlined function is called 436 /// with the correct arguments when data is shared. 437 llvm::Function *createParallelDataSharingWrapper( 438 llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D); 439 440 /// The data for the single globalized variable. 441 struct MappedVarData { 442 /// Corresponding field in the global record. 443 const FieldDecl *FD = nullptr; 444 /// Corresponding address. 445 Address PrivateAddr = Address::invalid(); 446 /// true, if only one element is required (for latprivates in SPMD mode), 447 /// false, if need to create based on the warp-size. 448 bool IsOnePerTeam = false; 449 MappedVarData() = delete; 450 MappedVarData(const FieldDecl *FD, bool IsOnePerTeam = false) 451 : FD(FD), IsOnePerTeam(IsOnePerTeam) {} 452 }; 453 /// The map of local variables to their addresses in the global memory. 454 using DeclToAddrMapTy = llvm::MapVector<const Decl *, MappedVarData>; 455 /// Set of the parameters passed by value escaping OpenMP context. 456 using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>; 457 struct FunctionData { 458 DeclToAddrMapTy LocalVarData; 459 llvm::Optional<DeclToAddrMapTy> SecondaryLocalVarData = llvm::None; 460 EscapedParamsTy EscapedParameters; 461 llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls; 462 llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs; 463 const RecordDecl *GlobalRecord = nullptr; 464 llvm::Optional<const RecordDecl *> SecondaryGlobalRecord = llvm::None; 465 llvm::Value *GlobalRecordAddr = nullptr; 466 llvm::Value *IsInSPMDModeFlag = nullptr; 467 std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams; 468 }; 469 /// Maps the function to the list of the globalized variables with their 470 /// addresses. 471 llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls; 472 /// List of records for the globalized variables in target/teams/distribute 473 /// contexts. Inner records are going to be joined into the single record, 474 /// while those resulting records are going to be joined into the single 475 /// union. This resulting union (one per CU) is the entry point for the static 476 /// memory management runtime functions. 477 struct GlobalPtrSizeRecsTy { 478 llvm::GlobalVariable *UseSharedMemory = nullptr; 479 llvm::GlobalVariable *RecSize = nullptr; 480 llvm::GlobalVariable *Buffer = nullptr; 481 SourceLocation Loc; 482 llvm::SmallVector<const RecordDecl *, 2> Records; 483 unsigned RegionCounter = 0; 484 }; 485 llvm::SmallVector<GlobalPtrSizeRecsTy, 8> GlobalizedRecords; 486 llvm::GlobalVariable *KernelTeamsReductionPtr = nullptr; 487 /// List of the records with the list of fields for the reductions across the 488 /// teams. Used to build the intermediate buffer for the fast teams 489 /// reductions. 490 /// All the records are gathered into a union `union.type` is created. 491 llvm::SmallVector<const RecordDecl *, 4> TeamsReductions; 492 /// Shared pointer for the global memory in the global memory buffer used for 493 /// the given kernel. 494 llvm::GlobalVariable *KernelStaticGlobalized = nullptr; 495 /// Pair of the Non-SPMD team and all reductions variables in this team 496 /// region. 497 std::pair<const Decl *, llvm::SmallVector<const ValueDecl *, 4>> 498 TeamAndReductions; 499 }; 500 501 } // CodeGen namespace. 502 } // clang namespace. 503 504 #endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H 505