//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
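  /// The base implementation loads the id through the outlined function's
  /// kmp_int32 * argument; task regions override this to use the local copy
  /// of the thread id instead (see CGOpenMPTaskOutlinedRegionInfo below).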
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
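        // An untied task body is split into parts, and the part id stored in
        // *PartIDVar selects the resume point. The emitted control flow is
        // roughly (illustrative sketch, not literal IR):
        //
        //   switch (*partid) {
        //   case 0: goto .untied.jmp.0;  // initial entry
        //   case 1: goto .untied.jmp.1;  // resume after the first part
        //   ...
        //   default: goto .untied.done.; // body already finished
        //   }
        //
        // Each call to emitUntiedSwitch below adds one more case and resume
        // block to this switch.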
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
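  // An inlined region has no outlined function of its own, so queries about
  // captured state are forwarded to the enclosing outlined region, if any.
  // Reaching one of the llvm_unreachable paths below means the construct was
  // emitted outside of any outlined OpenMP region.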
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up the
    // variable in a list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information needed to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
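  /// For target regions this is the unique, client-provided name of the
  /// kernel entry point (see the class comment above).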
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
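    // Install a fresh inlined-region info and, unless inheritance is allowed,
    // stash the lambda/block capture state so the inlined region does not
    // resolve captures through an enclosing lambda or block; the destructor
    // restores everything.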
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above  */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**<  source[4] in Fortran, do not use for
///                                  C++  */
///    char const *psource;    /**<  String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre- and post-actions used in advanced codegen sequences
/// for OpenMP regions.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for the reduction.
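/// For a UDR the reduction op is a CallExpr whose callee is an
/// OpaqueValueExpr wrapping a DeclRefExpr to the OMPDeclareReductionDecl, so
/// this simply walks that chain and returns nullptr on any mismatch.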
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
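  // The generated code is a guarded do-while loop over the flattened array
  // (illustrative sketch of the emitted control flow, not literal IR):
  //
  //   if (dest.begin == dest.end) goto done;
  // body:
  //   <initialize one element, via the UDR initializer or the default init>
  //   advance dest (and src, when a UDR is used) by one element;
  //   if (dest != dest.end) goto body;
  // done: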
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
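  // Passing IsFinished=true signals that no further branches to DoneBB will
  // be emitted, so the block can be discarded if it turns out unreachable.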
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in the current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress(CGF).withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF,
                                               unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsTargetDevice,
                                     isGPU(), hasRequiresUnifiedSharedMemory(),
                                     CGM.getLangOpts().OpenMPOffloadMandatory);
  OMPBuilder.initialize(CGM.getLangOpts().OpenMPIsTargetDevice
                            ? CGM.getLangOpts().OMPHostIRFile
                            : StringRef{});
  OMPBuilder.setConfig(Config);
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize, we split the block at
    // IP, use the new block (=BB) as the destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)), which is then fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed, but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
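  // The RAII below pushes a finalization callback so that cancellation points
  // inside the outlined region can branch through clang-emitted cleanups.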
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

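// Note on the psource format: getIdentStringFromSourceLocation above emits
// the kmp.h convention of semicolon-separated fields,
// ";file;function;line;column;;", e.g. ";test.c;foo;4;1;;" (illustrative).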
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used, we need to use it for all thread id calls
  // as the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use the parameter to avoid a possible
  // crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If the value was loaded in the entry block, cache it and use it
        // everywhere in the function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate the thread id value and cache it for use across the
  // function.
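  // The call is emitted at the per-function service insert point (right after
  // the allocas, see setLocThreadIdInsertPt) so that the loaded thread id
  // dominates all of its uses and can be cached for the whole function.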
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
                                             bool IsGPUDistribute) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name;
  if (IsGPUDistribute)
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
                                    : "__kmpc_distribute_static_init_4u")
                        : (IVSigned ? "__kmpc_distribute_static_init_8"
                                    : "__kmpc_distribute_static_init_8u");
  else
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                    : "__kmpc_for_static_init_4u")
                        : (IVSigned ? "__kmpc_for_static_init_8"
                                    : "__kmpc_for_static_init_8u");

  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ?
 CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), // loc
                              CGM.Int32Ty,           // tid
                              CGM.Int32Ty,           // schedtype
                              ITy,                   // lower
                              ITy,                   // upper
                              ITy,                   // stride
                              ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
    break;
  }
}

llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
    break;
  case
 OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
    break;
  }
}

static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    llvm::sys::fs::UniqueID ID;
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
    }

    return std::pair<std::string, uint64_t>(PLoc.getFilename(),
                                            PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
}

Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      CGM.getContext().getPointerType(VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
      LinkageForVariable);

  if (!addr)
    return Address::invalid();
  return Address(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
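  // The cache is an internal global of type i8** (typically named
  // "<mangled-name>.cache."); the runtime fills it with per-thread copies on
  // first access.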
  std::string Suffix = getName({"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL: the parameter is reserved by the runtime, which currently
    // asserts if anything other than NULL is passed here.
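    // Null function pointers below mean "no ctor/cctor/dtor": the runtime is
    // expected to skip the corresponding step when registering the variable.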
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsTargetDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsTargetDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
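  // The entry names derived below look roughly like
  //   __omp_offloading_<device-id>_<file-id>_<var-name>_l<line>
  // with "_ctor"/"_dtor" appended for the init/cleanup regions (a sketch of
  // the naming scheme, not an ABI guarantee).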
  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, Loc, VD->getName());
  SmallString<128> Buffer, Out;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(AddrInAS0, Addr->getValueType(),
                                       CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    auto CtorEntryInfo = EntryInfo;
    CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
    OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
        CtorEntryInfo, Ctor, ID,
        llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
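      // (The artificial location keeps debug info from attributing the
      // emitted destructor body to an arbitrary user source line.)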
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
      DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
                                  CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    auto DtorEntryInfo = EntryInfo;
    DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
    OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
        DtorEntryInfo, Dtor, ID,
        llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsTargetDevice;
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      VarLVType, Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy,
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
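  // The emitted control flow is, roughly:
  //   br i1 %cond, label %omp_if.then, label %omp_if.else
  // omp_if.then:
  //   <ThenGen>  ; br label %omp_if.end
  // omp_if.else:
  //   <ElseGen>  ; br label %omp_if.end
  // omp_if.end: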
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call, but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do
    // the handling there. Much cleaner code.
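    // Taking the function's address for __kmpc_fork_call already prevents
    // inlining on that path; the attributes below only need to cover the
    // direct call emitted for the serialized path.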
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function, "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region but in a regular serial code region, get the thread ID by calling
// kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
// temporary and return the address of that temporary.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  llvm::GlobalVariable *G =
      OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
  llvm::Align PtrAlign =
      OMPBuilder.M.getDataLayout().getPointerABIAlignment(G->getAddressSpace());
  if (PtrAlign > llvm::Align(G->getAlignment()))
    G->setAlignment(PtrAlign);
  return G;
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ?
 OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(
      CGF.Builder.CreateBitCast(
          Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
      ElemTy, CGF.getContext().getDeclAlign(Var));
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  //                         <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  //                         <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
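    // The generated helper has roughly this shape (illustrative):
    //   void .omp.copyprivate.copy_func(void *lhs, void *rhs) {
    //     *(T0 *)((void **)lhs)[0] = *(T0 *)((void **)rhs)[0];
    //     ...
    //   }
    // and is invoked by the runtime once per observing thread.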
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose schedule(static, 1).
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
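    // (A "static, 1" schedule gives a fixed, easily computed iteration-to-
    // thread mapping, which keeps cross-iteration doacross synchronization
    // simple.)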
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
                                    Expr *ME, bool IsFatal) {
  llvm::Value *MVL =
      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Build call void __kmpc_error(ident_t *loc, int severity, const char
  // *message)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_error),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ?
 OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect
  // is as if the monotonic modifier is specified.
  // Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
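    // (With a non-chunked static schedule the runtime divides the iteration
    // space evenly among the threads; the chunk argument below is effectively
    // a placeholder.)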
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc,
                         isOpenMPLoopDirective(DKind)
                             ? OMP_IDENT_WORK_LOOP
                             : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ?
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) &&
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}
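
// Sketch of how the two "push" entry points above pair with a fork, for a
// directive like '#pragma omp parallel num_threads(4) proc_bind(close)'
// (hedged illustration; the fork itself is emitted elsewhere, and the
// numeric proc_bind encoding is owned by the runtime):
// \code
// __kmpc_push_num_threads(&loc, gtid, 4);
// __kmpc_push_proc_bind(&loc, gtid, /*proc_bind_close*/ 3);
// __kmpc_fork_call(&loc, /*argc=*/..., outlined_fn, ...);
// \endcode
// Both pushes affect only the next parallel region started by this thread.
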
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}
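
// For orientation, the IRBuilder emits named metadata of roughly this shape
// (a hedged sketch; the exact operand order and count are owned by
// OpenMPIRBuilder):
// \code
// !omp_offload.info = !{!0, ...}
// !0 = !{i32 0, i32 <device-id>, i32 <file-id>, !"<parent>", i32 <line>,
//        i32 <order>}
// \endcode
// The error callback above only maps a failing entry back to a source
// location so the diagnostic points at the right file and line.
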
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}
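
// Example of what isAllocatableDecl distinguishes (hedged, for illustration
// only). A declaration like
// \code
// int buf[1024];
// #pragma omp allocate(buf) allocator(omp_high_bw_mem_alloc)
// \endcode
// is "allocatable": its storage comes from a runtime allocator, so the task
// privatization code below stores a pointer to it rather than the object
// itself. A plain 'omp allocate' with the default allocator and no allocator
// expression keeps the ordinary allocation and returns false here.
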
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t     data1;
  //         kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}
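
// Putting the three builders together, a task with privates 'int x; double d;'
// ends up with a record laid out roughly like this (hedged C sketch; the
// privates are sorted by alignment before the record is built, see
// emitTaskInit below):
// \code
// struct kmp_task_t_with_privates {
//   kmp_task_t task_data; // shareds/routine/part_id/data1/data2 [+ taskloop]
//   struct {              // .kmp_privates.t.
//     double d;           // higher alignment first
//     int x;
//   } privates;
// };
// \endcode
// The runtime only knows about the leading kmp_task_t; the privates trailer
// is visible solely to the compiler-generated helpers.
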
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
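
// A hedged sketch of how this proxy is reached at run time: emitTaskInit
// stores the address of .omp_task_entry. in the 'routine' field, and libomp
// invokes it once the task is scheduled, roughly as
// \code
// kmp_task_t *t = __kmpc_omp_task_alloc(&loc, gtid, flags, size,
//                                       shareds_size, &.omp_task_entry.);
// // ... fill shareds/privates ...
// __kmpc_omp_task(&loc, gtid, t); // eventually calls t->routine(gtid, t)
// \endcode
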
3242 } 3243 3244 llvm::Value *CommonArgs[] = { 3245 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap, 3246 CGF.Builder 3247 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF), 3248 CGF.VoidPtrTy, CGF.Int8Ty) 3249 .getPointer()}; 3250 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3251 std::end(CommonArgs)); 3252 if (isOpenMPTaskLoopDirective(Kind)) { 3253 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3254 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3255 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3256 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3257 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3258 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3259 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3260 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3261 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3262 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3263 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3264 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3265 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3266 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3267 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3268 CallArgs.push_back(LBParam); 3269 CallArgs.push_back(UBParam); 3270 CallArgs.push_back(StParam); 3271 CallArgs.push_back(LIParam); 3272 CallArgs.push_back(RParam); 3273 } 3274 CallArgs.push_back(SharedsParam); 3275 3276 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3277 CallArgs); 3278 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3279 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3280 CGF.FinishFunction(); 3281 return TaskEntry; 3282 } 3283 3284 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3285 SourceLocation Loc, 3286 QualType KmpInt32Ty, 3287 QualType KmpTaskTWithPrivatesPtrQTy, 3288 QualType KmpTaskTWithPrivatesQTy) { 3289 ASTContext &C = CGM.getContext(); 3290 FunctionArgList Args; 3291 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3292 ImplicitParamDecl::Other); 3293 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3294 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3295 ImplicitParamDecl::Other); 3296 Args.push_back(&GtidArg); 3297 Args.push_back(&TaskTypeArg); 3298 const auto &DestructorFnInfo = 3299 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3300 llvm::FunctionType *DestructorFnTy = 3301 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3302 std::string Name = 3303 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3304 auto *DestructorFn = 3305 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3306 Name, &CGM.getModule()); 3307 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3308 DestructorFnInfo); 3309 DestructorFn->setDoesNotRecurse(); 3310 CodeGenFunction CGF(CGM); 3311 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3312 Args, Loc, Loc); 3313 3314 LValue Base = CGF.EmitLoadOfPointerLValue( 3315 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3316 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3317 const auto *KmpTaskTWithPrivatesQTyRD = 3318 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3319 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3320 Base = CGF.EmitLValueForField(Base, *FI); 
/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}
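
// For two privates 'int x; double d;' the emitted mapper instantiates the
// doc comment above roughly as follows (hedged; parameter order follows the
// clauses while the stores walk the alignment-sorted privates record):
// \code
// void .omp_task_privates_map.(const .kmp_privates.t. *privs,
//                              int **priv_x, double **priv_d) {
//   *priv_d = &privs->d;
//   *priv_x = &privs->x;
// }
// \endcode
// The outlined task body calls this once to recover typed pointers into the
// privates trailer of kmp_task_t_with_privates.
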
/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
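
// Why a dup helper exists at all (hedged note): for taskloops the runtime
// clones the original task descriptor once per generated chunk, so a plain
// memcpy of the privates trailer is only correct for trivially copyable
// state. Something like
// \code
// std::string s;
// #pragma omp taskloop firstprivate(s)
// for (int i = 0; i < n; ++i)
//   use(s);
// \endcode
// needs the copy constructor re-run per clone, which is exactly what the
// emitted .omp_task_dup. does via emitPrivatesInit(ForDup=true).
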

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
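
// The scope above brackets code that must run once per iterator value, e.g.
// for
// \code
// #pragma omp task depend(iterator(i = 0:n), in : a[i])
// \endcode
// the constructor opens one counter loop per iterator (the 'iter.cont' and
// 'iter.body' blocks), the dependency-filling code runs inside it, and the
// destructor emits the increment and the back-branch (the 'iter.exit'
// blocks). Nested iterators simply nest these loops; this is a hedged
// illustration of the control-flow shape, not the emitted IR itself.
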

static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags
/// type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
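
// The record built above mirrors the runtime-side descriptor; as a hedged C
// sketch of what each affinity item carries:
// \code
// struct kmp_task_affinity_info_t {
//   intptr_t base_addr; // start of the referenced storage
//   size_t len;         // its extent in bytes
//   int32_t flags;      // reserved modifier bits
// };
// \endcode
// getPointerAndSize supplies the (base_addr, len) pair uniformly for plain
// lvalues, array sections, and array-shaping expressions.
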

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
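
  // Worked example of the flag word computed above (hedged): a tied
  // '#pragma omp task priority(p)' with a destructible firstprivate yields
  // TiedFlag | DestructorsFlag | PriorityFlag = 0x1 | 0x8 | 0x20 = 0x29;
  // a 'final(cond)' clause folds in FinalFlag (0x2) through the select.
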
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = RTLDependenceKindTy::DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = RTLDependenceKindTy::DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = RTLDependenceKindTy::DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = RTLDependenceKindTy::DepInOutSet;
    break;
  case OMPC_DEPEND_outallmemory:
    DepKind = RTLDependenceKindTy::DepOmpAllMem;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_inoutallmemory:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}
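
// End-to-end, a directive like
// \code
// #pragma omp task depend(in : a) depend(out : b)
// \endcode
// is lowered to a two-element kmp_depend_info array whose flags come from
// translateDependencyKind (in -> DepIn, out/inout -> DepInOut), followed by a
// __kmpc_omp_task_with_deps call. This is a hedged summary of the machinery
// built in the helpers below, not a literal transcript of the emitted IR.
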

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}
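
// The record built above corresponds roughly to this C declaration (a
// sketch; the flags member has the in-memory width of 'bool' on the target,
// typically 8 bits):
//   struct kmp_depend_info {
//     intptr_t base_addr;
//     size_t len;
//     unsigned char flags;
//   };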

std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF).withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
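
// As an illustration, for 'depend(in: a)' with no iterator modifier the
// helper above fills a single record and advances the position (a
// compile-time index here; an in-memory counter when iterators are present):
//   deps[pos].base_addr = (intptr_t)&a;
//   deps[pos].len = sizeof(a);
//   deps[pos].flags = DepIn;
//   ++pos;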

SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
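
// In pseudo-code, each depobj argument handled above contributes one block
// copy (a sketch; 'n' is the element count read from the depobj header):
//   memcpy(&deps[pos], depobj_base, n * sizeof(kmp_depend_info));
//   pos += n;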

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
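
// For a fully static list such as 'depend(in: a) depend(out: b)' the
// function above takes the constant-size path: a two-element kmp_depend_info
// array in the frame, NumOfElements == 2, and records morally equal to
// { (intptr_t)&a, sizeof(a), DepIn } and { (intptr_t)&b, sizeof(b), DepInOut }
// (a sketch, not literal IR).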

Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}
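
// The resulting heap allocation is laid out as below (a sketch). The address
// returned to the caller points at element 1, so element 0 acts as a hidden
// header that getDepobjElements() later reads back via a GEP of -1:
//   deps[0].base_addr = <number of dependencies>;  // header
//   deps[1..N]        = <kmp_depend_info records>; // payload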

void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // __kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
                                           Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI, KnownNonNull);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(),
                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
      FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
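
// The while-do loop emitted above is equivalent to this C sketch, with
// 'deps' and 'ndeps' recovered from the depobj header:
//   for (kmp_depend_info *p = deps; p != deps + ndeps; ++p)
//     p->flags = <translated NewDepKind>;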

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list), emitted
  // only if the dependence list is not empty.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task.
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); emitted only if
    // dependence info is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
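
// Putting the pieces together, for '#pragma omp task if(c) depend(in: x)'
// the emitted control flow is roughly (a sketch, eliding the argument setup
// shown above):
//   if (c) {
//     __kmpc_omp_task_with_deps(loc, gtid, new_task, ndeps, deps, 0, NULL);
//   } else {
//     __kmpc_omp_taskwait_deps_51(loc, gtid, ndeps, deps, 0, NULL, nowait);
//     __kmpc_omp_task_begin_if0(loc, gtid, new_task);
//     proxy_task_entry(gtid, new_task);
//     __kmpc_omp_task_complete_if0(loc, gtid, new_task);
//   }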

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
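
// As a reading of the argument list above: '#pragma omp taskloop
// grainsize(4)' reaches the runtime with sched == Grainsize and a value of 4,
// 'num_tasks(8)' with sched == NumTasks and a value of 8, and a plain
// taskloop with sched == NoSchedule and a value of 0.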

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element reduction.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
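
// The emitted loop is structurally equivalent to this C sketch, with
// RedOpGen supplying the loop body:
//   for (size_t i = 0; i != NumElements; ++i)
//     lhs[i] = RedOp(lhs[i], rhs[i]);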

/// Emit reduction combiner. If the combiner is a simple expression, emit it
/// as is; otherwise treat it as the combiner of a UDR declaration and emit it
/// as a call to the UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getReductionFuncName(ReducerName);
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
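
// As a concrete illustration, for 'reduction(+: int x)' the helper built
// above is morally equivalent to (a sketch, not literal IR):
//   static void reduce_func(void *lhs, void *rhs) {
//     *(int *)((void **)lhs)[0] += *(int *)((void **)rhs)[0];
//   }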

void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // The following code should be emitted for the reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //   *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //   ...
  //   *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //   *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  //                                RedList, reduce_func, &<lock>)) {
  // case 1:
  //   ...
  //   <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //   ...
  //   __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //   break;
  // case 2:
  //   ...
  //   Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //   ...
  //   [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  //   break;
  // default:;
  // }
  //
  // If SimpleReduction is true, only the following code is generated:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  //    RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //    ...
  //    <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //    ...
  //    __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //    break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, std::nullopt,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //    ...
  //    Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //    ...
  //    break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, std::nullopt,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
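
// To make the switch above concrete: for 'reduction(+: s)' case 1 emits the
// plain combiner 's = s + s_priv;' followed by __kmpc_end_reduce{_nowait},
// while case 2 emits an atomic 's += s_priv;' (plus, without nowait, the
// trailing __kmpc_end_reduce). This describes the generated behavior; no
// extra code is emitted here.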

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param).withElementType(
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If the initializer uses the initializer from a declare reduction
  // construct, emit a pointer to the address of the original reduction item
  // (required by the reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
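
// For example, for 'task_reduction(+: double d)' the initializer emitted
// above reduces to (a sketch following the \code pattern in the comment):
//   void @.red_init(void* %arg, void* %orig) {
//     %0 = bitcast void* %arg to double*
//     store double 0.000000e+00, double* %0
//     ret void
//   }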

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamInOut)
              .withElementType(
                  CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
              CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
5614 if (RCG.getSizes(N).second) { 5615 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5616 CGF, CGM.getContext().getSizeType(), 5617 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5618 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5619 CGM.getContext().getSizeType(), Loc); 5620 } 5621 RCG.emitAggregateType(CGF, N, Size); 5622 // Emit the finalizer body: 5623 // <destroy>(<type>* %0) 5624 RCG.emitCleanups(CGF, N, PrivateAddr); 5625 CGF.FinishFunction(Loc); 5626 return Fn; 5627 } 5628 5629 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 5630 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 5631 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 5632 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 5633 return nullptr; 5634 5635 // Build typedef struct: 5636 // kmp_taskred_input { 5637 // void *reduce_shar; // shared reduction item 5638 // void *reduce_orig; // original reduction item used for initialization 5639 // size_t reduce_size; // size of data item 5640 // void *reduce_init; // data initialization routine 5641 // void *reduce_fini; // data finalization routine 5642 // void *reduce_comb; // data combiner routine 5643 // kmp_task_red_flags_t flags; // flags for additional info from compiler 5644 // } kmp_taskred_input_t; 5645 ASTContext &C = CGM.getContext(); 5646 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 5647 RD->startDefinition(); 5648 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5649 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5650 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 5651 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5652 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5653 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5654 const FieldDecl *FlagsFD = addFieldToRecordDecl( 5655 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 5656 RD->completeDefinition(); 5657 QualType RDType = C.getRecordType(RD); 5658 unsigned Size = Data.ReductionVars.size(); 5659 llvm::APInt ArraySize(/*numBits=*/64, Size); 5660 QualType ArrayRDType = C.getConstantArrayType( 5661 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 5662 // kmp_task_red_input_t .rd_input.[Size]; 5663 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 5664 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 5665 Data.ReductionCopies, Data.ReductionOps); 5666 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 5667 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 5668 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 5669 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 5670 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 5671 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs, 5672 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 5673 ".rd_input.gep."); 5674 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 5675 // ElemLVal.reduce_shar = &Shareds[Cnt]; 5676 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 5677 RCG.emitSharedOrigLValue(CGF, Cnt); 5678 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF); 5679 CGF.EmitStoreOfScalar(Shared, SharedLVal); 5680 // ElemLVal.reduce_orig = &Origs[Cnt]; 5681 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 5682 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF); 5683 
CGF.EmitStoreOfScalar(Orig, OrigLVal); 5684 RCG.emitAggregateType(CGF, Cnt); 5685 llvm::Value *SizeValInChars; 5686 llvm::Value *SizeVal; 5687 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 5688 // We use delayed creation/initialization for VLAs and array sections. It is 5689 // required because the runtime does not provide a way to pass the sizes of 5690 // VLAs/array sections to the initializer/combiner/finalizer functions. Instead, 5691 // threadprivate global variables are used to store these values, which are 5692 // then read in those functions. 5693 bool DelayedCreation = !!SizeVal; 5694 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 5695 /*isSigned=*/false); 5696 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 5697 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 5698 // ElemLVal.reduce_init = init; 5699 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 5700 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt); 5701 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 5702 // ElemLVal.reduce_fini = fini; 5703 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 5704 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 5705 llvm::Value *FiniAddr = 5706 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 5707 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 5708 // ElemLVal.reduce_comb = comb; 5709 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 5710 llvm::Value *CombAddr = emitReduceCombFunction( 5711 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 5712 RHSExprs[Cnt], Data.ReductionCopies[Cnt]); 5713 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 5714 // ElemLVal.flags = DelayedCreation ? 1 : 0; 5715 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 5716 if (DelayedCreation) { 5717 CGF.EmitStoreOfScalar( 5718 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 5719 FlagsLVal); 5720 } else 5721 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), 5722 FlagsLVal.getType()); 5723 } 5724 if (Data.IsReductionWithTaskMod) { 5725 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 5726 // is_ws, int num, void *data); 5727 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 5728 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 5729 CGM.IntTy, /*isSigned=*/true); 5730 llvm::Value *Args[] = { 5731 IdentTLoc, GTid, 5732 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ?
1 : 0, 5733 /*isSigned=*/true), 5734 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 5735 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5736 TaskRedInput.getPointer(), CGM.VoidPtrTy)}; 5737 return CGF.EmitRuntimeCall( 5738 OMPBuilder.getOrCreateRuntimeFunction( 5739 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init), 5740 Args); 5741 } 5742 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data); 5743 llvm::Value *Args[] = { 5744 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 5745 /*isSigned=*/true), 5746 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 5747 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 5748 CGM.VoidPtrTy)}; 5749 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5750 CGM.getModule(), OMPRTL___kmpc_taskred_init), 5751 Args); 5752 } 5753 5754 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 5755 SourceLocation Loc, 5756 bool IsWorksharingReduction) { 5757 // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, 5758 // int is_ws); 5759 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 5760 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 5761 CGM.IntTy, /*isSigned=*/true); 5762 llvm::Value *Args[] = {IdentTLoc, GTid, 5763 llvm::ConstantInt::get(CGM.IntTy, 5764 IsWorksharingReduction ? 1 : 0, 5765 /*isSigned=*/true)}; 5766 (void)CGF.EmitRuntimeCall( 5767 OMPBuilder.getOrCreateRuntimeFunction( 5768 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini), 5769 Args); 5770 } 5771 5772 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 5773 SourceLocation Loc, 5774 ReductionCodeGen &RCG, 5775 unsigned N) { 5776 auto Sizes = RCG.getSizes(N); 5777 // Emit threadprivate global variable if the type is non-constant 5778 // (Sizes.second != nullptr). 5779 if (Sizes.second) { 5780 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 5781 /*isSigned=*/false); 5782 Address SizeAddr = getAddrOfArtificialThreadPrivate( 5783 CGF, CGM.getContext().getSizeType(), 5784 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5785 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 5786 } 5787 } 5788 5789 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 5790 SourceLocation Loc, 5791 llvm::Value *ReductionsPtr, 5792 LValue SharedLVal) { 5793 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 5794 // *d); 5795 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 5796 CGM.IntTy, 5797 /*isSigned=*/true), 5798 ReductionsPtr, 5799 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5800 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 5801 return Address( 5802 CGF.EmitRuntimeCall( 5803 OMPBuilder.getOrCreateRuntimeFunction( 5804 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 5805 Args), 5806 CGF.Int8Ty, SharedLVal.getAlignment()); 5807 } 5808 5809 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, 5810 const OMPTaskDataTy &Data) { 5811 if (!CGF.HaveInsertPoint()) 5812 return; 5813 5814 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) { 5815 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
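    // Illustrative lowering (not emitted verbatim): a bare
    //   #pragma omp taskwait
    // becomes a call equivalent to
    //   call i32 @__kmpc_omp_taskwait(ptr @loc, i32 %gtid)
    // whether it is produced by the OpenMPIRBuilder path below or by the
    // fallback path further down.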
5816 OMPBuilder.createTaskwait(CGF.Builder); 5817 } else { 5818 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5819 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5820 auto &M = CGM.getModule(); 5821 Address DependenciesArray = Address::invalid(); 5822 llvm::Value *NumOfElements; 5823 std::tie(NumOfElements, DependenciesArray) = 5824 emitDependClause(CGF, Data.Dependences, Loc); 5825 if (!Data.Dependences.empty()) { 5826 llvm::Value *DepWaitTaskArgs[7]; 5827 DepWaitTaskArgs[0] = UpLoc; 5828 DepWaitTaskArgs[1] = ThreadID; 5829 DepWaitTaskArgs[2] = NumOfElements; 5830 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5831 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5832 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5833 DepWaitTaskArgs[6] = 5834 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause); 5835 5836 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5837 5838 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid, 5839 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5840 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list, 5841 // kmp_int32 has_no_wait); if dependence info is specified. 5842 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5843 M, OMPRTL___kmpc_omp_taskwait_deps_51), 5844 DepWaitTaskArgs); 5845 5846 } else { 5847 5848 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 5849 // global_tid); 5850 llvm::Value *Args[] = {UpLoc, ThreadID}; 5851 // Ignore return result until untied tasks are supported. 5852 CGF.EmitRuntimeCall( 5853 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), 5854 Args); 5855 } 5856 } 5857 5858 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5859 Region->emitUntiedSwitch(CGF); 5860 } 5861 5862 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 5863 OpenMPDirectiveKind InnerKind, 5864 const RegionCodeGenTy &CodeGen, 5865 bool HasCancel) { 5866 if (!CGF.HaveInsertPoint()) 5867 return; 5868 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 5869 InnerKind != OMPD_critical && 5870 InnerKind != OMPD_master && 5871 InnerKind != OMPD_masked); 5872 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 5873 } 5874 5875 namespace { 5876 enum RTCancelKind { 5877 CancelNoreq = 0, 5878 CancelParallel = 1, 5879 CancelLoop = 2, 5880 CancelSections = 3, 5881 CancelTaskgroup = 4 5882 }; 5883 } // anonymous namespace 5884 5885 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 5886 RTCancelKind CancelKind = CancelNoreq; 5887 if (CancelRegion == OMPD_parallel) 5888 CancelKind = CancelParallel; 5889 else if (CancelRegion == OMPD_for) 5890 CancelKind = CancelLoop; 5891 else if (CancelRegion == OMPD_sections) 5892 CancelKind = CancelSections; 5893 else { 5894 assert(CancelRegion == OMPD_taskgroup); 5895 CancelKind = CancelTaskgroup; 5896 } 5897 return CancelKind; 5898 } 5899 5900 void CGOpenMPRuntime::emitCancellationPointCall( 5901 CodeGenFunction &CGF, SourceLocation Loc, 5902 OpenMPDirectiveKind CancelRegion) { 5903 if (!CGF.HaveInsertPoint()) 5904 return; 5905 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 5906 // global_tid, kmp_int32 cncl_kind); 5907 if (auto *OMPRegionInfo = 5908 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 5909 // For 'cancellation point taskgroup', the task region info may not have a 5910 // cancel. This may instead happen in another adjacent task. 
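      // For illustration only, a hypothetical source pattern that reaches this
      // path (outside of taskgroup regions, the region must also contain a
      // 'cancel' so that hasCancel() holds):
      //   #pragma omp parallel
      //   {
      //     #pragma omp cancellation point parallel
      //     /* ... */
      //     #pragma omp cancel parallel
      //   }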
5911 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 5912 llvm::Value *Args[] = { 5913 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 5914 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 5915 // Ignore return result until untied tasks are supported. 5916 llvm::Value *Result = CGF.EmitRuntimeCall( 5917 OMPBuilder.getOrCreateRuntimeFunction( 5918 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 5919 Args); 5920 // if (__kmpc_cancellationpoint()) { 5921 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 5922 // exit from construct; 5923 // } 5924 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 5925 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 5926 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 5927 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 5928 CGF.EmitBlock(ExitBB); 5929 if (CancelRegion == OMPD_parallel) 5930 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 5931 // exit from construct; 5932 CodeGenFunction::JumpDest CancelDest = 5933 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 5934 CGF.EmitBranchThroughCleanup(CancelDest); 5935 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 5936 } 5937 } 5938 } 5939 5940 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 5941 const Expr *IfCond, 5942 OpenMPDirectiveKind CancelRegion) { 5943 if (!CGF.HaveInsertPoint()) 5944 return; 5945 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 5946 // kmp_int32 cncl_kind); 5947 auto &M = CGM.getModule(); 5948 if (auto *OMPRegionInfo = 5949 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 5950 auto &&ThenGen = [this, &M, Loc, CancelRegion, 5951 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 5952 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5953 llvm::Value *Args[] = { 5954 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 5955 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 5956 // Ignore return result until untied tasks are supported. 5957 llvm::Value *Result = CGF.EmitRuntimeCall( 5958 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 5959 // if (__kmpc_cancel()) { 5960 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 5961 // exit from construct; 5962 // } 5963 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 5964 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 5965 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 5966 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 5967 CGF.EmitBlock(ExitBB); 5968 if (CancelRegion == OMPD_parallel) 5969 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 5970 // exit from construct; 5971 CodeGenFunction::JumpDest CancelDest = 5972 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 5973 CGF.EmitBranchThroughCleanup(CancelDest); 5974 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 5975 }; 5976 if (IfCond) { 5977 emitIfClause(CGF, IfCond, ThenGen, 5978 [](CodeGenFunction &, PrePostActionTy &) {}); 5979 } else { 5980 RegionCodeGenTy ThenRCG(ThenGen); 5981 ThenRCG(CGF); 5982 } 5983 } 5984 } 5985 5986 namespace { 5987 /// Cleanup action for uses_allocators support. 
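/// For illustration, a sketch of a directive that requires this action,
/// assuming a C/C++ compile with -fopenmp (variable names are illustrative):
/// \code
/// omp_alloctrait_t traits[1] = {{omp_atk_alignment, 64}};
/// omp_allocator_handle_t my_alloc = omp_null_allocator;
/// #pragma omp target uses_allocators(my_alloc(traits))
/// { /* my_alloc is usable here */ }
/// \endcode
/// The Enter/Exit callbacks below emit the matching __kmpc_init_allocator and
/// __kmpc_destroy_allocator calls on entry to and exit from the region.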
5988 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 5989 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 5990 5991 public: 5992 OMPUsesAllocatorsActionTy( 5993 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 5994 : Allocators(Allocators) {} 5995 void Enter(CodeGenFunction &CGF) override { 5996 if (!CGF.HaveInsertPoint()) 5997 return; 5998 for (const auto &AllocatorData : Allocators) { 5999 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6000 CGF, AllocatorData.first, AllocatorData.second); 6001 } 6002 } 6003 void Exit(CodeGenFunction &CGF) override { 6004 if (!CGF.HaveInsertPoint()) 6005 return; 6006 for (const auto &AllocatorData : Allocators) { 6007 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6008 AllocatorData.first); 6009 } 6010 } 6011 }; 6012 } // namespace 6013 6014 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6015 const OMPExecutableDirective &D, StringRef ParentName, 6016 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6017 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6018 assert(!ParentName.empty() && "Invalid target entry parent name!"); 6019 HasEmittedTargetRegion = true; 6020 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6021 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6022 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6023 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6024 if (!D.AllocatorTraits) 6025 continue; 6026 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6027 } 6028 } 6029 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6030 CodeGen.setAction(UsesAllocatorAction); 6031 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6032 IsOffloadEntry, CodeGen); 6033 } 6034 6035 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6036 const Expr *Allocator, 6037 const Expr *AllocatorTraits) { 6038 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6039 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6040 // Use default memspace handle. 6041 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6042 llvm::Value *NumTraits = llvm::ConstantInt::get( 6043 CGF.IntTy, cast<ConstantArrayType>( 6044 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6045 ->getSize() 6046 .getLimitedValue()); 6047 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6048 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6049 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy); 6050 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6051 AllocatorTraitsLVal.getBaseInfo(), 6052 AllocatorTraitsLVal.getTBAAInfo()); 6053 llvm::Value *Traits = Addr.getPointer(); 6054 6055 llvm::Value *AllocatorVal = 6056 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6057 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6058 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6059 // Store to allocator. 
6060 CGF.EmitAutoVarAlloca(*cast<VarDecl>( 6061 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6062 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6063 AllocatorVal = 6064 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6065 Allocator->getType(), Allocator->getExprLoc()); 6066 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6067 } 6068 6069 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6070 const Expr *Allocator) { 6071 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6072 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6073 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6074 llvm::Value *AllocatorVal = 6075 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6076 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6077 CGF.getContext().VoidPtrTy, 6078 Allocator->getExprLoc()); 6079 (void)CGF.EmitRuntimeCall( 6080 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6081 OMPRTL___kmpc_destroy_allocator), 6082 {ThreadId, AllocatorVal}); 6083 } 6084 6085 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6086 const OMPExecutableDirective &D, StringRef ParentName, 6087 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6088 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6089 6090 llvm::TargetRegionEntryInfo EntryInfo = 6091 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName); 6092 6093 CodeGenFunction CGF(CGM, true); 6094 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction = 6095 [&CGF, &D, &CodeGen](StringRef EntryFnName) { 6096 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6097 6098 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6099 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6100 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6101 }; 6102 6103 // Get NumTeams and ThreadLimit attributes 6104 int32_t DefaultValTeams = -1; 6105 int32_t DefaultValThreads = -1; 6106 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); 6107 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); 6108 6109 OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction, 6110 DefaultValTeams, DefaultValThreads, 6111 IsOffloadEntry, OutlinedFn, OutlinedFnID); 6112 6113 if (OutlinedFn != nullptr) 6114 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); 6115 } 6116 6117 /// Checks if the expression is constant or does not have non-trivial function 6118 /// calls. 6119 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6120 // We can skip constant expressions. 6121 // We can skip expressions with trivial calls or simple expressions. 6122 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6123 !E->hasNonTrivialCall(Ctx)) && 6124 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6125 } 6126 6127 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6128 const Stmt *Body) { 6129 const Stmt *Child = Body->IgnoreContainers(); 6130 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6131 Child = nullptr; 6132 for (const Stmt *S : C->body()) { 6133 if (const auto *E = dyn_cast<Expr>(S)) { 6134 if (isTrivial(Ctx, E)) 6135 continue; 6136 } 6137 // Some of the statements can be ignored. 
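      // For example, a stray ';' (NullStmt) or a standalone
      // '#pragma omp barrier' between two statements does not prevent the
      // remaining statement from being treated as the single child.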
6138 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6139 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6140 continue; 6141 // Analyze declarations. 6142 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6143 if (llvm::all_of(DS->decls(), [](const Decl *D) { 6144 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6145 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6146 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6147 isa<UsingDirectiveDecl>(D) || 6148 isa<OMPDeclareReductionDecl>(D) || 6149 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6150 return true; 6151 const auto *VD = dyn_cast<VarDecl>(D); 6152 if (!VD) 6153 return false; 6154 return VD->hasGlobalStorage() || !VD->isUsed(); 6155 })) 6156 continue; 6157 } 6158 // Found multiple children - cannot return a single child. 6159 if (Child) 6160 return nullptr; 6161 Child = S; 6162 } 6163 if (Child) 6164 Child = Child->IgnoreContainers(); 6165 } 6166 return Child; 6167 } 6168 6169 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( 6170 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6171 int32_t &DefaultVal) { 6172 6173 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6174 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6175 "Expected target-based executable directive."); 6176 switch (DirectiveKind) { 6177 case OMPD_target: { 6178 const auto *CS = D.getInnermostCapturedStmt(); 6179 const auto *Body = 6180 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6181 const Stmt *ChildStmt = 6182 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6183 if (const auto *NestedDir = 6184 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6185 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6186 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6187 const Expr *NumTeams = 6188 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6189 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6190 if (auto Constant = 6191 NumTeams->getIntegerConstantExpr(CGF.getContext())) 6192 DefaultVal = Constant->getExtValue(); 6193 return NumTeams; 6194 } 6195 DefaultVal = 0; 6196 return nullptr; 6197 } 6198 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6199 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { 6200 DefaultVal = 1; 6201 return nullptr; 6202 } 6203 DefaultVal = 1; 6204 return nullptr; 6205 } 6206 // A value of -1 is used to signal that we still need to check whether a teams region must be emitted at all. 6207 DefaultVal = -1; 6208 return nullptr; 6209 } 6210 case OMPD_target_teams_loop: 6211 case OMPD_target_teams: 6212 case OMPD_target_teams_distribute: 6213 case OMPD_target_teams_distribute_simd: 6214 case OMPD_target_teams_distribute_parallel_for: 6215 case OMPD_target_teams_distribute_parallel_for_simd: { 6216 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6217 const Expr *NumTeams = 6218 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6219 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6220 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) 6221 DefaultVal = Constant->getExtValue(); 6222 return NumTeams; 6223 } 6224 DefaultVal = 0; 6225 return nullptr; 6226 } 6227 case OMPD_target_parallel: 6228 case OMPD_target_parallel_for: 6229 case OMPD_target_parallel_for_simd: 6230 case OMPD_target_parallel_loop: 6231 case OMPD_target_simd: 6232 DefaultVal = 1; 6233 return nullptr; 6234 case OMPD_parallel: 6235 case OMPD_for: 6236 case OMPD_parallel_for: 6237 case OMPD_parallel_loop:
6238 case OMPD_parallel_master: 6239 case OMPD_parallel_sections: 6240 case OMPD_for_simd: 6241 case OMPD_parallel_for_simd: 6242 case OMPD_cancel: 6243 case OMPD_cancellation_point: 6244 case OMPD_ordered: 6245 case OMPD_threadprivate: 6246 case OMPD_allocate: 6247 case OMPD_task: 6248 case OMPD_simd: 6249 case OMPD_tile: 6250 case OMPD_unroll: 6251 case OMPD_sections: 6252 case OMPD_section: 6253 case OMPD_single: 6254 case OMPD_master: 6255 case OMPD_critical: 6256 case OMPD_taskyield: 6257 case OMPD_barrier: 6258 case OMPD_taskwait: 6259 case OMPD_taskgroup: 6260 case OMPD_atomic: 6261 case OMPD_flush: 6262 case OMPD_depobj: 6263 case OMPD_scan: 6264 case OMPD_teams: 6265 case OMPD_target_data: 6266 case OMPD_target_exit_data: 6267 case OMPD_target_enter_data: 6268 case OMPD_distribute: 6269 case OMPD_distribute_simd: 6270 case OMPD_distribute_parallel_for: 6271 case OMPD_distribute_parallel_for_simd: 6272 case OMPD_teams_distribute: 6273 case OMPD_teams_distribute_simd: 6274 case OMPD_teams_distribute_parallel_for: 6275 case OMPD_teams_distribute_parallel_for_simd: 6276 case OMPD_target_update: 6277 case OMPD_declare_simd: 6278 case OMPD_declare_variant: 6279 case OMPD_begin_declare_variant: 6280 case OMPD_end_declare_variant: 6281 case OMPD_declare_target: 6282 case OMPD_end_declare_target: 6283 case OMPD_declare_reduction: 6284 case OMPD_declare_mapper: 6285 case OMPD_taskloop: 6286 case OMPD_taskloop_simd: 6287 case OMPD_master_taskloop: 6288 case OMPD_master_taskloop_simd: 6289 case OMPD_parallel_master_taskloop: 6290 case OMPD_parallel_master_taskloop_simd: 6291 case OMPD_requires: 6292 case OMPD_metadirective: 6293 case OMPD_unknown: 6294 break; 6295 default: 6296 break; 6297 } 6298 llvm_unreachable("Unexpected directive kind."); 6299 } 6300 6301 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( 6302 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 6303 assert(!CGF.getLangOpts().OpenMPIsTargetDevice && 6304 "Clauses associated with the teams directive expected to be emitted " 6305 "only for the host!"); 6306 CGBuilderTy &Bld = CGF.Builder; 6307 int32_t DefaultNT = -1; 6308 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT); 6309 if (NumTeams != nullptr) { 6310 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6311 6312 switch (DirectiveKind) { 6313 case OMPD_target: { 6314 const auto *CS = D.getInnermostCapturedStmt(); 6315 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6316 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6317 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6318 /*IgnoreResultAssign*/ true); 6319 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6320 /*isSigned=*/true); 6321 } 6322 case OMPD_target_teams: 6323 case OMPD_target_teams_distribute: 6324 case OMPD_target_teams_distribute_simd: 6325 case OMPD_target_teams_distribute_parallel_for: 6326 case OMPD_target_teams_distribute_parallel_for_simd: { 6327 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6328 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6329 /*IgnoreResultAssign*/ true); 6330 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6331 /*isSigned=*/true); 6332 } 6333 default: 6334 break; 6335 } 6336 } 6337 6338 return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT); 6339 } 6340 6341 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6342 llvm::Value *DefaultThreadLimitVal) { 6343 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6344 CGF.getContext(), CS->getCapturedStmt()); 6345 
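  // Illustrative example (hypothetical source): for
  //   #pragma omp target
  //   #pragma omp parallel num_threads(4) if(c)
  // this helper yields 'c ? 4 : 1', with the 4 further clamped to
  // DefaultThreadLimitVal when one is provided.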
if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6346 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6347 llvm::Value *NumThreads = nullptr; 6348 llvm::Value *CondVal = nullptr; 6349 // Handle the if clause. If the if clause is present, the number of threads 6350 // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1. 6351 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6352 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6353 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6354 const OMPIfClause *IfClause = nullptr; 6355 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6356 if (C->getNameModifier() == OMPD_unknown || 6357 C->getNameModifier() == OMPD_parallel) { 6358 IfClause = C; 6359 break; 6360 } 6361 } 6362 if (IfClause) { 6363 const Expr *Cond = IfClause->getCondition(); 6364 bool Result; 6365 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6366 if (!Result) 6367 return CGF.Builder.getInt32(1); 6368 } else { 6369 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6370 if (const auto *PreInit = 6371 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6372 for (const auto *I : PreInit->decls()) { 6373 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6374 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6375 } else { 6376 CodeGenFunction::AutoVarEmission Emission = 6377 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6378 CGF.EmitAutoVarCleanups(Emission); 6379 } 6380 } 6381 } 6382 CondVal = CGF.EvaluateExprAsBool(Cond); 6383 } 6384 } 6385 } 6386 // Check the value of the num_threads clause only if the if clause was not 6387 // specified or does not evaluate to false. 6388 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6389 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6390 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6391 const auto *NumThreadsClause = 6392 Dir->getSingleClause<OMPNumThreadsClause>(); 6393 CodeGenFunction::LexicalScope Scope( 6394 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6395 if (const auto *PreInit = 6396 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6397 for (const auto *I : PreInit->decls()) { 6398 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6399 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6400 } else { 6401 CodeGenFunction::AutoVarEmission Emission = 6402 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6403 CGF.EmitAutoVarCleanups(Emission); 6404 } 6405 } 6406 } 6407 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6408 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6409 /*isSigned=*/false); 6410 if (DefaultThreadLimitVal) 6411 NumThreads = CGF.Builder.CreateSelect( 6412 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6413 DefaultThreadLimitVal, NumThreads); 6414 } else { 6415 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6416 : CGF.Builder.getInt32(0); 6417 } 6418 // Process condition of the if clause.
6419 if (CondVal) { 6420 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6421 CGF.Builder.getInt32(1)); 6422 } 6423 return NumThreads; 6424 } 6425 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6426 return CGF.Builder.getInt32(1); 6427 } 6428 return DefaultThreadLimitVal; 6429 } 6430 6431 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( 6432 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6433 int32_t &DefaultVal) { 6434 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6435 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6436 "Expected target-based executable directive."); 6437 6438 switch (DirectiveKind) { 6439 case OMPD_target: 6440 // Teams have no clause thread_limit 6441 return nullptr; 6442 case OMPD_target_teams: 6443 case OMPD_target_teams_distribute: 6444 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6445 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6446 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit(); 6447 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6448 if (auto Constant = 6449 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6450 DefaultVal = Constant->getExtValue(); 6451 return ThreadLimit; 6452 } 6453 return nullptr; 6454 case OMPD_target_teams_loop: 6455 case OMPD_target_parallel_loop: 6456 case OMPD_target_parallel: 6457 case OMPD_target_parallel_for: 6458 case OMPD_target_parallel_for_simd: 6459 case OMPD_target_teams_distribute_parallel_for: 6460 case OMPD_target_teams_distribute_parallel_for_simd: { 6461 Expr *ThreadLimit = nullptr; 6462 Expr *NumThreads = nullptr; 6463 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6464 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6465 ThreadLimit = ThreadLimitClause->getThreadLimit(); 6466 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6467 if (auto Constant = 6468 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6469 DefaultVal = Constant->getExtValue(); 6470 } 6471 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6472 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6473 NumThreads = NumThreadsClause->getNumThreads(); 6474 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) { 6475 if (auto Constant = 6476 NumThreads->getIntegerConstantExpr(CGF.getContext())) { 6477 if (Constant->getExtValue() < DefaultVal) { 6478 DefaultVal = Constant->getExtValue(); 6479 ThreadLimit = NumThreads; 6480 } 6481 } 6482 } 6483 } 6484 return ThreadLimit; 6485 } 6486 case OMPD_target_teams_distribute_simd: 6487 case OMPD_target_simd: 6488 DefaultVal = 1; 6489 return nullptr; 6490 case OMPD_parallel: 6491 case OMPD_for: 6492 case OMPD_parallel_for: 6493 case OMPD_parallel_master: 6494 case OMPD_parallel_sections: 6495 case OMPD_for_simd: 6496 case OMPD_parallel_for_simd: 6497 case OMPD_cancel: 6498 case OMPD_cancellation_point: 6499 case OMPD_ordered: 6500 case OMPD_threadprivate: 6501 case OMPD_allocate: 6502 case OMPD_task: 6503 case OMPD_simd: 6504 case OMPD_tile: 6505 case OMPD_unroll: 6506 case OMPD_sections: 6507 case OMPD_section: 6508 case OMPD_single: 6509 case OMPD_master: 6510 case OMPD_critical: 6511 case OMPD_taskyield: 6512 case OMPD_barrier: 6513 case OMPD_taskwait: 6514 case OMPD_taskgroup: 6515 case OMPD_atomic: 6516 case OMPD_flush: 6517 case OMPD_depobj: 6518 case OMPD_scan: 6519 case OMPD_teams: 6520 case OMPD_target_data: 6521 case OMPD_target_exit_data: 6522 case OMPD_target_enter_data: 6523 case OMPD_distribute: 6524 case 
OMPD_distribute_simd: 6525 case OMPD_distribute_parallel_for: 6526 case OMPD_distribute_parallel_for_simd: 6527 case OMPD_teams_distribute: 6528 case OMPD_teams_distribute_simd: 6529 case OMPD_teams_distribute_parallel_for: 6530 case OMPD_teams_distribute_parallel_for_simd: 6531 case OMPD_target_update: 6532 case OMPD_declare_simd: 6533 case OMPD_declare_variant: 6534 case OMPD_begin_declare_variant: 6535 case OMPD_end_declare_variant: 6536 case OMPD_declare_target: 6537 case OMPD_end_declare_target: 6538 case OMPD_declare_reduction: 6539 case OMPD_declare_mapper: 6540 case OMPD_taskloop: 6541 case OMPD_taskloop_simd: 6542 case OMPD_master_taskloop: 6543 case OMPD_master_taskloop_simd: 6544 case OMPD_parallel_master_taskloop: 6545 case OMPD_parallel_master_taskloop_simd: 6546 case OMPD_requires: 6547 case OMPD_unknown: 6548 break; 6549 default: 6550 break; 6551 } 6552 llvm_unreachable("Unsupported directive kind."); 6553 } 6554 6555 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( 6556 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 6557 assert(!CGF.getLangOpts().OpenMPIsTargetDevice && 6558 "Clauses associated with the teams directive expected to be emitted " 6559 "only for the host!"); 6560 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6561 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6562 "Expected target-based executable directive."); 6563 CGBuilderTy &Bld = CGF.Builder; 6564 llvm::Value *ThreadLimitVal = nullptr; 6565 llvm::Value *NumThreadsVal = nullptr; 6566 switch (DirectiveKind) { 6567 case OMPD_target: { 6568 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6569 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6570 return NumThreads; 6571 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6572 CGF.getContext(), CS->getCapturedStmt()); 6573 // TODO: The standard is not clear how to resolve two thread limit clauses, 6574 // let's pick the teams one if it's present, otherwise the target one. 
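  // Illustration (hypothetical source; 'thread_limit' on 'target' is an
  // OpenMP 5.1 feature):
  //   #pragma omp target thread_limit(8)
  //   #pragma omp teams thread_limit(4)
  // With the policy above, the inner 'teams' value (4) is the one used.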
6575 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6576 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6577 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) { 6578 ThreadLimitClause = TLC; 6579 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6580 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6581 CodeGenFunction::LexicalScope Scope( 6582 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6583 if (const auto *PreInit = 6584 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6585 for (const auto *I : PreInit->decls()) { 6586 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6587 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6588 } else { 6589 CodeGenFunction::AutoVarEmission Emission = 6590 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6591 CGF.EmitAutoVarCleanups(Emission); 6592 } 6593 } 6594 } 6595 } 6596 } 6597 if (ThreadLimitClause) { 6598 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6599 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6600 ThreadLimitVal = 6601 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6602 } 6603 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6604 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6605 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6606 CS = Dir->getInnermostCapturedStmt(); 6607 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6608 CGF.getContext(), CS->getCapturedStmt()); 6609 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6610 } 6611 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6612 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6613 CS = Dir->getInnermostCapturedStmt(); 6614 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6615 return NumThreads; 6616 } 6617 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6618 return Bld.getInt32(1); 6619 } 6620 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6621 } 6622 case OMPD_target_teams: { 6623 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6624 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6625 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6626 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6627 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6628 ThreadLimitVal = 6629 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6630 } 6631 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6632 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6633 return NumThreads; 6634 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6635 CGF.getContext(), CS->getCapturedStmt()); 6636 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6637 if (Dir->getDirectiveKind() == OMPD_distribute) { 6638 CS = Dir->getInnermostCapturedStmt(); 6639 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6640 return NumThreads; 6641 } 6642 } 6643 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 6644 } 6645 case OMPD_target_teams_distribute: 6646 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6647 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6648 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6649 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6650 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6651 ThreadLimitVal = 6652 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6653 } 6654 if (llvm::Value *NumThreads = 6655 getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal)) 6656 return NumThreads; 6657 return Bld.getInt32(0); 6658 case OMPD_target_teams_loop: 6659 case OMPD_target_parallel_loop: 6660 case OMPD_target_parallel: 6661 case OMPD_target_parallel_for: 6662 case OMPD_target_parallel_for_simd: 6663 case OMPD_target_teams_distribute_parallel_for: 6664 case OMPD_target_teams_distribute_parallel_for_simd: { 6665 llvm::Value *CondVal = nullptr; 6666 // Handle the if clause. If the if clause is present, the number of threads 6667 // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1. 6668 if (D.hasClausesOfKind<OMPIfClause>()) { 6669 const OMPIfClause *IfClause = nullptr; 6670 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6671 if (C->getNameModifier() == OMPD_unknown || 6672 C->getNameModifier() == OMPD_parallel) { 6673 IfClause = C; 6674 break; 6675 } 6676 } 6677 if (IfClause) { 6678 const Expr *Cond = IfClause->getCondition(); 6679 bool Result; 6680 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6681 if (!Result) 6682 return Bld.getInt32(1); 6683 } else { 6684 CodeGenFunction::RunCleanupsScope Scope(CGF); 6685 CondVal = CGF.EvaluateExprAsBool(Cond); 6686 } 6687 } 6688 } 6689 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6690 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6691 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6692 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6693 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6694 ThreadLimitVal = 6695 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6696 } 6697 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6698 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6699 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6700 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6701 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6702 NumThreadsVal = 6703 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 6704 ThreadLimitVal = ThreadLimitVal 6705 ?
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6706 ThreadLimitVal), 6707 NumThreadsVal, ThreadLimitVal) 6708 : NumThreadsVal; 6709 } 6710 if (!ThreadLimitVal) 6711 ThreadLimitVal = Bld.getInt32(0); 6712 if (CondVal) 6713 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6714 return ThreadLimitVal; 6715 } 6716 case OMPD_target_teams_distribute_simd: 6717 case OMPD_target_simd: 6718 return Bld.getInt32(1); 6719 case OMPD_parallel: 6720 case OMPD_for: 6721 case OMPD_parallel_for: 6722 case OMPD_parallel_master: 6723 case OMPD_parallel_sections: 6724 case OMPD_for_simd: 6725 case OMPD_parallel_for_simd: 6726 case OMPD_cancel: 6727 case OMPD_cancellation_point: 6728 case OMPD_ordered: 6729 case OMPD_threadprivate: 6730 case OMPD_allocate: 6731 case OMPD_task: 6732 case OMPD_simd: 6733 case OMPD_tile: 6734 case OMPD_unroll: 6735 case OMPD_sections: 6736 case OMPD_section: 6737 case OMPD_single: 6738 case OMPD_master: 6739 case OMPD_critical: 6740 case OMPD_taskyield: 6741 case OMPD_barrier: 6742 case OMPD_taskwait: 6743 case OMPD_taskgroup: 6744 case OMPD_atomic: 6745 case OMPD_flush: 6746 case OMPD_depobj: 6747 case OMPD_scan: 6748 case OMPD_teams: 6749 case OMPD_target_data: 6750 case OMPD_target_exit_data: 6751 case OMPD_target_enter_data: 6752 case OMPD_distribute: 6753 case OMPD_distribute_simd: 6754 case OMPD_distribute_parallel_for: 6755 case OMPD_distribute_parallel_for_simd: 6756 case OMPD_teams_distribute: 6757 case OMPD_teams_distribute_simd: 6758 case OMPD_teams_distribute_parallel_for: 6759 case OMPD_teams_distribute_parallel_for_simd: 6760 case OMPD_target_update: 6761 case OMPD_declare_simd: 6762 case OMPD_declare_variant: 6763 case OMPD_begin_declare_variant: 6764 case OMPD_end_declare_variant: 6765 case OMPD_declare_target: 6766 case OMPD_end_declare_target: 6767 case OMPD_declare_reduction: 6768 case OMPD_declare_mapper: 6769 case OMPD_taskloop: 6770 case OMPD_taskloop_simd: 6771 case OMPD_master_taskloop: 6772 case OMPD_master_taskloop_simd: 6773 case OMPD_parallel_master_taskloop: 6774 case OMPD_parallel_master_taskloop_simd: 6775 case OMPD_requires: 6776 case OMPD_metadirective: 6777 case OMPD_unknown: 6778 break; 6779 default: 6780 break; 6781 } 6782 llvm_unreachable("Unsupported directive kind."); 6783 } 6784 6785 namespace { 6786 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 6787 6788 // Utility to handle information from clauses associated with a given 6789 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 6790 // It provides a convenient interface to obtain the information and generate 6791 // code for that information. 6792 class MappableExprsHandler { 6793 public: 6794 /// Get the offset of the OMP_MAP_MEMBER_OF field. 6795 static unsigned getFlagMemberOffset() { 6796 unsigned Offset = 0; 6797 for (uint64_t Remain = 6798 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 6799 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); 6800 !(Remain & 1); Remain = Remain >> 1) 6801 Offset++; 6802 return Offset; 6803 } 6804 6805 /// Class that holds debugging information for a data mapping to be passed to 6806 /// the runtime library. 6807 class MappingExprInfo { 6808 /// The variable declaration used for the data mapping. 6809 const ValueDecl *MapDecl = nullptr; 6810 /// The original expression used in the map clause, or null if there is 6811 /// none. 
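    /// (For instance, the expression 's.p[:22]' from 'map(to: s.p[:22])'.)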
6812 const Expr *MapExpr = nullptr; 6813 6814 public: 6815 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 6816 : MapDecl(MapDecl), MapExpr(MapExpr) {} 6817 6818 const ValueDecl *getMapDecl() const { return MapDecl; } 6819 const Expr *getMapExpr() const { return MapExpr; } 6820 }; 6821 6822 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy; 6823 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy; 6824 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy; 6825 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy; 6826 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy; 6827 using MapNonContiguousArrayTy = 6828 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy; 6829 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 6830 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>; 6831 6832 /// This structure contains combined information generated for mappable 6833 /// clauses, including base pointers, pointers, sizes, map types, user-defined 6834 /// mappers, and non-contiguous information. 6835 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy { 6836 MapExprsArrayTy Exprs; 6837 MapValueDeclsArrayTy Mappers; 6838 MapValueDeclsArrayTy DevicePtrDecls; 6839 6840 /// Append arrays in \a CurInfo. 6841 void append(MapCombinedInfoTy &CurInfo) { 6842 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); 6843 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(), 6844 CurInfo.DevicePtrDecls.end()); 6845 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 6846 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo); 6847 } 6848 }; 6849 6850 /// Map between a struct and its lowest & highest elements which have been 6851 /// mapped. 6852 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 6853 /// HE(FieldIndex, Pointer)} 6854 struct StructRangeInfoTy { 6855 MapCombinedInfoTy PreliminaryMapData; 6856 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 6857 0, Address::invalid()}; 6858 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 6859 0, Address::invalid()}; 6860 Address Base = Address::invalid(); 6861 Address LB = Address::invalid(); 6862 bool IsArraySection = false; 6863 bool HasCompleteRecord = false; 6864 }; 6865 6866 private: 6867 /// Information gathered for a map clause component list, including whether a device pointer has to be returned for it.
6868 struct MapInfo { 6869 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 6870 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 6871 ArrayRef<OpenMPMapModifierKind> MapModifiers; 6872 ArrayRef<OpenMPMotionModifierKind> MotionModifiers; 6873 bool ReturnDevicePointer = false; 6874 bool IsImplicit = false; 6875 const ValueDecl *Mapper = nullptr; 6876 const Expr *VarRef = nullptr; 6877 bool ForDeviceAddr = false; 6878 6879 MapInfo() = default; 6880 MapInfo( 6881 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 6882 OpenMPMapClauseKind MapType, 6883 ArrayRef<OpenMPMapModifierKind> MapModifiers, 6884 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 6885 bool ReturnDevicePointer, bool IsImplicit, 6886 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr, 6887 bool ForDeviceAddr = false) 6888 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 6889 MotionModifiers(MotionModifiers), 6890 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), 6891 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {} 6892 }; 6893 6894 /// If use_device_ptr or use_device_addr is used on a decl which is a struct 6895 /// member and there is no map information about it, then emission of that 6896 /// entry is deferred until the whole struct has been processed. 6897 struct DeferredDevicePtrEntryTy { 6898 const Expr *IE = nullptr; 6899 const ValueDecl *VD = nullptr; 6900 bool ForDeviceAddr = false; 6901 6902 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 6903 bool ForDeviceAddr) 6904 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 6905 }; 6906 6907 /// The target directive from where the mappable clauses were extracted. It 6908 /// is either an executable directive or a user-defined mapper directive. 6909 llvm::PointerUnion<const OMPExecutableDirective *, 6910 const OMPDeclareMapperDecl *> 6911 CurDir; 6912 6913 /// Function the directive is being generated for. 6914 CodeGenFunction &CGF; 6915 6916 /// Set of all firstprivate variables in the current directive. 6917 /// The bool data is set to true if the variable is implicitly marked as 6918 /// firstprivate, false otherwise. 6919 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 6920 6921 /// Map between device pointer declarations and their expression components. 6922 /// The key value for declarations in 'this' is null. 6923 llvm::DenseMap< 6924 const ValueDecl *, 6925 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 6926 DevPointersMap; 6927 6928 /// Map between device addr declarations and their expression components. 6929 /// The key value for declarations in 'this' is null. 6930 llvm::DenseMap< 6931 const ValueDecl *, 6932 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 6933 HasDevAddrsMap; 6934 6935 /// Map between lambda declarations and their map type. 6936 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap; 6937 6938 llvm::Value *getExprTypeSize(const Expr *E) const { 6939 QualType ExprTy = E->getType().getCanonicalType(); 6940 6941 // Calculate the size for array shaping expression.
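    // For instance (illustrative), for the shaping expression '([n][m])p' with
    // 'float *p', this computes n * m * sizeof(float).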
6942 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 6943 llvm::Value *Size = 6944 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 6945 for (const Expr *SE : OAE->getDimensions()) { 6946 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 6947 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 6948 CGF.getContext().getSizeType(), 6949 SE->getExprLoc()); 6950 Size = CGF.Builder.CreateNUWMul(Size, Sz); 6951 } 6952 return Size; 6953 } 6954 6955 // Reference types are ignored for mapping purposes. 6956 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 6957 ExprTy = RefTy->getPointeeType().getCanonicalType(); 6958 6959 // Given that an array section is considered a built-in type, we need to 6960 // do the calculation based on the length of the section instead of relying 6961 // on CGF.getTypeSize(E->getType()). 6962 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 6963 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 6964 OAE->getBase()->IgnoreParenImpCasts()) 6965 .getCanonicalType(); 6966 6967 // If there is no length associated with the expression and the lower bound 6968 // is not specified either, that means we are using the whole length of the 6969 // base. 6970 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 6971 !OAE->getLowerBound()) 6972 return CGF.getTypeSize(BaseTy); 6973 6974 llvm::Value *ElemSize; 6975 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 6976 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 6977 } else { 6978 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 6979 assert(ATy && "Expecting array type if not a pointer type."); 6980 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 6981 } 6982 6983 // If we don't have a length at this point, that is because we have an 6984 // array section with a single element. 6985 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 6986 return ElemSize; 6987 6988 if (const Expr *LenExpr = OAE->getLength()) { 6989 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 6990 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 6991 CGF.getContext().getSizeType(), 6992 LenExpr->getExprLoc()); 6993 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 6994 } 6995 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 6996 OAE->getLowerBound() && "expected array_section[lb:]."); 6997 // Size = sizeof(base type) - lb * sizeof(element type); 6998 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 6999 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7000 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7001 CGF.getContext().getSizeType(), 7002 OAE->getLowerBound()->getExprLoc()); 7003 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7004 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7005 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7006 LengthVal = CGF.Builder.CreateSelect( 7007 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7008 return LengthVal; 7009 } 7010 return CGF.getTypeSize(ExprTy); 7011 } 7012 7013 /// Return the corresponding bits for a given map clause modifier. Add 7014 /// a flag marking the map as a pointer if requested. Add a flag marking the 7015 /// map as the first one of a series of maps that relate to the same map 7016 /// expression.
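  /// For example (illustrative only), 'map(always, tofrom: x)' on a target
  /// parameter yields
  ///   OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_TARGET_PARAM,
  /// and an implicitly generated map additionally carries OMP_MAP_IMPLICIT.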
7017 OpenMPOffloadMappingFlags getMapTypeBits( 7018 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7019 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7020 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7021 OpenMPOffloadMappingFlags Bits = 7022 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT 7023 : OpenMPOffloadMappingFlags::OMP_MAP_NONE; 7024 switch (MapType) { 7025 case OMPC_MAP_alloc: 7026 case OMPC_MAP_release: 7027 // alloc and release are the default behavior in the runtime library, i.e., 7028 // if we don't pass any bits, alloc/release is what the runtime is 7029 // going to do. Therefore, we don't need to signal anything for these two 7030 // type modifiers. 7031 break; 7032 case OMPC_MAP_to: 7033 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO; 7034 break; 7035 case OMPC_MAP_from: 7036 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM; 7037 break; 7038 case OMPC_MAP_tofrom: 7039 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO | 7040 OpenMPOffloadMappingFlags::OMP_MAP_FROM; 7041 break; 7042 case OMPC_MAP_delete: 7043 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE; 7044 break; 7045 case OMPC_MAP_unknown: 7046 llvm_unreachable("Unexpected map type!"); 7047 } 7048 if (AddPtrFlag) 7049 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ; 7050 if (AddIsTargetParamFlag) 7051 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; 7052 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always)) 7053 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS; 7054 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close)) 7055 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE; 7056 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) || 7057 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present)) 7058 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT; 7059 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold)) 7060 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; 7061 if (IsNonContiguous) 7062 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG; 7063 return Bits; 7064 } 7065 7066 /// Return true if the provided expression is a final array section. A 7067 /// final array section is one whose length can't be proved to be one. 7068 bool isFinalArraySectionExpression(const Expr *E) const { 7069 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7070 7071 // It is not an array section and therefore not a unity-size one. 7072 if (!OASE) 7073 return false; 7074 7075 // An array section with no colon always refers to a single element. 7076 if (OASE->getColonLocFirst().isInvalid()) 7077 return false; 7078 7079 const Expr *Length = OASE->getLength(); 7080 7081 // If we don't have a length we have to check if the array has size 1 7082 // for this dimension. Also, we should always expect a length if the 7083 // base type is a pointer. 7084 if (!Length) { 7085 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7086 OASE->getBase()->IgnoreParenImpCasts()) 7087 .getCanonicalType(); 7088 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7089 return ATy->getSize().getSExtValue() != 1; 7090 // If we don't have a constant dimension length, we have to consider 7091 // the current section as having any size, so it is not necessarily 7092 // unitary. If it happens to be unity size, that's the user's fault. 7093 return true; 7094 } 7095 7096 // Check if the length evaluates to 1.
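      // e.g. 'a[i:1]' has a provable length of 1 and is not final, whereas
      // 'a[i:n]' cannot be proved to have length 1 and is treated as final.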
7097 Expr::EvalResult Result;
7098 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7099 return true; // Can have more than size 1.
7100
7101 llvm::APSInt ConstLength = Result.Val.getInt();
7102 return ConstLength.getSExtValue() != 1;
7103 }
7104
7105 /// Generate the base pointers, section pointers, sizes, map type bits, and
7106 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7107 /// map type, map or motion modifiers, and expression components.
7108 /// \a IsFirstComponentList should be set to true if the provided set of
7109 /// components is the first associated with a capture.
7110 void generateInfoForComponentList(
7111 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7112 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7113 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7114 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7115 bool IsFirstComponentList, bool IsImplicit,
7116 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7117 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7118 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7119 OverlappedElements = std::nullopt) const {
7120 // The following summarizes what has to be generated for each map and the
7121 // types below. The generated information is expressed in this order:
7122 // base pointer, section pointer, size, flags
7123 // (to add to the ones that come from the map type and modifier).
7124 //
7125 // double d;
7126 // int i[100];
7127 // float *p;
7128 // int **a = &i;
7129 //
7130 // struct S1 {
7131 // int i;
7132 // float f[50];
7133 // }
7134 // struct S2 {
7135 // int i;
7136 // float f[50];
7137 // S1 s;
7138 // double *p;
7139 // struct S2 *ps;
7140 // int &ref;
7141 // }
7142 // S2 s;
7143 // S2 *ps;
7144 //
7145 // map(d)
7146 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7147 //
7148 // map(i)
7149 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7150 //
7151 // map(i[1:23])
7152 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7153 //
7154 // map(p)
7155 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7156 //
7157 // map(p[1:24])
7158 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7159 // in unified shared memory mode or for local pointers
7160 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7161 //
7162 // map((*a)[0:3])
7163 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7164 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
7165 //
7166 // map(**a)
7167 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7168 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
7169 //
7170 // map(s)
7171 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7172 //
7173 // map(s.i)
7174 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7175 //
7176 // map(s.s.f)
7177 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7178 //
7179 // map(s.p)
7180 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7181 //
7182 // map(to: s.p[:22])
7183 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7184 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7185 // &(s.p), &(s.p[0]), 22*sizeof(double),
7186 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7187 // (*) alloc space for struct members, only this is a target parameter
7188 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7189 // optimizes this entry out, same in the examples below)
7190 // (***) map the
pointee (map: to)
7191 //
7192 // map(to: s.ref)
7193 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7194 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7195 // (*) alloc space for struct members, only this is a target parameter
7196 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7197 // optimizes this entry out, same in the examples below)
7198 // (***) map the pointee (map: to)
7199 //
7200 // map(s.ps)
7201 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7202 //
7203 // map(from: s.ps->s.i)
7204 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7205 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7206 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7207 //
7208 // map(to: s.ps->ps)
7209 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7210 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7211 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7212 //
7213 // map(s.ps->ps->ps)
7214 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7215 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7216 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7217 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7218 //
7219 // map(to: s.ps->ps->s.f[:22])
7220 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7221 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7222 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7223 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7224 //
7225 // map(ps)
7226 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7227 //
7228 // map(ps->i)
7229 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7230 //
7231 // map(ps->s.f)
7232 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7233 //
7234 // map(from: ps->p)
7235 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7236 //
7237 // map(to: ps->p[:22])
7238 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7239 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7240 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7241 //
7242 // map(ps->ps)
7243 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7244 //
7245 // map(from: ps->ps->s.i)
7246 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7247 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7248 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7249 //
7250 // map(from: ps->ps->ps)
7251 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7252 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7253 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7254 //
7255 // map(ps->ps->ps->ps)
7256 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7257 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7258 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7259 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7260 //
7261 // map(to: ps->ps->ps->s.f[:22])
7262 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7263 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7264 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7265 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7266 //
7267 // map(to: s.f[:22]) map(from: s.p[:33])
7268 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7269 // sizeof(double*) (**), TARGET_PARAM
7270 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7271 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7272 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7273 // (**) allocate
contiguous space needed to fit all mapped members even if 7274 // we allocate space for members not mapped (in this example, 7275 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7276 // them as well because they fall between &s.f[0] and &s.p) 7277 // 7278 // map(from: s.f[:22]) map(to: ps->p[:33]) 7279 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7280 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7281 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7282 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7283 // (*) the struct this entry pertains to is the 2nd element in the list of 7284 // arguments, hence MEMBER_OF(2) 7285 // 7286 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7287 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7288 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7289 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7290 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7291 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7292 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7293 // (*) the struct this entry pertains to is the 4th element in the list 7294 // of arguments, hence MEMBER_OF(4) 7295 7296 // Track if the map information being generated is the first for a capture. 7297 bool IsCaptureFirstInfo = IsFirstComponentList; 7298 // When the variable is on a declare target link or in a to clause with 7299 // unified memory, a reference is needed to hold the host/device address 7300 // of the variable. 7301 bool RequiresReference = false; 7302 7303 // Scan the components from the base to the complete expression. 7304 auto CI = Components.rbegin(); 7305 auto CE = Components.rend(); 7306 auto I = CI; 7307 7308 // Track if the map information being generated is the first for a list of 7309 // components. 7310 bool IsExpressionFirstInfo = true; 7311 bool FirstPointerInComplexData = false; 7312 Address BP = Address::invalid(); 7313 const Expr *AssocExpr = I->getAssociatedExpression(); 7314 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7315 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7316 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7317 7318 if (isa<MemberExpr>(AssocExpr)) { 7319 // The base is the 'this' pointer. The content of the pointer is going 7320 // to be the base of the field being mapped. 7321 BP = CGF.LoadCXXThisAddress(); 7322 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7323 (OASE && 7324 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7325 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7326 } else if (OAShE && 7327 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7328 BP = Address( 7329 CGF.EmitScalarExpr(OAShE->getBase()), 7330 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()), 7331 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7332 } else { 7333 // The base is the reference to the variable. 7334 // BP = &Var. 
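// (Illustrative sketch: for 'map(tofrom: v)' on an ordinary global or
// local 'v', BP simply starts at &v; the 'declare target link'/unified
// shared memory case below rebases BP to the runtime-managed reference
// instead.)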
7335 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7336 if (const auto *VD =
7337 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7338 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7339 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7340 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7341 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7342 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7343 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7344 RequiresReference = true;
7345 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7346 }
7347 }
7348 }
7349
7350 // If the variable is a pointer and is being dereferenced (i.e. is not
7351 // the last component), the base has to be the pointer itself, not its
7352 // reference. References are ignored for mapping purposes.
7353 QualType Ty =
7354 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7355 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7356 // No need to generate individual map information for the pointer, it
7357 // can be associated with the combined storage if shared memory mode is
7358 // active or the base declaration is not a global variable.
7359 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7360 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7361 !VD || VD->hasLocalStorage())
7362 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7363 else
7364 FirstPointerInComplexData = true;
7365 ++I;
7366 }
7367 }
7368
7369 // Track whether a component of the list should be marked as MEMBER_OF some
7370 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7371 // in a component list should be marked as MEMBER_OF; all subsequent entries
7372 // do not belong to the base struct. E.g.
7373 // struct S2 s;
7374 // s.ps->ps->ps->f[:]
7375 // (1) (2) (3) (4)
7376 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7377 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7378 // is the pointee of ps(2) which is not a member of struct s, so it should
7379 // not be marked as such (it is still PTR_AND_OBJ).
7380 // The variable is initialized to false so that PTR_AND_OBJ entries which
7381 // are not struct members are not considered (e.g. array of pointers to
7382 // data).
7383 bool ShouldBeMemberOf = false;
7384
7385 // Variable keeping track of whether or not we have encountered a component
7386 // in the component list which is a member expression. Useful when we have a
7387 // pointer or a final array section, in which case it is the previous
7388 // component in the list which tells us whether we have a member expression.
7389 // E.g. X.f[:]
7390 // While processing the final array section "[:]" it is "f" which tells us
7391 // whether we are dealing with a member of a declared struct.
7392 const MemberExpr *EncounteredME = nullptr;
7393
7394 // Track the total number of dimensions. Start from one for the dummy
7395 // dimension.
7396 uint64_t DimSize = 1;
7397
7398 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7399 bool IsPrevMemberReference = false;
7400
7401 for (; I != CE; ++I) {
7402 // If the current component is a member of a struct (parent struct), mark it.
7403 if (!EncounteredME) {
7404 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7405 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7406 // as MEMBER_OF the parent struct.
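// (E.g., an illustrative case: for 'map(s.p[0:10])' the member
// expression 's.p' is the first one encountered, so the pointee entry
// later generated for 's.p[0:10]' is expected to be tagged MEMBER_OF the
// combined entry of 's'.)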
7407 if (EncounteredME) {
7408 ShouldBeMemberOf = true;
7409 // Do not emit as complex pointer if this is actually not an array-like
7410 // expression.
7411 if (FirstPointerInComplexData) {
7412 QualType Ty = std::prev(I)
7413 ->getAssociatedDeclaration()
7414 ->getType()
7415 .getNonReferenceType();
7416 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7417 FirstPointerInComplexData = false;
7418 }
7419 }
7420 }
7421
7422 auto Next = std::next(I);
7423
7424 // We need to generate the addresses and sizes if this is the last
7425 // component, if the component is a pointer or if it is an array section
7426 // whose length can't be proved to be one. If this is a pointer, it
7427 // becomes the base address for the following components.
7428
7429 // A final array section is one whose length can't be proved to be one.
7430 // If the map item is non-contiguous then we don't treat any array section
7431 // as a final array section.
7432 bool IsFinalArraySection =
7433 !IsNonContiguous &&
7434 isFinalArraySectionExpression(I->getAssociatedExpression());
7435
7436 // If we have a declaration for the mapping use that, otherwise use
7437 // the base declaration of the map clause.
7438 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7439 ? I->getAssociatedDeclaration()
7440 : BaseDecl;
7441 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7442 : MapExpr;
7443
7444 // Get information on whether the element is a pointer. We have to treat
7445 // array sections specially, given that they are
7446 // built-in types.
7447 const auto *OASE =
7448 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7449 const auto *OAShE =
7450 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7451 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7452 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7453 bool IsPointer =
7454 OAShE ||
7455 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7456 .getCanonicalType()
7457 ->isAnyPointerType()) ||
7458 I->getAssociatedExpression()->getType()->isAnyPointerType();
7459 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7460 MapDecl &&
7461 MapDecl->getType()->isLValueReferenceType();
7462 bool IsNonDerefPointer = IsPointer &&
7463 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7464 !IsNonContiguous;
7465
7466 if (OASE)
7467 ++DimSize;
7468
7469 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7470 IsFinalArraySection) {
7471 // If this is not the last component, we expect the pointer to be
7472 // associated with an array expression or member expression.
7473 assert((Next == CE ||
7474 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7475 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7476 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7477 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7478 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7479 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7480 "Unexpected expression");
7481
7482 Address LB = Address::invalid();
7483 Address LowestElem = Address::invalid();
7484 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7485 const MemberExpr *E) {
7486 const Expr *BaseExpr = E->getBase();
7487 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7488 // scalar.
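// (Illustrative: for 's.x' the base below is the lvalue of 's' itself;
// for 'ps->x' it is the loaded pointer value, with alignment and TBAA
// information taken from the pointer expression.)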
7489 LValue BaseLV; 7490 if (E->isArrow()) { 7491 LValueBaseInfo BaseInfo; 7492 TBAAAccessInfo TBAAInfo; 7493 Address Addr = 7494 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 7495 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 7496 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 7497 } else { 7498 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 7499 } 7500 return BaseLV; 7501 }; 7502 if (OAShE) { 7503 LowestElem = LB = 7504 Address(CGF.EmitScalarExpr(OAShE->getBase()), 7505 CGF.ConvertTypeForMem( 7506 OAShE->getBase()->getType()->getPointeeType()), 7507 CGF.getContext().getTypeAlignInChars( 7508 OAShE->getBase()->getType())); 7509 } else if (IsMemberReference) { 7510 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 7511 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7512 LowestElem = CGF.EmitLValueForFieldInitialization( 7513 BaseLVal, cast<FieldDecl>(MapDecl)) 7514 .getAddress(CGF); 7515 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 7516 .getAddress(CGF); 7517 } else { 7518 LowestElem = LB = 7519 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7520 .getAddress(CGF); 7521 } 7522 7523 // If this component is a pointer inside the base struct then we don't 7524 // need to create any entry for it - it will be combined with the object 7525 // it is pointing to into a single PTR_AND_OBJ entry. 7526 bool IsMemberPointerOrAddr = 7527 EncounteredME && 7528 (((IsPointer || ForDeviceAddr) && 7529 I->getAssociatedExpression() == EncounteredME) || 7530 (IsPrevMemberReference && !IsPointer) || 7531 (IsMemberReference && Next != CE && 7532 !Next->getAssociatedExpression()->getType()->isPointerType())); 7533 if (!OverlappedElements.empty() && Next == CE) { 7534 // Handle base element with the info for overlapped elements. 7535 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7536 assert(!IsPointer && 7537 "Unexpected base element with the pointer type."); 7538 // Mark the whole struct as the struct that requires allocation on the 7539 // device. 7540 PartialStruct.LowestElem = {0, LowestElem}; 7541 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7542 I->getAssociatedExpression()->getType()); 7543 Address HB = CGF.Builder.CreateConstGEP( 7544 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 7545 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty), 7546 TypeSize.getQuantity() - 1); 7547 PartialStruct.HighestElem = { 7548 std::numeric_limits<decltype( 7549 PartialStruct.HighestElem.first)>::max(), 7550 HB}; 7551 PartialStruct.Base = BP; 7552 PartialStruct.LB = LB; 7553 assert( 7554 PartialStruct.PreliminaryMapData.BasePointers.empty() && 7555 "Overlapped elements must be used only once for the variable."); 7556 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 7557 // Emit data for non-overlapped data. 7558 OpenMPOffloadMappingFlags Flags = 7559 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | 7560 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7561 /*AddPtrFlag=*/false, 7562 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 7563 llvm::Value *Size = nullptr; 7564 // Do bitcopy of all non-overlapped structure elements. 
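// (Illustrative sketch, assuming 'struct T { int a; double *p; int b; }
// t;' with 'map(t) map(from: t.p[0:1])': the member 't.p' overlaps a
// separate entry, so 't' is emitted here as the non-overlapped pieces
// [&t.a, &t.p) and [one-past-t.p, one-past-t), each with the flags
// computed above.)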
7565 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7566 Component : OverlappedElements) { 7567 Address ComponentLB = Address::invalid(); 7568 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7569 Component) { 7570 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 7571 const auto *FD = dyn_cast<FieldDecl>(VD); 7572 if (FD && FD->getType()->isLValueReferenceType()) { 7573 const auto *ME = 7574 cast<MemberExpr>(MC.getAssociatedExpression()); 7575 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7576 ComponentLB = 7577 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 7578 .getAddress(CGF); 7579 } else { 7580 ComponentLB = 7581 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7582 .getAddress(CGF); 7583 } 7584 Size = CGF.Builder.CreatePtrDiff( 7585 CGF.Int8Ty, ComponentLB.getPointer(), LB.getPointer()); 7586 break; 7587 } 7588 } 7589 assert(Size && "Failed to determine structure size"); 7590 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7591 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7592 CombinedInfo.DevicePtrDecls.push_back(nullptr); 7593 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 7594 CombinedInfo.Pointers.push_back(LB.getPointer()); 7595 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7596 Size, CGF.Int64Ty, /*isSigned=*/true)); 7597 CombinedInfo.Types.push_back(Flags); 7598 CombinedInfo.Mappers.push_back(nullptr); 7599 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7600 : 1); 7601 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7602 } 7603 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7604 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7605 CombinedInfo.DevicePtrDecls.push_back(nullptr); 7606 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 7607 CombinedInfo.Pointers.push_back(LB.getPointer()); 7608 Size = CGF.Builder.CreatePtrDiff( 7609 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 7610 LB.getPointer()); 7611 CombinedInfo.Sizes.push_back( 7612 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7613 CombinedInfo.Types.push_back(Flags); 7614 CombinedInfo.Mappers.push_back(nullptr); 7615 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7616 : 1); 7617 break; 7618 } 7619 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7620 if (!IsMemberPointerOrAddr || 7621 (Next == CE && MapType != OMPC_MAP_unknown)) { 7622 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7623 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7624 CombinedInfo.DevicePtrDecls.push_back(nullptr); 7625 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 7626 CombinedInfo.Pointers.push_back(LB.getPointer()); 7627 CombinedInfo.Sizes.push_back( 7628 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7629 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7630 : 1); 7631 7632 // If Mapper is valid, the last component inherits the mapper. 7633 bool HasMapper = Mapper && Next == CE; 7634 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 7635 7636 // We need to add a pointer flag for each map that comes from the 7637 // same expression except for the first one. We also need to signal 7638 // this map is the first one that relates with the current capture 7639 // (there is a set of entries for each capture). 
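// (E.g., an illustrative case: for 'map(to: p[0:8])' with a captured
// pointer 'p', the first entry emitted for the capture carries
// TARGET_PARAM, while the pointee entry for 'p[0:8]' is a later entry
// for the same expression and gets the pointer flag instead.)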
7640 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7641 MapType, MapModifiers, MotionModifiers, IsImplicit,
7642 !IsExpressionFirstInfo || RequiresReference ||
7643 FirstPointerInComplexData || IsMemberReference,
7644 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7645
7646 if (!IsExpressionFirstInfo || IsMemberReference) {
7647 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7648 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7649 if (IsPointer || (IsMemberReference && Next != CE))
7650 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7651 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7652 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7653 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7654 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7655
7656 if (ShouldBeMemberOf) {
7657 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7658 // should be later updated with the correct value of MEMBER_OF.
7659 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7660 // From now on, all subsequent PTR_AND_OBJ entries should not be
7661 // marked as MEMBER_OF.
7662 ShouldBeMemberOf = false;
7663 }
7664 }
7665
7666 CombinedInfo.Types.push_back(Flags);
7667 }
7668
7669 // If we have encountered a member expression so far, keep track of the
7670 // mapped member. If the parent is "*this", then the value declaration
7671 // is nullptr.
7672 if (EncounteredME) {
7673 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7674 unsigned FieldIndex = FD->getFieldIndex();
7675
7676 // Update info about the lowest and highest elements for this struct
7677 if (!PartialStruct.Base.isValid()) {
7678 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7679 if (IsFinalArraySection) {
7680 Address HB =
7681 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7682 .getAddress(CGF);
7683 PartialStruct.HighestElem = {FieldIndex, HB};
7684 } else {
7685 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7686 }
7687 PartialStruct.Base = BP;
7688 PartialStruct.LB = BP;
7689 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7690 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7691 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7692 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7693 }
7694 }
7695
7696 // Need to emit combined struct for array sections.
7697 if (IsFinalArraySection || IsNonContiguous)
7698 PartialStruct.IsArraySection = true;
7699
7700 // If we have a final array section, we are done with this expression.
7701 if (IsFinalArraySection)
7702 break;
7703
7704 // The pointer becomes the base for the next element.
7705 if (Next != CE)
7706 BP = IsMemberReference ? LowestElem : LB;
7707
7708 IsExpressionFirstInfo = false;
7709 IsCaptureFirstInfo = false;
7710 FirstPointerInComplexData = false;
7711 IsPrevMemberReference = IsMemberReference;
7712 } else if (FirstPointerInComplexData) {
7713 QualType Ty = Components.rbegin()
7714 ->getAssociatedDeclaration()
7715 ->getType()
7716 .getNonReferenceType();
7717 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7718 FirstPointerInComplexData = false;
7719 }
7720 }
7721 // If we ran over the whole component list, allocate space for the whole
7722 // record.
7723 if (!EncounteredME)
7724 PartialStruct.HasCompleteRecord = true;
7725
7726 if (!IsNonContiguous)
7727 return;
7728
7729 const ASTContext &Context = CGF.getContext();
7730
7731 // To support strides in array sections, we need to initialize the first
7732 // dimension size as 1, the first offset as 0, and the first count as 1.
7733 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7734 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7735 MapValuesArrayTy CurStrides;
7736 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7737 uint64_t ElementTypeSize;
7738
7739 // Collect Size information for each dimension and get the element size as
7740 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7741 // should be [10, 10] and the first stride is 4 bytes.
7742 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7743 Components) {
7744 const Expr *AssocExpr = Component.getAssociatedExpression();
7745 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7746
7747 if (!OASE)
7748 continue;
7749
7750 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7751 auto *CAT = Context.getAsConstantArrayType(Ty);
7752 auto *VAT = Context.getAsVariableArrayType(Ty);
7753
7754 // We need all the dimension sizes except for the last dimension.
7755 assert((VAT || CAT || &Component == &*Components.begin()) &&
7756 "Should be either ConstantArray or VariableArray if not the "
7757 "first Component");
7758
7759 // Get element size if CurStrides is empty.
7760 if (CurStrides.empty()) {
7761 const Type *ElementType = nullptr;
7762 if (CAT)
7763 ElementType = CAT->getElementType().getTypePtr();
7764 else if (VAT)
7765 ElementType = VAT->getElementType().getTypePtr();
7766 else
7767 assert(&Component == &*Components.begin() &&
7768 "Only expect pointer (non CAT or VAT) when this is the "
7769 "first Component");
7770 // If ElementType is null, then it means the base is a pointer
7771 // (neither CAT nor VAT) and we'll attempt to get ElementType again
7772 // for the next iteration.
7773 if (ElementType) {
7774 // For the case of having a pointer as the base, we need to remove
7775 // one level of indirection.
7776 if (&Component != &*Components.begin())
7777 ElementType = ElementType->getPointeeOrArrayElementType();
7778 ElementTypeSize =
7779 Context.getTypeSizeInChars(ElementType).getQuantity();
7780 CurStrides.push_back(
7781 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7782 }
7783 }
7784 // Get the dimension value except for the last dimension, since we don't
7785 // need it.
7786 if (DimSizes.size() < Components.size() - 1) {
7787 if (CAT)
7788 DimSizes.push_back(llvm::ConstantInt::get(
7789 CGF.Int64Ty, CAT->getSize().getZExtValue()));
7790 else if (VAT)
7791 DimSizes.push_back(CGF.Builder.CreateIntCast(
7792 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7793 /*IsSigned=*/false));
7794 }
7795 }
7796
7797 // Skip the dummy dimension since we already have its information.
7798 auto *DI = DimSizes.begin() + 1;
7799 // Product of dimension sizes.
7800 llvm::Value *DimProd =
7801 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7802
7803 // Collect info for non-contiguous. Notice that offset, count, and stride
7804 // are only meaningful for array sections, so we insert a null for anything
7805 // other than an array section.
7806 // Also, the size of offset, count, and stride are not the same as
7807 // pointers, base_pointers, sizes, or dims.
Instead, the size of offset,
7808 // count, and stride are the same as the number of non-contiguous
7809 // declarations in the target update to/from clause.
7810 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7811 Components) {
7812 const Expr *AssocExpr = Component.getAssociatedExpression();
7813
7814 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7815 llvm::Value *Offset = CGF.Builder.CreateIntCast(
7816 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7817 /*isSigned=*/false);
7818 CurOffsets.push_back(Offset);
7819 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7820 CurStrides.push_back(CurStrides.back());
7821 continue;
7822 }
7823
7824 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7825
7826 if (!OASE)
7827 continue;
7828
7829 // Offset
7830 const Expr *OffsetExpr = OASE->getLowerBound();
7831 llvm::Value *Offset = nullptr;
7832 if (!OffsetExpr) {
7833 // If offset is absent, then we just set it to zero.
7834 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7835 } else {
7836 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7837 CGF.Int64Ty,
7838 /*isSigned=*/false);
7839 }
7840 CurOffsets.push_back(Offset);
7841
7842 // Count
7843 const Expr *CountExpr = OASE->getLength();
7844 llvm::Value *Count = nullptr;
7845 if (!CountExpr) {
7846 // In Clang, once a higher dimension is an array section, we construct
7847 // all the lower dimensions as array sections. However, for a case like
7848 // arr[0:2][2], Clang constructs the inner dimension as an array section
7849 // even though it is not in array-section form according to the spec.
7850 if (!OASE->getColonLocFirst().isValid() &&
7851 !OASE->getColonLocSecond().isValid()) {
7852 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7853 } else {
7854 // OpenMP 5.0, 2.1.5 Array Sections, Description.
7855 // When the length is absent it defaults to ⌈(size −
7856 // lower-bound)/stride⌉, where size is the size of the array
7857 // dimension.
7858 const Expr *StrideExpr = OASE->getStride();
7859 llvm::Value *Stride =
7860 StrideExpr
7861 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7862 CGF.Int64Ty, /*isSigned=*/false)
7863 : nullptr;
7864 if (Stride)
7865 Count = CGF.Builder.CreateUDiv(
7866 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7867 else
7868 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7869 }
7870 } else {
7871 Count = CGF.EmitScalarExpr(CountExpr);
7872 }
7873 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7874 CurCounts.push_back(Count);
7875
7876 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7877 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7878 // Offset Count Stride
7879 // D0 0 1 4 (int) <- dummy dimension
7880 // D1 0 2 8 (2 * (1) * 4)
7881 // D2 1 2 20 (1 * (1 * 5) * 4)
7882 // D3 0 2 200 (2 * (1 * 5 * 5) * 4)
7883 const Expr *StrideExpr = OASE->getStride();
7884 llvm::Value *Stride =
7885 StrideExpr
7886 ?
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7887 CGF.Int64Ty, /*isSigned=*/false)
7888 : nullptr;
7889 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7890 if (Stride)
7891 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7892 else
7893 CurStrides.push_back(DimProd);
7894 if (DI != DimSizes.end())
7895 ++DI;
7896 }
7897
7898 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7899 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7900 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7901 }
7902
7903 /// Return the adjusted map modifiers if the declaration a capture refers to
7904 /// appears in a first-private clause. This is expected to be used only with
7905 /// directives that start with 'target'.
7906 OpenMPOffloadMappingFlags
7907 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7908 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7909
7910 // A first-private variable captured by reference will use only the
7911 // 'private ptr' and 'map to' flags. Return the right flags if the captured
7912 // declaration is known as first-private in this handler.
7913 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7914 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7915 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7916 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7917 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7918 OpenMPOffloadMappingFlags::OMP_MAP_TO;
7919 }
7920 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7921 if (I != LambdasMap.end())
7922 // For map(to: lambda): use the user-specified map type.
7923 return getMapTypeBits(
7924 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7925 /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
7926 /*AddPtrFlag=*/false,
7927 /*AddIsTargetParamFlag=*/false,
7928 /*isNonContiguous=*/false);
7929 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7930 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7931 }
7932
7933 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7934 // Shift by getFlagMemberOffset() bits.
7935 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7936 << getFlagMemberOffset());
7937 }
7938
7939 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7940 OpenMPOffloadMappingFlags MemberOfFlag) {
7941 // If the entry is PTR_AND_OBJ but has not been marked with the special
7942 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7943 // marked as MEMBER_OF.
7944 if (static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7945 Flags & OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ) &&
7946 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7947 (Flags & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
7948 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF))
7949 return;
7950
7951 // Reset the placeholder value to prepare the flag for the assignment of
7952 // the proper MEMBER_OF value.
7953 Flags &= ~OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7954 Flags |= MemberOfFlag;
7955 }
7956
7957 void getPlainLayout(const CXXRecordDecl *RD,
7958 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7959 bool AsBase) const {
7960 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7961
7962 llvm::StructType *St =
7963 AsBase ?
RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7964 7965 unsigned NumElements = St->getNumElements(); 7966 llvm::SmallVector< 7967 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7968 RecordLayout(NumElements); 7969 7970 // Fill bases. 7971 for (const auto &I : RD->bases()) { 7972 if (I.isVirtual()) 7973 continue; 7974 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7975 // Ignore empty bases. 7976 if (Base->isEmpty() || CGF.getContext() 7977 .getASTRecordLayout(Base) 7978 .getNonVirtualSize() 7979 .isZero()) 7980 continue; 7981 7982 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7983 RecordLayout[FieldIndex] = Base; 7984 } 7985 // Fill in virtual bases. 7986 for (const auto &I : RD->vbases()) { 7987 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7988 // Ignore empty bases. 7989 if (Base->isEmpty()) 7990 continue; 7991 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7992 if (RecordLayout[FieldIndex]) 7993 continue; 7994 RecordLayout[FieldIndex] = Base; 7995 } 7996 // Fill in all the fields. 7997 assert(!RD->isUnion() && "Unexpected union."); 7998 for (const auto *Field : RD->fields()) { 7999 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8000 // will fill in later.) 8001 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8002 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8003 RecordLayout[FieldIndex] = Field; 8004 } 8005 } 8006 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8007 &Data : RecordLayout) { 8008 if (Data.isNull()) 8009 continue; 8010 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8011 getPlainLayout(Base, Layout, /*AsBase=*/true); 8012 else 8013 Layout.push_back(Data.get<const FieldDecl *>()); 8014 } 8015 } 8016 8017 /// Generate all the base pointers, section pointers, sizes, map types, and 8018 /// mappers for the extracted mappable expressions (all included in \a 8019 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8020 /// pair of the relevant declaration and index where it occurs is appended to 8021 /// the device pointers info array. 8022 void generateAllInfoForClauses( 8023 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 8024 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8025 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8026 // We have to process the component lists that relate with the same 8027 // declaration in a single chunk so that we can generate the map flags 8028 // correctly. Therefore, we organize all lists in a map. 8029 enum MapKind { Present, Allocs, Other, Total }; 8030 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8031 SmallVector<SmallVector<MapInfo, 8>, 4>> 8032 Info; 8033 8034 // Helper function to fill the information map for the different supported 8035 // clauses. 
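// (Sketch of the resulting structure, for illustration: Info maps each
// declaration to per-kind buckets (Present, Allocs, Other), so that,
// e.g., 'map(present, to: x)' and 'map(alloc: x)' on the same directive
// land in different buckets yet are still processed as one chunk for
// 'x'.)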
8036 auto &&InfoGen =
8037 [&Info, &SkipVarSet](
8038 const ValueDecl *D, MapKind Kind,
8039 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8040 OpenMPMapClauseKind MapType,
8041 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8042 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8043 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8044 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8045 if (SkipVarSet.contains(D))
8046 return;
8047 auto It = Info.find(D);
8048 if (It == Info.end())
8049 It = Info
8050 .insert(std::make_pair(
8051 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8052 .first;
8053 It->second[Kind].emplace_back(
8054 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8055 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8056 };
8057
8058 for (const auto *Cl : Clauses) {
8059 const auto *C = dyn_cast<OMPMapClause>(Cl);
8060 if (!C)
8061 continue;
8062 MapKind Kind = Other;
8063 if (llvm::is_contained(C->getMapTypeModifiers(),
8064 OMPC_MAP_MODIFIER_present))
8065 Kind = Present;
8066 else if (C->getMapType() == OMPC_MAP_alloc)
8067 Kind = Allocs;
8068 const auto *EI = C->getVarRefs().begin();
8069 for (const auto L : C->component_lists()) {
8070 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8071 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8072 C->getMapTypeModifiers(), std::nullopt,
8073 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8074 E);
8075 ++EI;
8076 }
8077 }
8078 for (const auto *Cl : Clauses) {
8079 const auto *C = dyn_cast<OMPToClause>(Cl);
8080 if (!C)
8081 continue;
8082 MapKind Kind = Other;
8083 if (llvm::is_contained(C->getMotionModifiers(),
8084 OMPC_MOTION_MODIFIER_present))
8085 Kind = Present;
8086 const auto *EI = C->getVarRefs().begin();
8087 for (const auto L : C->component_lists()) {
8088 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
8089 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8090 C->isImplicit(), std::get<2>(L), *EI);
8091 ++EI;
8092 }
8093 }
8094 for (const auto *Cl : Clauses) {
8095 const auto *C = dyn_cast<OMPFromClause>(Cl);
8096 if (!C)
8097 continue;
8098 MapKind Kind = Other;
8099 if (llvm::is_contained(C->getMotionModifiers(),
8100 OMPC_MOTION_MODIFIER_present))
8101 Kind = Present;
8102 const auto *EI = C->getVarRefs().begin();
8103 for (const auto L : C->component_lists()) {
8104 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
8105 std::nullopt, C->getMotionModifiers(),
8106 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8107 *EI);
8108 ++EI;
8109 }
8110 }
8111
8112 // Look at the use_device_ptr and use_device_addr clause information and
8113 // mark the existing map entries as such. If there is no map information
8114 // for an entry in the use_device_ptr and use_device_addr lists, we create
8115 // one with map type 'alloc' and a zero-size section. It is the user's
8116 // fault if that was not mapped before. If there is no map information and
8117 // the pointer is a struct member, then we defer the emission of that entry
8118 // until the whole struct has been processed.
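// (E.g., an illustrative case: with 'map(to: s.x) use_device_ptr(s.p)'
// and no map of 's.p' itself, a zero-size 'alloc'-like entry for 's.p'
// is deferred and only emitted once the combined entry for 's' exists.)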
8119 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8120 SmallVector<DeferredDevicePtrEntryTy, 4>> 8121 DeferredInfo; 8122 MapCombinedInfoTy UseDeviceDataCombinedInfo; 8123 8124 auto &&UseDeviceDataCombinedInfoGen = 8125 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr, 8126 CodeGenFunction &CGF, bool IsDevAddr) { 8127 UseDeviceDataCombinedInfo.Exprs.push_back(VD); 8128 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr); 8129 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD); 8130 UseDeviceDataCombinedInfo.DevicePointers.emplace_back( 8131 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer); 8132 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr); 8133 UseDeviceDataCombinedInfo.Sizes.push_back( 8134 llvm::Constant::getNullValue(CGF.Int64Ty)); 8135 UseDeviceDataCombinedInfo.Types.push_back( 8136 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM); 8137 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr); 8138 }; 8139 8140 auto &&MapInfoGen = 8141 [&DeferredInfo, &UseDeviceDataCombinedInfoGen, 8142 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD, 8143 OMPClauseMappableExprCommon::MappableExprComponentListRef 8144 Components, 8145 bool IsImplicit, bool IsDevAddr) { 8146 // We didn't find any match in our map information - generate a zero 8147 // size array section - if the pointer is a struct member we defer 8148 // this action until the whole struct has been processed. 8149 if (isa<MemberExpr>(IE)) { 8150 // Insert the pointer into Info to be processed by 8151 // generateInfoForComponentList. Because it is a member pointer 8152 // without a pointee, no entry will be generated for it, therefore 8153 // we need to generate one after the whole struct has been 8154 // processed. Nonetheless, generateInfoForComponentList must be 8155 // called to take the pointer into account for the calculation of 8156 // the range of the partial struct. 8157 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt, 8158 std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit, 8159 nullptr, nullptr, IsDevAddr); 8160 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr); 8161 } else { 8162 llvm::Value *Ptr; 8163 if (IsDevAddr) { 8164 if (IE->isGLValue()) 8165 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8166 else 8167 Ptr = CGF.EmitScalarExpr(IE); 8168 } else { 8169 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8170 } 8171 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr); 8172 } 8173 }; 8174 8175 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD, 8176 const Expr *IE, bool IsDevAddr) -> bool { 8177 // We potentially have map information for this declaration already. 8178 // Look for the first set of components that refer to it. If found, 8179 // return true. 8180 // If the first component is a member expression, we have to look into 8181 // 'this', which maps to null in the map of map information. Otherwise 8182 // look directly for the information. 8183 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8184 if (It != Info.end()) { 8185 bool Found = false; 8186 for (auto &Data : It->second) { 8187 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8188 return MI.Components.back().getAssociatedDeclaration() == VD; 8189 }); 8190 // If we found a map entry, signal that the pointer has to be 8191 // returned and move on to the next declaration. Exclude cases where 8192 // the base pointer is mapped as array subscript, array section or 8193 // array shaping. 
The base address is passed as a pointer to the base in
8194 // this case and cannot be used as a base for a use_device_ptr list
8195 // item.
8196 if (CI != Data.end()) {
8197 if (IsDevAddr) {
8198 CI->ForDeviceAddr = IsDevAddr;
8199 CI->ReturnDevicePointer = true;
8200 Found = true;
8201 break;
8202 } else {
8203 auto PrevCI = std::next(CI->Components.rbegin());
8204 const auto *VarD = dyn_cast<VarDecl>(VD);
8205 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8206 isa<MemberExpr>(IE) ||
8207 !VD->getType().getNonReferenceType()->isPointerType() ||
8208 PrevCI == CI->Components.rend() ||
8209 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8210 VarD->hasLocalStorage()) {
8211 CI->ForDeviceAddr = IsDevAddr;
8212 CI->ReturnDevicePointer = true;
8213 Found = true;
8214 break;
8215 }
8216 }
8217 }
8218 }
8219 return Found;
8220 }
8221 return false;
8222 };
8223
8224 // Look at the use_device_ptr clause information and mark the existing map
8225 // entries as such. If there is no map information for an entry in the
8226 // use_device_ptr list, we create one with map type 'alloc' and a zero-size
8227 // section. It is the user's fault if that was not mapped before. If there
8228 // is no map information and the pointer is a struct member, then we defer
8229 // the emission of that entry until the whole struct has been processed.
8230 for (const auto *Cl : Clauses) {
8231 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8232 if (!C)
8233 continue;
8234 for (const auto L : C->component_lists()) {
8235 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8236 std::get<1>(L);
8237 assert(!Components.empty() &&
8238 "Not expecting empty list of components!");
8239 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8240 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8241 const Expr *IE = Components.back().getAssociatedExpression();
8242 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8243 continue;
8244 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8245 /*IsDevAddr=*/false);
8246 }
8247 }
8248
8249 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8250 for (const auto *Cl : Clauses) {
8251 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8252 if (!C)
8253 continue;
8254 for (const auto L : C->component_lists()) {
8255 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8256 std::get<1>(L);
8257 assert(!std::get<1>(L).empty() &&
8258 "Not expecting empty list of components!");
8259 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8260 if (!Processed.insert(VD).second)
8261 continue;
8262 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8263 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8264 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8265 continue;
8266 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8267 /*IsDevAddr=*/true);
8268 }
8269 }
8270
8271 for (const auto &Data : Info) {
8272 StructRangeInfoTy PartialStruct;
8273 // Temporary generated information.
8274 MapCombinedInfoTy CurInfo;
8275 const Decl *D = Data.first;
8276 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8277 for (const auto &M : Data.second) {
8278 for (const MapInfo &L : M) {
8279 assert(!L.Components.empty() &&
8280 "Not expecting declaration with no component lists.");
8281
8282 // Remember the current base pointer index.
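// (Illustrative: if this component list was marked by a use_device_ptr
// or use_device_addr clause, the entry emitted at this index is tagged
// OMP_MAP_RETURN_PARAM below so the runtime returns the translated
// device address for it.)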
8283 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8284 CurInfo.NonContigInfo.IsNonContiguous = 8285 L.Components.back().isNonContiguous(); 8286 generateInfoForComponentList( 8287 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8288 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8289 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8290 8291 // If this entry relates with a device pointer, set the relevant 8292 // declaration and add the 'return pointer' flag. 8293 if (L.ReturnDevicePointer) { 8294 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8295 "Unexpected number of mapped base pointers."); 8296 8297 const ValueDecl *RelevantVD = 8298 L.Components.back().getAssociatedDeclaration(); 8299 assert(RelevantVD && 8300 "No relevant declaration related with device pointer??"); 8301 8302 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD; 8303 CurInfo.DevicePointers[CurrentBasePointersIdx] = 8304 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer; 8305 CurInfo.Types[CurrentBasePointersIdx] |= 8306 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; 8307 } 8308 } 8309 } 8310 8311 // Append any pending zero-length pointers which are struct members and 8312 // used with use_device_ptr or use_device_addr. 8313 auto CI = DeferredInfo.find(Data.first); 8314 if (CI != DeferredInfo.end()) { 8315 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8316 llvm::Value *BasePtr; 8317 llvm::Value *Ptr; 8318 if (L.ForDeviceAddr) { 8319 if (L.IE->isGLValue()) 8320 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8321 else 8322 Ptr = this->CGF.EmitScalarExpr(L.IE); 8323 BasePtr = Ptr; 8324 // Entry is RETURN_PARAM. Also, set the placeholder value 8325 // MEMBER_OF=FFFF so that the entry is later updated with the 8326 // correct value of MEMBER_OF. 8327 CurInfo.Types.push_back( 8328 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM | 8329 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); 8330 } else { 8331 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8332 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8333 L.IE->getExprLoc()); 8334 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8335 // placeholder value MEMBER_OF=FFFF so that the entry is later 8336 // updated with the correct value of MEMBER_OF. 8337 CurInfo.Types.push_back( 8338 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | 8339 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM | 8340 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); 8341 } 8342 CurInfo.Exprs.push_back(L.VD); 8343 CurInfo.BasePointers.emplace_back(BasePtr); 8344 CurInfo.DevicePtrDecls.emplace_back(L.VD); 8345 CurInfo.DevicePointers.emplace_back( 8346 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer); 8347 CurInfo.Pointers.push_back(Ptr); 8348 CurInfo.Sizes.push_back( 8349 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8350 CurInfo.Mappers.push_back(nullptr); 8351 } 8352 } 8353 // If there is an entry in PartialStruct it means we have a struct with 8354 // individual members mapped. Emit an extra combined entry. 8355 if (PartialStruct.Base.isValid()) { 8356 CurInfo.NonContigInfo.Dims.push_back(0); 8357 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, 8358 /*IsMapThis*/ !VD, VD); 8359 } 8360 8361 // We need to append the results of this capture to what we already 8362 // have. 8363 CombinedInfo.append(CurInfo); 8364 } 8365 // Append data for use_device_ptr clauses. 
8366 CombinedInfo.append(UseDeviceDataCombinedInfo); 8367 } 8368 8369 public: 8370 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8371 : CurDir(&Dir), CGF(CGF) { 8372 // Extract firstprivate clause information. 8373 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8374 for (const auto *D : C->varlists()) 8375 FirstPrivateDecls.try_emplace( 8376 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8377 // Extract implicit firstprivates from uses_allocators clauses. 8378 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8379 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8380 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8381 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8382 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8383 /*Implicit=*/true); 8384 else if (const auto *VD = dyn_cast<VarDecl>( 8385 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8386 ->getDecl())) 8387 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8388 } 8389 } 8390 // Extract device pointer clause information. 8391 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8392 for (auto L : C->component_lists()) 8393 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8394 // Extract device addr clause information. 8395 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>()) 8396 for (auto L : C->component_lists()) 8397 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L)); 8398 // Extract map information. 8399 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { 8400 if (C->getMapType() != OMPC_MAP_to) 8401 continue; 8402 for (auto L : C->component_lists()) { 8403 const ValueDecl *VD = std::get<0>(L); 8404 const auto *RD = VD ? VD->getType() 8405 .getCanonicalType() 8406 .getNonReferenceType() 8407 ->getAsCXXRecordDecl() 8408 : nullptr; 8409 if (RD && RD->isLambda()) 8410 LambdasMap.try_emplace(std::get<0>(L), C); 8411 } 8412 } 8413 } 8414 8415 /// Constructor for the declare mapper directive. 8416 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8417 : CurDir(&Dir), CGF(CGF) {} 8418 8419 /// Generate code for the combined entry if we have a partially mapped struct 8420 /// and take care of the mapping flags of the arguments corresponding to 8421 /// individual struct members. 8422 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, 8423 MapFlagsArrayTy &CurTypes, 8424 const StructRangeInfoTy &PartialStruct, bool IsMapThis, 8425 const ValueDecl *VD = nullptr, 8426 bool NotTargetParams = true) const { 8427 if (CurTypes.size() == 1 && 8428 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) != 8429 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) && 8430 !PartialStruct.IsArraySection) 8431 return; 8432 Address LBAddr = PartialStruct.LowestElem.second; 8433 Address HBAddr = PartialStruct.HighestElem.second; 8434 if (PartialStruct.HasCompleteRecord) { 8435 LBAddr = PartialStruct.LB; 8436 HBAddr = PartialStruct.LB; 8437 } 8438 CombinedInfo.Exprs.push_back(VD); 8439 // Base is the base of the struct 8440 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); 8441 CombinedInfo.DevicePtrDecls.push_back(nullptr); 8442 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 8443 // Pointer is the address of the lowest element 8444 llvm::Value *LB = LBAddr.getPointer(); 8445 const CXXMethodDecl *MD = 8446 CGF.CurFuncDecl ? 
dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8447 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8448 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8449 // There should not be a mapper for a combined entry.
8450 if (HasBaseClass) {
8451 // OpenMP 5.2 148:21:
8452 // If the target construct is within a class non-static member function,
8453 // and a variable is an accessible data member of the object for which the
8454 // non-static member function is invoked, the variable is treated as
8455 // if the this[:1] expression had appeared in a map clause with a map-type
8456 // of tofrom.
8457 // Emit this[:1]
8458 CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
8459 QualType Ty = MD->getThisType()->getPointeeType();
8460 llvm::Value *Size =
8461 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8462 /*isSigned=*/true);
8463 CombinedInfo.Sizes.push_back(Size);
8464 } else {
8465 CombinedInfo.Pointers.push_back(LB);
8466 // Size is (addr of {highest+1} element) - (addr of lowest element)
8467 llvm::Value *HB = HBAddr.getPointer();
8468 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8469 HBAddr.getElementType(), HB, /*Idx0=*/1);
8470 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8471 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8472 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8473 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8474 /*isSigned=*/false);
8475 CombinedInfo.Sizes.push_back(Size);
8476 }
8477 CombinedInfo.Mappers.push_back(nullptr);
8478 // The map type is always TARGET_PARAM when generating info for captures.
8479 CombinedInfo.Types.push_back(
8480 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8481 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8482 // If any element has the present modifier, then make sure the runtime
8483 // doesn't attempt to allocate the struct.
8484 if (CurTypes.end() !=
8485 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8486 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8487 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8488 }))
8489 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8490 // Remove the TARGET_PARAM flag from the first element.
8491 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8492 // If any element has the ompx_hold modifier, then make sure the runtime
8493 // uses the hold reference count for the struct as a whole so that it won't
8494 // be unmapped by an extra dynamic reference count decrement. Add it to all
8495 // elements as well so the runtime knows which reference count to check
8496 // when determining whether it's time for device-to-host transfers of
8497 // individual elements.
8498 if (CurTypes.end() !=
8499 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8500 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8501 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8502 })) {
8503 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8504 for (auto &M : CurTypes)
8505 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8506 }
8507
8508 // All other current entries will be MEMBER_OF the combined entry
8509 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8510 // 0xFFFF in the MEMBER_OF field).
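// (E.g., an illustrative case: if the combined entry is the 3rd argument
// in the list, MEMBER_OF(3) is merged into every member entry whose
// MEMBER_OF field still holds the 0xFFFF placeholder, while entries
// without the placeholder are left untouched.)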
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates to a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of the user-defined mapper (all
  /// included in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
  }

  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set the correct indices for lambda captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                       OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                       OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
                       OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated with a given capture (all included in \a
  /// CombinedInfo).
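  /// For instance (an illustrative sketch, not a test case): given
  /// \code
  /// struct S { int *p; int a[10]; } s;
  /// #pragma omp target map(tofrom : s.a[2:4]) map(to : s.p[0:8])
  /// {}
  /// \endcode
  /// the capture of 's' yields one component list per map-clause entry; the
  /// lists are sorted, checked for overlap, and \a PartialStruct records the
  /// lowest and highest mapped members so the caller can emit a combined
  /// entry covering the enclosing struct.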
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // For map(to: lambda), skip the capture here; it is processed in
    // generateDefaultMapInfo.
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg);
      CombinedInfo.DevicePtrDecls.emplace_back(VD);
      CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    // For member fields listed in an is_device_ptr clause, store them in
    // DeclComponentLists for generating component info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The expression is not valid if the mapping is implicit.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // We found an overlap if, for at least one of the lists, we reached
        // the head of the component list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section,
          // dereference, etc.), they do not overlap. Likewise if one component
          // is a base and the other is a dereferenced pointer MemberExpr with
          // the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements is less than a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // First, go through the component lists that have overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Then go through the remaining elements, those without overlapped
    // elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, std::nullopt,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime those captures passed by value
        // that are not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
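      // E.g. (illustrative): a captured reference to a 'double' gets only the
      // TO flag here, whereas a captured reference to a struct gets TO | FROM
      // so that changes made on the device are copied back.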
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target
    // parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

// Try to extract the base declaration from a `this->x` expression if possible.
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();
  return nullptr;
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
    OS.flush();
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                         PLoc.getLine(), PLoc.getColumn(),
                                         SrcLocStrSize);
}

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());

  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
  };
  if (CGM.getCodeGenOpts().getDebugInfo() !=
      llvm::codegenoptions::NoDebugInfo) {
    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                    FillInfoMap);
  }

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };
  OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
                                  IsNonContiguous, DeviceAddrCB,
                                  CustomMapperCB);
}

/// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, just treat 'target teams loop' as if it's distributed.
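      // Illustrative example of the nesting this detects:
      //   #pragma omp target
      //   #pragma omp teams distribute parallel for
      //   for (int i = 0; i < N; ++i) ...
      // The nested directive is returned so that callers such as
      // emitTargetNumIterationsCall can precompute the loop trip count.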
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through \p Size elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array
  // element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
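  // The returned count is shifted into the MEMBER_OF bits below (see
  // getFlagMemberOffset) so that the components this mapper pushes are
  // numbered after the pre-existing ones, keeping MEMBER_OF indices
  // consistent when mappers nest.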
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() ==
         llvm::codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            Info.Types[I]));
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc | to    | alloc | to     | release | delete
    // from   | alloc | alloc | from  | from   | release | delete
    // tofrom | alloc | to    | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
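    // (This is the 'alloc' column of the decay table above: whatever map type
    // a member requested, an 'alloc' invocation of the mapper transfers no
    // data.)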
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
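  // This mirrors the initialization call emitted before the loop: the same
  // helper is reused with IsInit=false so that the deletion branch of the
  // generated check is taken instead.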
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e.,
  // \p Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
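  // E.g. (illustrative): an incoming map type of TO|FROM|OMPX_HOLD becomes
  // OMPX_HOLD|IMPLICIT after the masking below, so the runtime call only
  // adjusts allocations and reference counts without transferring data.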
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
      Kind != OMPD_target_teams_loop)
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return llvm::ConstantInt::get(CGF.Int64Ty, 0);

  const auto *LD = cast<OMPLoopDirective>(TD);
  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
    return NumIterations;
  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
}

static void
emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                       const OMPExecutableDirective &D,
                       llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                       bool RequiresOuterTask, const CapturedStmt &CS,
                       bool OffloadingMandatory, CodeGenFunction &CGF) {
  if (OffloadingMandatory) {
    CGF.Builder.CreateUnreachable();
  } else {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
                                         CapturedVars);
  }
}

static llvm::Value *emitDeviceID(
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    CodeGenFunction &CGF) {
  // Emit device ID if any.
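  // E.g. (illustrative): for '#pragma omp target device(2)' the clause
  // expression is emitted and sign-extended to i64; without a device clause
  // the sentinel OMP_DEVICEID_UNDEF lets the runtime pick the default device.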
  llvm::Value *DeviceID;
  if (Device.getPointer()) {
    assert((Device.getInt() == OMPC_DEVICE_unknown ||
            Device.getInt() == OMPC_DEVICE_device_num) &&
           "Expected device_num modifier.");
    llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
    DeviceID =
        CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }
  return DeviceID;
}

static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
                                      CodeGenFunction &CGF) {
  llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);

  if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
    CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
    llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
        DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
    DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
                                             /*isSigned=*/false);
  }
  return DynCGroupMem;
}

static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

  // Get mappable expression information.
  MappableExprsHandler MEHandler(D, CGF);
  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;
    MappableExprsHandler::StructRangeInfoTy PartialStruct;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(nullptr);
      CurInfo.BasePointers.push_back(*CV);
      CurInfo.DevicePtrDecls.push_back(nullptr);
      CurInfo.DevicePointers.push_back(
          MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(*CV);
      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(nullptr);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
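      // E.g. (illustrative): with 'map(to : s.a)' on the directive, the
      // capture of 's' receives explicit map info here; a capture mentioned
      // in no map clause falls through to generateDefaultMapInfo below.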
      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
      if (!CI->capturesThis())
        MappedVarSet.insert(CI->getCapturedVar());
      else
        MappedVarSet.insert(nullptr);
      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                CurInfo, LambdaPointers);
    }
    // We expect to have at least one element of information for this capture.
    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      CombinedInfo.append(PartialStruct.PreliminaryMapData);
      MEHandler.emitCombinedEntry(
          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
          nullptr, !PartialStruct.PreliminaryMapData.BasePointers.empty());
    }

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambda captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
      CombinedInfo.Types);
  // Map any list items in a map clause that were not captures because they
  // weren't referenced within the construct.
  MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

  CGOpenMPRuntime::TargetDataInfo Info;
  // Fill up the arrays and create the arguments.
  emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
  bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                   llvm::codegenoptions::NoDebugInfo;
  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                          EmitDebug,
                                          /*ForEndCall=*/false);

  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable.
      // Instead it would be better to assert here and ensure Sema emits an
      // error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray = InputInfo.BasePointersArray.getPointer();
    llvm::Value *PointersArray = InputInfo.PointersArray.getPointer();
    llvm::Value *SizesArray = InputInfo.SizesArray.getPointer();
    llvm::Value *MappersArray = InputInfo.MappersArray.getPointer();

    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads =
        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGGroupMem, HasNoWait);

    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
        DeviceID, RTLoc, AllocaIP));
  };

  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}

static void
emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                   const OMPExecutableDirective &D,
                   llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                   bool RequiresOuterTask, const CapturedStmt &CS,
                   bool OffloadingMandatory, CodeGenFunction &CGF) {

  // Notify that the host version must be executed.
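  // This fallback path is taken when no device binary is available for the
  // region (no target function ID) or when the 'if' clause of the directive
  // evaluates to false at runtime; the outlined host version runs instead.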
9831 auto &&ElseGen = 9832 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS, 9833 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) { 9834 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars, 9835 RequiresOuterTask, CS, OffloadingMandatory, CGF); 9836 }; 9837 9838 if (RequiresOuterTask) { 9839 CodeGenFunction::OMPTargetDataInfo InputInfo; 9840 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 9841 } else { 9842 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 9843 } 9844 } 9845 9846 void CGOpenMPRuntime::emitTargetCall( 9847 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9848 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 9849 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 9850 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9851 const OMPLoopDirective &D)> 9852 SizeEmitter) { 9853 if (!CGF.HaveInsertPoint()) 9854 return; 9855 9856 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice && 9857 CGM.getLangOpts().OpenMPOffloadMandatory; 9858 9859 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!"); 9860 9861 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 9862 D.hasClausesOfKind<OMPNowaitClause>() || 9863 D.hasClausesOfKind<OMPInReductionClause>(); 9864 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9865 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9866 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9867 PrePostActionTy &) { 9868 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9869 }; 9870 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 9871 9872 CodeGenFunction::OMPTargetDataInfo InputInfo; 9873 llvm::Value *MapTypesArray = nullptr; 9874 llvm::Value *MapNamesArray = nullptr; 9875 9876 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars, 9877 RequiresOuterTask, &CS, OffloadingMandatory, Device, 9878 OutlinedFnID, &InputInfo, &MapTypesArray, 9879 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF, 9880 PrePostActionTy &) { 9881 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars, 9882 RequiresOuterTask, CS, OffloadingMandatory, 9883 Device, OutlinedFnID, InputInfo, MapTypesArray, 9884 MapNamesArray, SizeEmitter, CGF, CGM); 9885 }; 9886 9887 auto &&TargetElseGen = 9888 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS, 9889 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) { 9890 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask, 9891 CS, OffloadingMandatory, CGF); 9892 }; 9893 9894 // If we have a target function ID it means that we need to support 9895 // offloading; otherwise, just execute on the host. We need to execute on the 9896 // host regardless of the conditional in the if clause if, e.g., the user does 9897 // not specify target triples. 9898 if (OutlinedFnID) { 9899 if (IfCond) { 9900 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 9901 } else { 9902 RegionCodeGenTy ThenRCG(TargetThenGen); 9903 ThenRCG(CGF); 9904 } 9905 } else { 9906 RegionCodeGenTy ElseRCG(TargetElseGen); 9907 ElseRCG(CGF); 9908 } 9909 } 9910 9911 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 9912 StringRef ParentName) { 9913 if (!S) 9914 return; 9915 9916 // Codegen OMP target directives that offload compute to the device.
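// For example (illustrative): a '#pragma omp target teams' nested inside a
// function 'foo' is matched by the OMPD_target_teams case below and gets a
// device entry point whose TargetRegionEntryInfo is keyed by 'foo' (the
// ParentName) and the directive's presumed source location.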
9917 bool RequiresDeviceCodegen = 9918 isa<OMPExecutableDirective>(S) && 9919 isOpenMPTargetExecutionDirective( 9920 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 9921 9922 if (RequiresDeviceCodegen) { 9923 const auto &E = *cast<OMPExecutableDirective>(S); 9924 9925 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc( 9926 CGM, OMPBuilder, E.getBeginLoc(), ParentName); 9927 9928 // Is this a target region that should not be emitted as an entry point? If 9929 // so just signal we are done with this target region. 9930 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo)) 9931 return; 9932 9933 switch (E.getDirectiveKind()) { 9934 case OMPD_target: 9935 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9936 cast<OMPTargetDirective>(E)); 9937 break; 9938 case OMPD_target_parallel: 9939 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9940 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9941 break; 9942 case OMPD_target_teams: 9943 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9944 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9945 break; 9946 case OMPD_target_teams_distribute: 9947 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9948 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9949 break; 9950 case OMPD_target_teams_distribute_simd: 9951 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9952 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9953 break; 9954 case OMPD_target_parallel_for: 9955 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9956 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9957 break; 9958 case OMPD_target_parallel_for_simd: 9959 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9960 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9961 break; 9962 case OMPD_target_simd: 9963 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9964 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9965 break; 9966 case OMPD_target_teams_distribute_parallel_for: 9967 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9968 CGM, ParentName, 9969 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9970 break; 9971 case OMPD_target_teams_distribute_parallel_for_simd: 9972 CodeGenFunction:: 9973 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9974 CGM, ParentName, 9975 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9976 break; 9977 case OMPD_target_teams_loop: 9978 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction( 9979 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E)); 9980 break; 9981 case OMPD_target_parallel_loop: 9982 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( 9983 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E)); 9984 break; 9985 case OMPD_parallel: 9986 case OMPD_for: 9987 case OMPD_parallel_for: 9988 case OMPD_parallel_master: 9989 case OMPD_parallel_sections: 9990 case OMPD_for_simd: 9991 case OMPD_parallel_for_simd: 9992 case OMPD_cancel: 9993 case OMPD_cancellation_point: 9994 case OMPD_ordered: 9995 case OMPD_threadprivate: 9996 case OMPD_allocate: 9997 case OMPD_task: 9998 case OMPD_simd: 9999 case OMPD_tile: 10000 case OMPD_unroll: 10001 case OMPD_sections: 10002 case OMPD_section: 10003 case OMPD_single: 10004 case OMPD_master: 10005 case OMPD_critical: 10006 case OMPD_taskyield: 10007 case OMPD_barrier: 10008 case OMPD_taskwait: 10009 case OMPD_taskgroup: 10010 case OMPD_atomic: 10011 case 
OMPD_flush: 10012 case OMPD_depobj: 10013 case OMPD_scan: 10014 case OMPD_teams: 10015 case OMPD_target_data: 10016 case OMPD_target_exit_data: 10017 case OMPD_target_enter_data: 10018 case OMPD_distribute: 10019 case OMPD_distribute_simd: 10020 case OMPD_distribute_parallel_for: 10021 case OMPD_distribute_parallel_for_simd: 10022 case OMPD_teams_distribute: 10023 case OMPD_teams_distribute_simd: 10024 case OMPD_teams_distribute_parallel_for: 10025 case OMPD_teams_distribute_parallel_for_simd: 10026 case OMPD_target_update: 10027 case OMPD_declare_simd: 10028 case OMPD_declare_variant: 10029 case OMPD_begin_declare_variant: 10030 case OMPD_end_declare_variant: 10031 case OMPD_declare_target: 10032 case OMPD_end_declare_target: 10033 case OMPD_declare_reduction: 10034 case OMPD_declare_mapper: 10035 case OMPD_taskloop: 10036 case OMPD_taskloop_simd: 10037 case OMPD_master_taskloop: 10038 case OMPD_master_taskloop_simd: 10039 case OMPD_parallel_master_taskloop: 10040 case OMPD_parallel_master_taskloop_simd: 10041 case OMPD_requires: 10042 case OMPD_metadirective: 10043 case OMPD_unknown: 10044 default: 10045 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 10046 } 10047 return; 10048 } 10049 10050 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 10051 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 10052 return; 10053 10054 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName); 10055 return; 10056 } 10057 10058 // If this is a lambda function, look into its body. 10059 if (const auto *L = dyn_cast<LambdaExpr>(S)) 10060 S = L->getBody(); 10061 10062 // Keep looking for target regions recursively. 10063 for (const Stmt *II : S->children()) 10064 scanForTargetRegionsFunctions(II, ParentName); 10065 } 10066 10067 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) { 10068 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10069 OMPDeclareTargetDeclAttr::getDeviceType(VD); 10070 if (!DevTy) 10071 return false; 10072 // Do not emit device_type(nohost) functions for the host. 10073 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 10074 return true; 10075 // Do not emit device_type(host) functions for the device. 10076 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host) 10077 return true; 10078 return false; 10079 } 10080 10081 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 10082 // If emitting code for the host, we do not process FD here. Instead we do 10083 // the normal code generation. 10084 if (!CGM.getLangOpts().OpenMPIsTargetDevice) { 10085 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) 10086 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD), 10087 CGM.getLangOpts().OpenMPIsTargetDevice)) 10088 return true; 10089 return false; 10090 } 10091 10092 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 10093 // Try to detect target regions in the function. 10094 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 10095 StringRef Name = CGM.getMangledName(GD); 10096 scanForTargetRegionsFunctions(FD->getBody(), Name); 10097 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD), 10098 CGM.getLangOpts().OpenMPIsTargetDevice)) 10099 return true; 10100 } 10101 10102 // Do not emit the function if it is not marked as declare target.
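// E.g. (illustrative):
//   #pragma omp declare target
//   int device_helper(int x);
//   #pragma omp end declare target
// 'device_helper' passes the check below and is emitted for the device; a
// function that is neither marked declare target nor already recorded as an
// emitted target decl is skipped.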
10103 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 10104 AlreadyEmittedTargetDecls.count(VD) == 0; 10105 } 10106 10107 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10108 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()), 10109 CGM.getLangOpts().OpenMPIsTargetDevice)) 10110 return true; 10111 10112 if (!CGM.getLangOpts().OpenMPIsTargetDevice) 10113 return false; 10114 10115 // Check if there are Ctors/Dtors in this declaration and look for target 10116 // regions in it. We use the complete variant to produce the kernel name 10117 // mangling. 10118 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 10119 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 10120 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 10121 StringRef ParentName = 10122 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 10123 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 10124 } 10125 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 10126 StringRef ParentName = 10127 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 10128 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 10129 } 10130 } 10131 10132 // Do not emit the variable if it is not marked as declare target. 10133 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10134 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10135 cast<VarDecl>(GD.getDecl())); 10136 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10137 ((*Res == OMPDeclareTargetDeclAttr::MT_To || 10138 *Res == OMPDeclareTargetDeclAttr::MT_Enter) && 10139 HasRequiresUnifiedSharedMemory)) { 10140 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10141 return true; 10142 } 10143 return false; 10144 } 10145 10146 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10147 llvm::Constant *Addr) { 10148 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10149 !CGM.getLangOpts().OpenMPIsTargetDevice) 10150 return; 10151 10152 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10153 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10154 10155 // If this is an 'extern' declaration we defer to the canonical definition and 10156 // do not emit an offloading entry. 10157 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link && 10158 VD->hasExternalStorage()) 10159 return; 10160 10161 if (!Res) { 10162 if (CGM.getLangOpts().OpenMPIsTargetDevice) { 10163 // Register non-target variables being emitted in device code (debug info 10164 // may cause this).
10165 StringRef VarName = CGM.getMangledName(VD); 10166 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10167 } 10168 return; 10169 } 10170 10171 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); }; 10172 auto LinkageForVariable = [&VD, this]() { 10173 return CGM.getLLVMLinkageVarDefinition(VD); 10174 }; 10175 10176 std::vector<llvm::GlobalVariable *> GeneratedRefs; 10177 OMPBuilder.registerTargetGlobalVariable( 10178 convertCaptureClause(VD), convertDeviceClause(VD), 10179 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly, 10180 VD->isExternallyVisible(), 10181 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, 10182 VD->getCanonicalDecl()->getBeginLoc()), 10183 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd, 10184 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable, 10185 CGM.getTypes().ConvertTypeForMem( 10186 CGM.getContext().getPointerType(VD->getType())), 10187 Addr); 10188 10189 for (auto *ref : GeneratedRefs) 10190 CGM.addCompilerUsedGlobal(ref); 10191 10192 return; 10193 } 10194 10195 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10196 if (isa<FunctionDecl>(GD.getDecl()) || 10197 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10198 return emitTargetFunctions(GD); 10199 10200 return emitTargetGlobalVariable(GD); 10201 } 10202 10203 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10204 for (const VarDecl *VD : DeferredGlobalVariables) { 10205 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10206 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10207 if (!Res) 10208 continue; 10209 if ((*Res == OMPDeclareTargetDeclAttr::MT_To || 10210 *Res == OMPDeclareTargetDeclAttr::MT_Enter) && 10211 !HasRequiresUnifiedSharedMemory) { 10212 CGM.EmitGlobal(VD); 10213 } else { 10214 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10215 ((*Res == OMPDeclareTargetDeclAttr::MT_To || 10216 *Res == OMPDeclareTargetDeclAttr::MT_Enter) && 10217 HasRequiresUnifiedSharedMemory)) && 10218 "Expected link clause or to clause with unified memory."); 10219 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10220 } 10221 } 10222 } 10223 10224 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10225 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10226 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10227 " Expected target-based directive."); 10228 } 10229 10230 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10231 for (const OMPClause *Clause : D->clauselists()) { 10232 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10233 HasRequiresUnifiedSharedMemory = true; 10234 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true); 10235 } else if (const auto *AC = 10236 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10237 switch (AC->getAtomicDefaultMemOrderKind()) { 10238 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10239 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10240 break; 10241 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10242 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10243 break; 10244 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10245 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10246 break; 10247 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10248 break; 10249 } 10250 } 10251 } 10252 } 10253 10254 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10255 return RequiresAtomicOrdering; 10256 } 10257 10258 bool 
CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10259 LangAS &AS) { 10260 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10261 return false; 10262 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10263 switch (A->getAllocatorType()) { 10264 case OMPAllocateDeclAttr::OMPNullMemAlloc: 10265 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10266 // Not supported, fallback to the default mem space. 10267 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10268 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10269 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10270 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10271 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10272 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10273 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10274 AS = LangAS::Default; 10275 return true; 10276 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10277 llvm_unreachable("Expected predefined allocator for the variables with the " 10278 "static storage."); 10279 } 10280 return false; 10281 } 10282 10283 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10284 return HasRequiresUnifiedSharedMemory; 10285 } 10286 10287 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10288 CodeGenModule &CGM) 10289 : CGM(CGM) { 10290 if (CGM.getLangOpts().OpenMPIsTargetDevice) { 10291 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10292 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10293 } 10294 } 10295 10296 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10297 if (CGM.getLangOpts().OpenMPIsTargetDevice) 10298 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10299 } 10300 10301 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10302 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal) 10303 return true; 10304 10305 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10306 // Do not emit the function if it is marked as declare target, as it was 10307 // already emitted. 10308 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10309 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10310 if (auto *F = dyn_cast_or_null<llvm::Function>( 10311 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10312 return !F->isDeclaration(); 10313 return false; 10314 } 10315 return true; 10316 } 10317 10318 return !AlreadyEmittedTargetDecls.insert(D).second; 10319 } 10320 10321 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10322 // If we don't have entries or if we are emitting code for the device, we 10323 // don't need to do anything. 10324 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10325 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsTargetDevice || 10326 (OMPBuilder.OffloadInfoManager.empty() && 10327 !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion)) 10328 return nullptr; 10329 10330 // Create and register the function that handles the requires directives.
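// A sketch of the synthesized registrar (names illustrative):
//   void omp_offloading.requires_reg() {
//     __tgt_register_requires(/*Flags=*/OMP_REQ_UNIFIED_SHARED_MEMORY);
//   }
// It is created as a global-init function so the offload runtime can check
// the requires flags before the first kernel launch.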
10331 ASTContext &C = CGM.getContext(); 10332 10333 llvm::Function *RequiresRegFn; 10334 { 10335 CodeGenFunction CGF(CGM); 10336 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10337 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10338 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10339 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 10340 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10341 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10342 // TODO: check for other requires clauses. 10343 // The requires directive takes effect only when a target region is 10344 // present in the compilation unit. Otherwise it is ignored and not 10345 // passed to the runtime. This prevents the runtime from throwing an error 10346 // for mismatching requires clauses across compilation units that don't 10347 // contain at least one target region. 10348 assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion || 10349 !OMPBuilder.OffloadInfoManager.empty()) && 10350 "Target or declare target region expected."); 10351 if (HasRequiresUnifiedSharedMemory) 10352 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10353 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10354 CGM.getModule(), OMPRTL___tgt_register_requires), 10355 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10356 CGF.FinishFunction(); 10357 } 10358 return RequiresRegFn; 10359 } 10360 10361 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10362 const OMPExecutableDirective &D, 10363 SourceLocation Loc, 10364 llvm::Function *OutlinedFn, 10365 ArrayRef<llvm::Value *> CapturedVars) { 10366 if (!CGF.HaveInsertPoint()) 10367 return; 10368 10369 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10370 CodeGenFunction::RunCleanupsScope Scope(CGF); 10371 10372 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10373 llvm::Value *Args[] = { 10374 RTLoc, 10375 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10376 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10377 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10378 RealArgs.append(std::begin(Args), std::end(Args)); 10379 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10380 10381 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 10382 CGM.getModule(), OMPRTL___kmpc_fork_teams); 10383 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10384 } 10385 10386 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10387 const Expr *NumTeams, 10388 const Expr *ThreadLimit, 10389 SourceLocation Loc) { 10390 if (!CGF.HaveInsertPoint()) 10391 return; 10392 10393 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10394 10395 llvm::Value *NumTeamsVal = 10396 NumTeams 10397 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 10398 CGF.CGM.Int32Ty, /* isSigned = */ true) 10399 : CGF.Builder.getInt32(0); 10400 10401 llvm::Value *ThreadLimitVal = 10402 ThreadLimit 10403 ?
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10404 CGF.CGM.Int32Ty, /* isSigned = */ true) 10405 : CGF.Builder.getInt32(0); 10406 10407 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit) 10408 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10409 ThreadLimitVal}; 10410 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10411 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 10412 PushNumTeamsArgs); 10413 } 10414 10415 void CGOpenMPRuntime::emitTargetDataCalls( 10416 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10417 const Expr *Device, const RegionCodeGenTy &CodeGen, 10418 CGOpenMPRuntime::TargetDataInfo &Info) { 10419 if (!CGF.HaveInsertPoint()) 10420 return; 10421 10422 // Action used to replace the default codegen action and turn privatization 10423 // off. 10424 PrePostActionTy NoPrivAction; 10425 10426 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 10427 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(), 10428 CGF.AllocaInsertPt->getIterator()); 10429 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(), 10430 CGF.Builder.GetInsertPoint()); 10431 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP); 10432 10433 llvm::Value *IfCondVal = nullptr; 10434 if (IfCond) 10435 IfCondVal = CGF.EvaluateExprAsBool(IfCond); 10436 10437 // Emit device ID if any. 10438 llvm::Value *DeviceID = nullptr; 10439 if (Device) { 10440 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10441 CGF.Int64Ty, /*isSigned=*/true); 10442 } else { 10443 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10444 } 10445 10446 // Fill up the arrays with all the mapped variables. 10447 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10448 auto GenMapInfoCB = 10449 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { 10450 CGF.Builder.restoreIP(CodeGenIP); 10451 // Get map clause information.
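// E.g. (illustrative): 'map(tofrom: a[0:N])' contributes one entry whose
// base pointer is 'a', whose begin pointer is '&a[0]', whose size is
// 'N * sizeof(a[0])' bytes, and whose map-type word has the TO and FROM
// bits set.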
10452 MappableExprsHandler MEHandler(D, CGF); 10453 MEHandler.generateAllInfo(CombinedInfo); 10454 10455 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 10456 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 10457 }; 10458 if (CGM.getCodeGenOpts().getDebugInfo() != 10459 llvm::codegenoptions::NoDebugInfo) { 10460 CombinedInfo.Names.resize(CombinedInfo.Exprs.size()); 10461 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(), 10462 FillInfoMap); 10463 } 10464 10465 return CombinedInfo; 10466 }; 10467 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy; 10468 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) { 10469 CGF.Builder.restoreIP(CodeGenIP); 10470 switch (BodyGenType) { 10471 case BodyGenTy::Priv: 10472 if (!Info.CaptureDeviceAddrMap.empty()) 10473 CodeGen(CGF); 10474 break; 10475 case BodyGenTy::DupNoPriv: 10476 if (!Info.CaptureDeviceAddrMap.empty()) { 10477 CodeGen.setAction(NoPrivAction); 10478 CodeGen(CGF); 10479 } 10480 break; 10481 case BodyGenTy::NoPriv: 10482 if (Info.CaptureDeviceAddrMap.empty()) { 10483 CodeGen.setAction(NoPrivAction); 10484 CodeGen(CGF); 10485 } 10486 break; 10487 } 10488 return InsertPointTy(CGF.Builder.GetInsertBlock(), 10489 CGF.Builder.GetInsertPoint()); 10490 }; 10491 10492 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { 10493 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { 10494 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); 10495 } 10496 }; 10497 10498 auto CustomMapperCB = [&](unsigned int I) { 10499 llvm::Value *MFunc = nullptr; 10500 if (CombinedInfo.Mappers[I]) { 10501 Info.HasMapper = true; 10502 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 10503 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 10504 } 10505 return MFunc; 10506 }; 10507 10508 // Source location for the ident struct 10509 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10510 10511 CGF.Builder.restoreIP(OMPBuilder.createTargetData( 10512 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB, 10513 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc)); 10514 } 10515 10516 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10517 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10518 const Expr *Device) { 10519 if (!CGF.HaveInsertPoint()) 10520 return; 10521 10522 assert((isa<OMPTargetEnterDataDirective>(D) || 10523 isa<OMPTargetExitDataDirective>(D) || 10524 isa<OMPTargetUpdateDirective>(D)) && 10525 "Expecting either target enter, exit data, or update directives."); 10526 10527 CodeGenFunction::OMPTargetDataInfo InputInfo; 10528 llvm::Value *MapTypesArray = nullptr; 10529 llvm::Value *MapNamesArray = nullptr; 10530 // Generate the code for the opening of the data environment. 10531 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 10532 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10533 // Emit device ID if any. 10534 llvm::Value *DeviceID = nullptr; 10535 if (Device) { 10536 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10537 CGF.Int64Ty, /*isSigned=*/true); 10538 } else { 10539 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10540 } 10541 10542 // Emit the number of elements in the offloading arrays. 
10543 llvm::Constant *PointerNum = 10544 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10545 10546 // Source location for the ident struct 10547 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10548 10549 llvm::Value *OffloadingArgs[] = {RTLoc, 10550 DeviceID, 10551 PointerNum, 10552 InputInfo.BasePointersArray.getPointer(), 10553 InputInfo.PointersArray.getPointer(), 10554 InputInfo.SizesArray.getPointer(), 10555 MapTypesArray, 10556 MapNamesArray, 10557 InputInfo.MappersArray.getPointer()}; 10558 10559 // Select the right runtime function call for each standalone 10560 // directive. 10561 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10562 RuntimeFunction RTLFn; 10563 switch (D.getDirectiveKind()) { 10564 case OMPD_target_enter_data: 10565 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 10566 : OMPRTL___tgt_target_data_begin_mapper; 10567 break; 10568 case OMPD_target_exit_data: 10569 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 10570 : OMPRTL___tgt_target_data_end_mapper; 10571 break; 10572 case OMPD_target_update: 10573 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper 10574 : OMPRTL___tgt_target_data_update_mapper; 10575 break; 10576 case OMPD_parallel: 10577 case OMPD_for: 10578 case OMPD_parallel_for: 10579 case OMPD_parallel_master: 10580 case OMPD_parallel_sections: 10581 case OMPD_for_simd: 10582 case OMPD_parallel_for_simd: 10583 case OMPD_cancel: 10584 case OMPD_cancellation_point: 10585 case OMPD_ordered: 10586 case OMPD_threadprivate: 10587 case OMPD_allocate: 10588 case OMPD_task: 10589 case OMPD_simd: 10590 case OMPD_tile: 10591 case OMPD_unroll: 10592 case OMPD_sections: 10593 case OMPD_section: 10594 case OMPD_single: 10595 case OMPD_master: 10596 case OMPD_critical: 10597 case OMPD_taskyield: 10598 case OMPD_barrier: 10599 case OMPD_taskwait: 10600 case OMPD_taskgroup: 10601 case OMPD_atomic: 10602 case OMPD_flush: 10603 case OMPD_depobj: 10604 case OMPD_scan: 10605 case OMPD_teams: 10606 case OMPD_target_data: 10607 case OMPD_distribute: 10608 case OMPD_distribute_simd: 10609 case OMPD_distribute_parallel_for: 10610 case OMPD_distribute_parallel_for_simd: 10611 case OMPD_teams_distribute: 10612 case OMPD_teams_distribute_simd: 10613 case OMPD_teams_distribute_parallel_for: 10614 case OMPD_teams_distribute_parallel_for_simd: 10615 case OMPD_declare_simd: 10616 case OMPD_declare_variant: 10617 case OMPD_begin_declare_variant: 10618 case OMPD_end_declare_variant: 10619 case OMPD_declare_target: 10620 case OMPD_end_declare_target: 10621 case OMPD_declare_reduction: 10622 case OMPD_declare_mapper: 10623 case OMPD_taskloop: 10624 case OMPD_taskloop_simd: 10625 case OMPD_master_taskloop: 10626 case OMPD_master_taskloop_simd: 10627 case OMPD_parallel_master_taskloop: 10628 case OMPD_parallel_master_taskloop_simd: 10629 case OMPD_target: 10630 case OMPD_target_simd: 10631 case OMPD_target_teams_distribute: 10632 case OMPD_target_teams_distribute_simd: 10633 case OMPD_target_teams_distribute_parallel_for: 10634 case OMPD_target_teams_distribute_parallel_for_simd: 10635 case OMPD_target_teams: 10636 case OMPD_target_parallel: 10637 case OMPD_target_parallel_for: 10638 case OMPD_target_parallel_for_simd: 10639 case OMPD_requires: 10640 case OMPD_metadirective: 10641 case OMPD_unknown: 10642 default: 10643 llvm_unreachable("Unexpected standalone target data directive."); 10644 break; 10645 } 10646 CGF.EmitRuntimeCall( 10647 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 10648 
OffloadingArgs); 10649 }; 10650 10651 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10652 &MapNamesArray](CodeGenFunction &CGF, 10653 PrePostActionTy &) { 10654 // Fill up the arrays with all the mapped variables. 10655 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10656 10657 // Get map clause information. 10658 MappableExprsHandler MEHandler(D, CGF); 10659 MEHandler.generateAllInfo(CombinedInfo); 10660 10661 CGOpenMPRuntime::TargetDataInfo Info; 10662 // Fill up the arrays and create the arguments. 10663 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 10664 /*IsNonContiguous=*/true); 10665 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 10666 D.hasClausesOfKind<OMPNowaitClause>(); 10667 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() != 10668 llvm::codegenoptions::NoDebugInfo; 10669 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, 10670 EmitDebug, 10671 /*ForEndCall=*/false); 10672 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10673 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, 10674 CGF.VoidPtrTy, CGM.getPointerAlign()); 10675 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, 10676 CGM.getPointerAlign()); 10677 InputInfo.SizesArray = 10678 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign()); 10679 InputInfo.MappersArray = 10680 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); 10681 MapTypesArray = Info.RTArgs.MapTypesArray; 10682 MapNamesArray = Info.RTArgs.MapNamesArray; 10683 if (RequiresOuterTask) 10684 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10685 else 10686 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10687 }; 10688 10689 if (IfCond) { 10690 emitIfClause(CGF, IfCond, TargetThenGen, 10691 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10692 } else { 10693 RegionCodeGenTy ThenRCG(TargetThenGen); 10694 ThenRCG(CGF); 10695 } 10696 } 10697 10698 namespace { 10699 /// Kind of parameter in a function with 'declare simd' directive. 10700 enum ParamKindTy { 10701 Linear, 10702 LinearRef, 10703 LinearUVal, 10704 LinearVal, 10705 Uniform, 10706 Vector, 10707 }; 10708 /// Attribute set of the parameter. 10709 struct ParamAttrTy { 10710 ParamKindTy Kind = Vector; 10711 llvm::APSInt StrideOrArg; 10712 llvm::APSInt Alignment; 10713 bool HasVarStride = false; 10714 }; 10715 } // namespace 10716 10717 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10718 ArrayRef<ParamAttrTy> ParamAttrs) { 10719 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10720 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10721 // of that clause. The VLEN value must be a power of 2. 10722 // Otherwise, the notion of the function's "characteristic data type" (CDT) 10723 // is used to compute the vector length. 10724 // CDT is defined in the following order: 10725 // a) For a non-void function, the CDT is the return type. 10726 // b) If the function has any non-uniform, non-linear parameters, then the 10727 // CDT is the type of the first such parameter. 10728 // c) If the CDT determined by a) or b) above is struct, union, or class 10729 // type which is pass-by-value (except for the type that maps to the 10730 // built-in complex data type), the characteristic data type is int. 10731 // d) If none of the above three cases is applicable, the CDT is int.
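// For example (illustrative): 'double dot(double *x, double *y)' takes its
// CDT 'double' from rule a); a void function whose first vector parameter
// is 'int *p' takes 'int *' from rule b); a by-value struct result falls
// back to 'int' per rule c).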
10732 // The VLEN is then determined based on the CDT and the size of the vector 10733 // register of the ISA for which the current vector version is generated. The 10734 // VLEN is computed using the formula below: 10735 // VLEN = sizeof(vector_register) / sizeof(CDT), 10736 // where the vector register size is specified in section 3.2.1, Registers and 10737 // the Stack Frame, of the original AMD64 ABI document. 10738 QualType RetType = FD->getReturnType(); 10739 if (RetType.isNull()) 10740 return 0; 10741 ASTContext &C = FD->getASTContext(); 10742 QualType CDT; 10743 if (!RetType.isNull() && !RetType->isVoidType()) { 10744 CDT = RetType; 10745 } else { 10746 unsigned Offset = 0; 10747 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10748 if (ParamAttrs[Offset].Kind == Vector) 10749 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10750 ++Offset; 10751 } 10752 if (CDT.isNull()) { 10753 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10754 if (ParamAttrs[I + Offset].Kind == Vector) { 10755 CDT = FD->getParamDecl(I)->getType(); 10756 break; 10757 } 10758 } 10759 } 10760 } 10761 if (CDT.isNull()) 10762 CDT = C.IntTy; 10763 CDT = CDT->getCanonicalTypeUnqualified(); 10764 if (CDT->isRecordType() || CDT->isUnionType()) 10765 CDT = C.IntTy; 10766 return C.getTypeSize(CDT); 10767 } 10768 10769 /// Mangle the parameter part of the vector function name according to 10770 /// the parameters' OpenMP classification. The mangling function is defined in 10771 /// section 4.5 of the AAVFABI (2021Q1). 10772 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10773 SmallString<256> Buffer; 10774 llvm::raw_svector_ostream Out(Buffer); 10775 for (const auto &ParamAttr : ParamAttrs) { 10776 switch (ParamAttr.Kind) { 10777 case Linear: 10778 Out << 'l'; 10779 break; 10780 case LinearRef: 10781 Out << 'R'; 10782 break; 10783 case LinearUVal: 10784 Out << 'U'; 10785 break; 10786 case LinearVal: 10787 Out << 'L'; 10788 break; 10789 case Uniform: 10790 Out << 'u'; 10791 break; 10792 case Vector: 10793 Out << 'v'; 10794 break; 10795 } 10796 if (ParamAttr.HasVarStride) 10797 Out << "s" << ParamAttr.StrideOrArg; 10798 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef || 10799 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) { 10800 // Don't print the step value if it is not present or if it is 10801 // equal to 1.
10802 if (ParamAttr.StrideOrArg < 0) 10803 Out << 'n' << -ParamAttr.StrideOrArg; 10804 else if (ParamAttr.StrideOrArg != 1) 10805 Out << ParamAttr.StrideOrArg; 10806 } 10807 10808 if (!!ParamAttr.Alignment) 10809 Out << 'a' << ParamAttr.Alignment; 10810 } 10811 10812 return std::string(Out.str()); 10813 } 10814 10815 static void 10816 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10817 const llvm::APSInt &VLENVal, 10818 ArrayRef<ParamAttrTy> ParamAttrs, 10819 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10820 struct ISADataTy { 10821 char ISA; 10822 unsigned VecRegSize; 10823 }; 10824 ISADataTy ISAData[] = { 10825 { 10826 'b', 128 10827 }, // SSE 10828 { 10829 'c', 256 10830 }, // AVX 10831 { 10832 'd', 256 10833 }, // AVX2 10834 { 10835 'e', 512 10836 }, // AVX512 10837 }; 10838 llvm::SmallVector<char, 2> Masked; 10839 switch (State) { 10840 case OMPDeclareSimdDeclAttr::BS_Undefined: 10841 Masked.push_back('N'); 10842 Masked.push_back('M'); 10843 break; 10844 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10845 Masked.push_back('N'); 10846 break; 10847 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10848 Masked.push_back('M'); 10849 break; 10850 } 10851 for (char Mask : Masked) { 10852 for (const ISADataTy &Data : ISAData) { 10853 SmallString<256> Buffer; 10854 llvm::raw_svector_ostream Out(Buffer); 10855 Out << "_ZGV" << Data.ISA << Mask; 10856 if (!VLENVal) { 10857 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10858 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10859 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10860 } else { 10861 Out << VLENVal; 10862 } 10863 Out << mangleVectorParameters(ParamAttrs); 10864 Out << '_' << Fn->getName(); 10865 Fn->addFnAttr(Out.str()); 10866 } 10867 } 10868 } 10869 10870 // These are the functions needed to mangle the names of the 10871 // vector functions generated by the compiler, according to the rules 10872 // defined in the "Vector Function ABI specifications for AArch64", 10873 // available at 10874 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10875 10876 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1). 10877 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10878 QT = QT.getCanonicalType(); 10879 10880 if (QT->isVoidType()) 10881 return false; 10882 10883 if (Kind == ParamKindTy::Uniform) 10884 return false; 10885 10886 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef) 10887 return false; 10888 10889 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) && 10890 !QT->isReferenceType()) 10891 return false; 10892 10893 return true; 10894 } 10895 10896 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10897 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10898 QT = QT.getCanonicalType(); 10899 unsigned Size = C.getTypeSize(QT); 10900 10901 // Only scalars and complex types at most 16 bytes wide set PBV to true. 10902 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10903 return false; 10904 10905 if (QT->isFloatingType()) 10906 return true; 10907 10908 if (QT->isIntegerType()) 10909 return true; 10910 10911 if (QT->isPointerType()) 10912 return true; 10913 10914 // TODO: Add support for complex types (section 3.1.2, item 2). 10915 10916 return false; 10917 } 10918 10919 /// Computes the lane size (LS) of a return type or of an input parameter, 10920 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10921 /// TODO: Add support for references, section 3.2.1, item 1. 10922 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10923 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10924 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10925 if (getAArch64PBV(PTy, C)) 10926 return C.getTypeSize(PTy); 10927 } 10928 if (getAArch64PBV(QT, C)) 10929 return C.getTypeSize(QT); 10930 10931 return C.getTypeSize(C.getUIntPtrType()); 10932 } 10933 10934 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10935 // signature of the scalar function, as defined in 3.2.2 of the 10936 // AAVFABI. 10937 static std::tuple<unsigned, unsigned, bool> 10938 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10939 QualType RetType = FD->getReturnType().getCanonicalType(); 10940 10941 ASTContext &C = FD->getASTContext(); 10942 10943 bool OutputBecomesInput = false; 10944 10945 llvm::SmallVector<unsigned, 8> Sizes; 10946 if (!RetType->isVoidType()) { 10947 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10948 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10949 OutputBecomesInput = true; 10950 } 10951 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10952 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10953 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10954 } 10955 10956 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10957 // The LS of a function parameter / return value can only be a power 10958 // of 2, starting from 8 bits, up to 128. 10959 assert(llvm::all_of(Sizes, 10960 [](unsigned Size) { 10961 return Size == 8 || Size == 16 || Size == 32 || 10962 Size == 64 || Size == 128; 10963 }) && 10964 "Invalid size"); 10965 10966 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10967 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10968 OutputBecomesInput); 10969 } 10970 10971 // Function used to add the attribute. The parameter `VLEN` is 10972 // templated to allow the use of "x" when targeting scalable functions 10973 // for SVE. 10974 template <typename T> 10975 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10976 char ISA, StringRef ParSeq, 10977 StringRef MangledName, bool OutputBecomesInput, 10978 llvm::Function *Fn) { 10979 SmallString<256> Buffer; 10980 llvm::raw_svector_ostream Out(Buffer); 10981 Out << Prefix << ISA << LMask << VLEN; 10982 if (OutputBecomesInput) 10983 Out << "v"; 10984 Out << ParSeq << "_" << MangledName; 10985 Fn->addFnAttr(Out.str()); 10986 } 10987 10988 // Helper function to generate the Advanced SIMD names depending on 10989 // the value of the NDS when simdlen is not present. 
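// For example (illustrative): with NDS == 32 the helper below emits both a
// 2-lane and a 4-lane variant, covering the 64-bit and 128-bit Advanced
// SIMD register widths for 32-bit elements.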
10990 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10991 StringRef Prefix, char ISA, 10992 StringRef ParSeq, StringRef MangledName, 10993 bool OutputBecomesInput, 10994 llvm::Function *Fn) { 10995 switch (NDS) { 10996 case 8: 10997 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10998 OutputBecomesInput, Fn); 10999 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11000 OutputBecomesInput, Fn); 11001 break; 11002 case 16: 11003 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11004 OutputBecomesInput, Fn); 11005 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11006 OutputBecomesInput, Fn); 11007 break; 11008 case 32: 11009 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11010 OutputBecomesInput, Fn); 11011 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11012 OutputBecomesInput, Fn); 11013 break; 11014 case 64: 11015 case 128: 11016 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11017 OutputBecomesInput, Fn); 11018 break; 11019 default: 11020 llvm_unreachable("Scalar type is too wide."); 11021 } 11022 } 11023 11024 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11025 static void emitAArch64DeclareSimdFunction( 11026 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11027 ArrayRef<ParamAttrTy> ParamAttrs, 11028 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11029 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11030 11031 // Get basic data for building the vector signature. 11032 const auto Data = getNDSWDS(FD, ParamAttrs); 11033 const unsigned NDS = std::get<0>(Data); 11034 const unsigned WDS = std::get<1>(Data); 11035 const bool OutputBecomesInput = std::get<2>(Data); 11036 11037 // Check the values provided via `simdlen` by the user. 11038 // 1. A `simdlen(1)` doesn't produce vector signatures. 11039 if (UserVLEN == 1) { 11040 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11041 DiagnosticsEngine::Warning, 11042 "The clause simdlen(1) has no effect when targeting aarch64."); 11043 CGM.getDiags().Report(SLoc, DiagID); 11044 return; 11045 } 11046 11047 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11048 // Advanced SIMD output. 11049 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11050 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11051 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11052 "power of 2 when targeting Advanced SIMD."); 11053 CGM.getDiags().Report(SLoc, DiagID); 11054 return; 11055 } 11056 11057 // 3. Section 3.4.1. SVE fixed length must obey the architectural 11058 // limits. 11059 if (ISA == 's' && UserVLEN != 0) { 11060 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11061 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11062 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11063 "lanes in the architectural constraints " 11064 "for SVE (min is 128-bit, max is " 11065 "2048-bit, by steps of 128-bit)"); 11066 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11067 return; 11068 } 11069 } 11070 11071 // Sort out parameter sequence. 11072 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11073 StringRef Prefix = "_ZGV"; 11074 // Generate simdlen from user input (if any). 11075 if (UserVLEN) { 11076 if (ISA == 's') { 11077 // SVE generates only a masked function.
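// E.g. (illustrative): 'simdlen(4)' on a void function mangled 'foo' with a
// single vector parameter yields the attribute "_ZGVsM4v_foo".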
11078 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11079 OutputBecomesInput, Fn); 11080 } else { 11081 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11082 // Advanced SIMD generates one or two functions, depending on 11083 // the `[not]inbranch` clause. 11084 switch (State) { 11085 case OMPDeclareSimdDeclAttr::BS_Undefined: 11086 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11087 OutputBecomesInput, Fn); 11088 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11089 OutputBecomesInput, Fn); 11090 break; 11091 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11092 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11093 OutputBecomesInput, Fn); 11094 break; 11095 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11096 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11097 OutputBecomesInput, Fn); 11098 break; 11099 } 11100 } 11101 } else { 11102 // If no user simdlen is provided, follow the AAVFABI rules for 11103 // generating the vector length. 11104 if (ISA == 's') { 11105 // SVE, section 3.4.1, item 1. 11106 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11107 OutputBecomesInput, Fn); 11108 } else { 11109 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11110 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11111 // two vector names depending on the use of the clause 11112 // `[not]inbranch`. 11113 switch (State) { 11114 case OMPDeclareSimdDeclAttr::BS_Undefined: 11115 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11116 OutputBecomesInput, Fn); 11117 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11118 OutputBecomesInput, Fn); 11119 break; 11120 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11121 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11122 OutputBecomesInput, Fn); 11123 break; 11124 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11125 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11126 OutputBecomesInput, Fn); 11127 break; 11128 } 11129 } 11130 } 11131 } 11132 11133 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11134 llvm::Function *Fn) { 11135 ASTContext &C = CGM.getContext(); 11136 FD = FD->getMostRecentDecl(); 11137 while (FD) { 11138 // Map params to their positions in function decl. 11139 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11140 if (isa<CXXMethodDecl>(FD)) 11141 ParamPositions.try_emplace(FD, 0); 11142 unsigned ParamPos = ParamPositions.size(); 11143 for (const ParmVarDecl *P : FD->parameters()) { 11144 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11145 ++ParamPos; 11146 } 11147 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11148 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11149 // Mark uniform parameters. 11150 for (const Expr *E : Attr->uniforms()) { 11151 E = E->IgnoreParenImpCasts(); 11152 unsigned Pos; 11153 if (isa<CXXThisExpr>(E)) { 11154 Pos = ParamPositions[FD]; 11155 } else { 11156 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11157 ->getCanonicalDecl(); 11158 auto It = ParamPositions.find(PVD); 11159 assert(It != ParamPositions.end() && "Function parameter not found"); 11160 Pos = It->second; 11161 } 11162 ParamAttrs[Pos].Kind = Uniform; 11163 } 11164 // Get alignment info. 
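// E.g. (illustrative): 'aligned(p : 64)' records an alignment of 64 for
// 'p'; when the clause gives no value, the target's default simd alignment
// for the parameter type is used instead (the ternary below).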
11165 auto *NI = Attr->alignments_begin(); 11166 for (const Expr *E : Attr->aligneds()) { 11167 E = E->IgnoreParenImpCasts(); 11168 unsigned Pos; 11169 QualType ParmTy; 11170 if (isa<CXXThisExpr>(E)) { 11171 Pos = ParamPositions[FD]; 11172 ParmTy = E->getType(); 11173 } else { 11174 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11175 ->getCanonicalDecl(); 11176 auto It = ParamPositions.find(PVD); 11177 assert(It != ParamPositions.end() && "Function parameter not found"); 11178 Pos = It->second; 11179 ParmTy = PVD->getType(); 11180 } 11181 ParamAttrs[Pos].Alignment = 11182 (*NI) 11183 ? (*NI)->EvaluateKnownConstInt(C) 11184 : llvm::APSInt::getUnsigned( 11185 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11186 .getQuantity()); 11187 ++NI; 11188 } 11189 // Mark linear parameters. 11190 auto *SI = Attr->steps_begin(); 11191 auto *MI = Attr->modifiers_begin(); 11192 for (const Expr *E : Attr->linears()) { 11193 E = E->IgnoreParenImpCasts(); 11194 unsigned Pos; 11195 bool IsReferenceType = false; 11196 // Rescaling factor needed to compute the linear parameter 11197 // value in the mangled name. 11198 unsigned PtrRescalingFactor = 1; 11199 if (isa<CXXThisExpr>(E)) { 11200 Pos = ParamPositions[FD]; 11201 auto *P = cast<PointerType>(E->getType()); 11202 PtrRescalingFactor = CGM.getContext() 11203 .getTypeSizeInChars(P->getPointeeType()) 11204 .getQuantity(); 11205 } else { 11206 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11207 ->getCanonicalDecl(); 11208 auto It = ParamPositions.find(PVD); 11209 assert(It != ParamPositions.end() && "Function parameter not found"); 11210 Pos = It->second; 11211 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11212 PtrRescalingFactor = CGM.getContext() 11213 .getTypeSizeInChars(P->getPointeeType()) 11214 .getQuantity(); 11215 else if (PVD->getType()->isReferenceType()) { 11216 IsReferenceType = true; 11217 PtrRescalingFactor = 11218 CGM.getContext() 11219 .getTypeSizeInChars(PVD->getType().getNonReferenceType()) 11220 .getQuantity(); 11221 } 11222 } 11223 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11224 if (*MI == OMPC_LINEAR_ref) 11225 ParamAttr.Kind = LinearRef; 11226 else if (*MI == OMPC_LINEAR_uval) 11227 ParamAttr.Kind = LinearUVal; 11228 else if (IsReferenceType) 11229 ParamAttr.Kind = LinearVal; 11230 else 11231 ParamAttr.Kind = Linear; 11232 // Assuming a stride of 1, for `linear` without modifiers. 11233 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11234 if (*SI) { 11235 Expr::EvalResult Result; 11236 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11237 if (const auto *DRE = 11238 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11239 if (const auto *StridePVD = 11240 dyn_cast<ParmVarDecl>(DRE->getDecl())) { 11241 ParamAttr.HasVarStride = true; 11242 auto It = ParamPositions.find(StridePVD->getCanonicalDecl()); 11243 assert(It != ParamPositions.end() && 11244 "Function parameter not found"); 11245 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second); 11246 } 11247 } 11248 } else { 11249 ParamAttr.StrideOrArg = Result.Val.getInt(); 11250 } 11251 } 11252 // If we are using a linear clause on a pointer, we need to 11253 // rescale the value of linear_step with the byte size of the 11254 // pointee type. 
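// E.g. (illustrative): 'linear(p : 2)' with 'double *p' is mangled with a
// step of 2 * sizeof(double) == 16, i.e. as 'l16' rather than 'l2'.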
11255 if (!ParamAttr.HasVarStride && 11256 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef)) 11257 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11258 ++SI; 11259 ++MI; 11260 } 11261 llvm::APSInt VLENVal; 11262 SourceLocation ExprLoc; 11263 const Expr *VLENExpr = Attr->getSimdlen(); 11264 if (VLENExpr) { 11265 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11266 ExprLoc = VLENExpr->getExprLoc(); 11267 } 11268 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11269 if (CGM.getTriple().isX86()) { 11270 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11271 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11272 unsigned VLEN = VLENVal.getExtValue(); 11273 StringRef MangledName = Fn->getName(); 11274 if (CGM.getTarget().hasFeature("sve")) 11275 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11276 MangledName, 's', 128, Fn, ExprLoc); 11277 else if (CGM.getTarget().hasFeature("neon")) 11278 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11279 MangledName, 'n', 128, Fn, ExprLoc); 11280 } 11281 } 11282 FD = FD->getPreviousDecl(); 11283 } 11284 } 11285 11286 namespace { 11287 /// Cleanup action for doacross support. 11288 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 11289 public: 11290 static const int DoacrossFinArgs = 2; 11291 11292 private: 11293 llvm::FunctionCallee RTLFn; 11294 llvm::Value *Args[DoacrossFinArgs]; 11295 11296 public: 11297 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 11298 ArrayRef<llvm::Value *> CallArgs) 11299 : RTLFn(RTLFn) { 11300 assert(CallArgs.size() == DoacrossFinArgs); 11301 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11302 } 11303 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11304 if (!CGF.HaveInsertPoint()) 11305 return; 11306 CGF.EmitRuntimeCall(RTLFn, Args); 11307 } 11308 }; 11309 } // namespace 11310 11311 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11312 const OMPLoopDirective &D, 11313 ArrayRef<Expr *> NumIterations) { 11314 if (!CGF.HaveInsertPoint()) 11315 return; 11316 11317 ASTContext &C = CGM.getContext(); 11318 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 11319 RecordDecl *RD; 11320 if (KmpDimTy.isNull()) { 11321 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 11322 // kmp_int64 lo; // lower 11323 // kmp_int64 up; // upper 11324 // kmp_int64 st; // stride 11325 // }; 11326 RD = C.buildImplicitRecord("kmp_dim"); 11327 RD->startDefinition(); 11328 addFieldToRecordDecl(C, RD, Int64Ty); 11329 addFieldToRecordDecl(C, RD, Int64Ty); 11330 addFieldToRecordDecl(C, RD, Int64Ty); 11331 RD->completeDefinition(); 11332 KmpDimTy = C.getRecordType(RD); 11333 } else { 11334 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 11335 } 11336 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 11337 QualType ArrayTy = 11338 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 11339 11340 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 11341 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 11342 enum { LowerFD = 0, UpperFD, StrideFD }; 11343 // Fill dims with data. 
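// E.g. (illustrative): '#pragma omp for ordered(2)' with trip counts N0 and
// N1 fills dims[0] = {lo 0, up N0, st 1} and dims[1] = {lo 0, up N1, st 1},
// all widened to kmp_int64.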
11344 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11345 LValue DimsLVal = CGF.MakeAddrLValue( 11346 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11347 // dims.upper = num_iterations; 11348 LValue UpperLVal = CGF.EmitLValueForField( 11349 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11350 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 11351 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 11352 Int64Ty, NumIterations[I]->getExprLoc()); 11353 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11354 // dims.stride = 1; 11355 LValue StrideLVal = CGF.EmitLValueForField( 11356 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11357 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11358 StrideLVal); 11359 } 11360 11361 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11362 // kmp_int32 num_dims, struct kmp_dim * dims); 11363 llvm::Value *Args[] = { 11364 emitUpdateLocation(CGF, D.getBeginLoc()), 11365 getThreadID(CGF, D.getBeginLoc()), 11366 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11367 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11368 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 11369 CGM.VoidPtrTy)}; 11370 11371 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11372 CGM.getModule(), OMPRTL___kmpc_doacross_init); 11373 CGF.EmitRuntimeCall(RTLFn, Args); 11374 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11375 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11376 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11377 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 11378 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11379 llvm::ArrayRef(FiniArgs)); 11380 } 11381 11382 template <typename T> 11383 static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, 11384 const T *C, llvm::Value *ULoc, 11385 llvm::Value *ThreadID) { 11386 QualType Int64Ty = 11387 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11388 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11389 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11390 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 11391 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11392 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11393 const Expr *CounterVal = C->getLoopData(I); 11394 assert(CounterVal); 11395 llvm::Value *CntVal = CGF.EmitScalarConversion( 11396 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11397 CounterVal->getExprLoc()); 11398 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11399 /*Volatile=*/false, Int64Ty); 11400 } 11401 llvm::Value *Args[] = { 11402 ULoc, ThreadID, CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11403 llvm::FunctionCallee RTLFn; 11404 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 11405 OMPDoacrossKind<T> ODK; 11406 if (ODK.isSource(C)) { 11407 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11408 OMPRTL___kmpc_doacross_post); 11409 } else { 11410 assert(ODK.isSink(C) && "Expect sink modifier."); 11411 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11412 OMPRTL___kmpc_doacross_wait); 11413 } 11414 CGF.EmitRuntimeCall(RTLFn, Args); 11415 } 11416 11417 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11418 const OMPDependClause *C) { 11419 return 
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  return EmitDoacrossOrdered<OMPDependClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDoacrossClause *C) {
  return EmitDoacrossOrdered<OMPDoacrossClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return the allocator value from the expression, or the null allocator
/// (the default when no allocator is specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator is specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

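// A minimal sketch of the conversion performed by getAllocatorVal above,
// assuming a clause such as 'allocate(omp_high_bw_mem_alloc: x)':
//
//   omp_allocator_handle_t h = omp_high_bw_mem_alloc; // enum (integer)
//   void *alloc = (void *)h; // value passed to the __kmpc_* entry points
//   // no allocator clause => alloc = nullptr (the default allocator)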
/// Return the alignment from an allocate directive if present.
static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
  std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);

  if (!AllocateAlignment)
    return nullptr;

  return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
}

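// An illustrative sketch of the lowering implemented below for a variable
// carrying an OMPAllocateDeclAttr, e.g.:
//
//   int a;
//   #pragma omp allocate(a) allocator(omp_high_bw_mem_alloc) align(64)
//
// roughly becomes, in pseudo-code:
//
//   void *p = __kmpc_aligned_alloc(gtid, 64, sizeof(a), allocator);
//   // ... uses of 'a' go through 'p' ...
//   __kmpc_free(gtid, p, allocator); // via OMPAllocateCleanupTy on EHStack
//
// Without an align clause, plain __kmpc_alloc(gtid, size, allocator) is used.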
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

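// The RAII below tracks variables named in 'nontemporal' clauses, e.g.:
//
//   #pragma omp simd nontemporal(a, b)
//
// While it is active, isNontemporalDecl() is consulted when lvalues for 'a'
// and 'b' are formed, so that their loads/stores can be emitted with
// !nontemporal metadata.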
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}

void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

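// A sketch of the lastprivate(conditional:) scheme that the following RAII
// and helpers implement. For
//
//   #pragma omp parallel for lastprivate(conditional: a)
//   for (int i = 0; i < N; ++i)
//     if (cond(i)) a = f(i);
//
// each assignment to the private 'a' is tracked together with the iteration
// that performed it; after the loop, 'a' receives the value written by the
// lexically last iteration that actually assigned it. This uses internal
// globals of the form 'pl_cond...' (last value) and 'pl_cond....iv' (last
// iteration), updated under a critical section, plus a 'Fired' flag for
// updates coming from inner regions.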
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

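// emitLastprivateConditionalInit below wraps the private copy in an implicit
// struct so that updates from inner regions can be observed. Roughly:
//
//   struct lastprivate.conditional {
//     <decl type> priv_a; // private copy, returned to the caller
//     char Fired;         // set to 1 whenever priv_a is assigned
//   };
//
// 'Fired' starts at 0 and is checked in
// checkAndEmitSharedLastprivateConditional() before merging the value.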
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr =
        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // If (last_iv <= iv), this iteration is the latest one to update the
    // variable; store the new value in the global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}

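// Final step of the lastprivate(conditional:) sketch above: once the region
// is done, copy the winning candidate from the internal 'pl_cond...' global
// back into the original variable. In pseudo-code:
//
//   if (<global for 'a' exists>) // i.e. 'a' was updated in the region
//     a = last_a;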
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

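// CGOpenMPSIMDRuntime is the runtime used for -fopenmp-simd, where only
// simd-related constructs are honored (e.g. '#pragma omp parallel for simd'
// is effectively treated as just 'simd'). The remaining entry points should
// never be reached in that mode, hence the llvm_unreachable bodies below.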
supported in SIMD-only mode"); 12201 } 12202 12203 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12204 CodeGenFunction &CGF, SourceLocation Loc, 12205 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12206 bool Ordered, const DispatchRTInput &DispatchValues) { 12207 llvm_unreachable("Not supported in SIMD-only mode"); 12208 } 12209 12210 void CGOpenMPSIMDRuntime::emitForStaticInit( 12211 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 12212 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12213 llvm_unreachable("Not supported in SIMD-only mode"); 12214 } 12215 12216 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12217 CodeGenFunction &CGF, SourceLocation Loc, 12218 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 12219 llvm_unreachable("Not supported in SIMD-only mode"); 12220 } 12221 12222 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12223 SourceLocation Loc, 12224 unsigned IVSize, 12225 bool IVSigned) { 12226 llvm_unreachable("Not supported in SIMD-only mode"); 12227 } 12228 12229 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12230 SourceLocation Loc, 12231 OpenMPDirectiveKind DKind) { 12232 llvm_unreachable("Not supported in SIMD-only mode"); 12233 } 12234 12235 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12236 SourceLocation Loc, 12237 unsigned IVSize, bool IVSigned, 12238 Address IL, Address LB, 12239 Address UB, Address ST) { 12240 llvm_unreachable("Not supported in SIMD-only mode"); 12241 } 12242 12243 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12244 llvm::Value *NumThreads, 12245 SourceLocation Loc) { 12246 llvm_unreachable("Not supported in SIMD-only mode"); 12247 } 12248 12249 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12250 ProcBindKind ProcBind, 12251 SourceLocation Loc) { 12252 llvm_unreachable("Not supported in SIMD-only mode"); 12253 } 12254 12255 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12256 const VarDecl *VD, 12257 Address VDAddr, 12258 SourceLocation Loc) { 12259 llvm_unreachable("Not supported in SIMD-only mode"); 12260 } 12261 12262 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12263 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12264 CodeGenFunction *CGF) { 12265 llvm_unreachable("Not supported in SIMD-only mode"); 12266 } 12267 12268 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12269 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12270 llvm_unreachable("Not supported in SIMD-only mode"); 12271 } 12272 12273 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12274 ArrayRef<const Expr *> Vars, 12275 SourceLocation Loc, 12276 llvm::AtomicOrdering AO) { 12277 llvm_unreachable("Not supported in SIMD-only mode"); 12278 } 12279 12280 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12281 const OMPExecutableDirective &D, 12282 llvm::Function *TaskFunction, 12283 QualType SharedsTy, Address Shareds, 12284 const Expr *IfCond, 12285 const OMPTaskDataTy &Data) { 12286 llvm_unreachable("Not supported in SIMD-only mode"); 12287 } 12288 12289 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12290 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12291 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12292 const Expr *IfCond, const OMPTaskDataTy &Data) { 12293 llvm_unreachable("Not supported 
in SIMD-only mode"); 12294 } 12295 12296 void CGOpenMPSIMDRuntime::emitReduction( 12297 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12298 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12299 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12300 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12301 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12302 ReductionOps, Options); 12303 } 12304 12305 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12306 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12307 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12308 llvm_unreachable("Not supported in SIMD-only mode"); 12309 } 12310 12311 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12312 SourceLocation Loc, 12313 bool IsWorksharingReduction) { 12314 llvm_unreachable("Not supported in SIMD-only mode"); 12315 } 12316 12317 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12318 SourceLocation Loc, 12319 ReductionCodeGen &RCG, 12320 unsigned N) { 12321 llvm_unreachable("Not supported in SIMD-only mode"); 12322 } 12323 12324 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12325 SourceLocation Loc, 12326 llvm::Value *ReductionsPtr, 12327 LValue SharedLVal) { 12328 llvm_unreachable("Not supported in SIMD-only mode"); 12329 } 12330 12331 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12332 SourceLocation Loc, 12333 const OMPTaskDataTy &Data) { 12334 llvm_unreachable("Not supported in SIMD-only mode"); 12335 } 12336 12337 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12338 CodeGenFunction &CGF, SourceLocation Loc, 12339 OpenMPDirectiveKind CancelRegion) { 12340 llvm_unreachable("Not supported in SIMD-only mode"); 12341 } 12342 12343 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12344 SourceLocation Loc, const Expr *IfCond, 12345 OpenMPDirectiveKind CancelRegion) { 12346 llvm_unreachable("Not supported in SIMD-only mode"); 12347 } 12348 12349 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12350 const OMPExecutableDirective &D, StringRef ParentName, 12351 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12352 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12353 llvm_unreachable("Not supported in SIMD-only mode"); 12354 } 12355 12356 void CGOpenMPSIMDRuntime::emitTargetCall( 12357 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12358 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12359 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12360 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12361 const OMPLoopDirective &D)> 12362 SizeEmitter) { 12363 llvm_unreachable("Not supported in SIMD-only mode"); 12364 } 12365 12366 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12367 llvm_unreachable("Not supported in SIMD-only mode"); 12368 } 12369 12370 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12371 llvm_unreachable("Not supported in SIMD-only mode"); 12372 } 12373 12374 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 12375 return false; 12376 } 12377 12378 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12379 const OMPExecutableDirective &D, 12380 SourceLocation Loc, 12381 llvm::Function *OutlinedFn, 12382 ArrayRef<llvm::Value *> CapturedVars) { 12383 llvm_unreachable("Not supported in 
SIMD-only mode"); 12384 } 12385 12386 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12387 const Expr *NumTeams, 12388 const Expr *ThreadLimit, 12389 SourceLocation Loc) { 12390 llvm_unreachable("Not supported in SIMD-only mode"); 12391 } 12392 12393 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12394 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12395 const Expr *Device, const RegionCodeGenTy &CodeGen, 12396 CGOpenMPRuntime::TargetDataInfo &Info) { 12397 llvm_unreachable("Not supported in SIMD-only mode"); 12398 } 12399 12400 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12401 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12402 const Expr *Device) { 12403 llvm_unreachable("Not supported in SIMD-only mode"); 12404 } 12405 12406 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12407 const OMPLoopDirective &D, 12408 ArrayRef<Expr *> NumIterations) { 12409 llvm_unreachable("Not supported in SIMD-only mode"); 12410 } 12411 12412 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12413 const OMPDependClause *C) { 12414 llvm_unreachable("Not supported in SIMD-only mode"); 12415 } 12416 12417 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12418 const OMPDoacrossClause *C) { 12419 llvm_unreachable("Not supported in SIMD-only mode"); 12420 } 12421 12422 const VarDecl * 12423 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12424 const VarDecl *NativeParam) const { 12425 llvm_unreachable("Not supported in SIMD-only mode"); 12426 } 12427 12428 Address 12429 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12430 const VarDecl *NativeParam, 12431 const VarDecl *TargetParam) const { 12432 llvm_unreachable("Not supported in SIMD-only mode"); 12433 } 12434