1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "TargetInfo.h" 19 #include "clang/AST/APValue.h" 20 #include "clang/AST/Attr.h" 21 #include "clang/AST/Decl.h" 22 #include "clang/AST/OpenMPClause.h" 23 #include "clang/AST/StmtOpenMP.h" 24 #include "clang/AST/StmtVisitor.h" 25 #include "clang/Basic/BitmaskEnum.h" 26 #include "clang/Basic/FileManager.h" 27 #include "clang/Basic/OpenMPKinds.h" 28 #include "clang/Basic/SourceManager.h" 29 #include "clang/CodeGen/ConstantInitBuilder.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/SetOperations.h" 32 #include "llvm/ADT/SmallBitVector.h" 33 #include "llvm/ADT/StringExtras.h" 34 #include "llvm/Bitcode/BitcodeReader.h" 35 #include "llvm/IR/Constants.h" 36 #include "llvm/IR/DerivedTypes.h" 37 #include "llvm/IR/GlobalValue.h" 38 #include "llvm/IR/InstrTypes.h" 39 #include "llvm/IR/Value.h" 40 #include "llvm/Support/AtomicOrdering.h" 41 #include "llvm/Support/Format.h" 42 #include "llvm/Support/raw_ostream.h" 43 #include <cassert> 44 #include <numeric> 45 #include <optional> 46 47 using namespace clang; 48 using namespace CodeGen; 49 using namespace llvm::omp; 50 51 namespace { 52 /// Base class for handling code generation inside OpenMP regions. 
53 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 54 public: 55 /// Kinds of OpenMP regions used in codegen. 56 enum CGOpenMPRegionKind { 57 /// Region with outlined function for standalone 'parallel' 58 /// directive. 59 ParallelOutlinedRegion, 60 /// Region with outlined function for standalone 'task' directive. 61 TaskOutlinedRegion, 62 /// Region for constructs that do not require function outlining, 63 /// like 'for', 'sections', 'atomic' etc. directives. 64 InlinedRegion, 65 /// Region with outlined function for standalone 'target' directive. 66 TargetRegion, 67 }; 68 69 CGOpenMPRegionInfo(const CapturedStmt &CS, 70 const CGOpenMPRegionKind RegionKind, 71 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 72 bool HasCancel) 73 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 74 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 75 76 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 77 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 78 bool HasCancel) 79 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 80 Kind(Kind), HasCancel(HasCancel) {} 81 82 /// Get a variable or parameter for storing global thread id 83 /// inside OpenMP construct. 84 virtual const VarDecl *getThreadIDVariable() const = 0; 85 86 /// Emit the captured statement body. 87 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 88 89 /// Get an LValue for the current ThreadID variable. 90 /// \return LValue for thread id variable. This LValue always has type int32*. 
91 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 92 93 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 94 95 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 96 97 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 98 99 bool hasCancel() const { return HasCancel; } 100 101 static bool classof(const CGCapturedStmtInfo *Info) { 102 return Info->getKind() == CR_OpenMP; 103 } 104 105 ~CGOpenMPRegionInfo() override = default; 106 107 protected: 108 CGOpenMPRegionKind RegionKind; 109 RegionCodeGenTy CodeGen; 110 OpenMPDirectiveKind Kind; 111 bool HasCancel; 112 }; 113 114 /// API for captured statement code generation in OpenMP constructs. 115 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 116 public: 117 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 118 const RegionCodeGenTy &CodeGen, 119 OpenMPDirectiveKind Kind, bool HasCancel, 120 StringRef HelperName) 121 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 122 HasCancel), 123 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 124 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 125 } 126 127 /// Get a variable or parameter for storing global thread id 128 /// inside OpenMP construct. 129 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 130 131 /// Get the name of the capture helper. 132 StringRef getHelperName() const override { return HelperName; } 133 134 static bool classof(const CGCapturedStmtInfo *Info) { 135 return CGOpenMPRegionInfo::classof(Info) && 136 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 137 ParallelOutlinedRegion; 138 } 139 140 private: 141 /// A variable or parameter storing global thread id for OpenMP 142 /// constructs. 143 const VarDecl *ThreadIDVar; 144 StringRef HelperName; 145 }; 146 147 /// API for captured statement code generation in OpenMP constructs. 
148 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 149 public: 150 class UntiedTaskActionTy final : public PrePostActionTy { 151 bool Untied; 152 const VarDecl *PartIDVar; 153 const RegionCodeGenTy UntiedCodeGen; 154 llvm::SwitchInst *UntiedSwitch = nullptr; 155 156 public: 157 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 158 const RegionCodeGenTy &UntiedCodeGen) 159 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 160 void Enter(CodeGenFunction &CGF) override { 161 if (Untied) { 162 // Emit task switching point. 163 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 164 CGF.GetAddrOfLocalVar(PartIDVar), 165 PartIDVar->getType()->castAs<PointerType>()); 166 llvm::Value *Res = 167 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 168 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 169 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 170 CGF.EmitBlock(DoneBB); 171 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 172 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 173 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 174 CGF.Builder.GetInsertBlock()); 175 emitUntiedSwitch(CGF); 176 } 177 } 178 void emitUntiedSwitch(CodeGenFunction &CGF) const { 179 if (Untied) { 180 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 181 CGF.GetAddrOfLocalVar(PartIDVar), 182 PartIDVar->getType()->castAs<PointerType>()); 183 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 184 PartIdLVal); 185 UntiedCodeGen(CGF); 186 CodeGenFunction::JumpDest CurPoint = 187 CGF.getJumpDestInCurrentScope(".untied.next."); 188 CGF.EmitBranch(CGF.ReturnBlock.getBlock()); 189 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 190 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 191 CGF.Builder.GetInsertBlock()); 192 CGF.EmitBranchThroughCleanup(CurPoint); 193 CGF.EmitBlock(CurPoint.getBlock()); 194 } 195 } 196 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 197 }; 198 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 199 const VarDecl *ThreadIDVar, 200 const RegionCodeGenTy &CodeGen, 201 OpenMPDirectiveKind Kind, bool HasCancel, 202 const UntiedTaskActionTy &Action) 203 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 204 ThreadIDVar(ThreadIDVar), Action(Action) { 205 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 206 } 207 208 /// Get a variable or parameter for storing global thread id 209 /// inside OpenMP construct. 210 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 211 212 /// Get an LValue for the current ThreadID variable. 213 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 214 215 /// Get the name of the capture helper. 216 StringRef getHelperName() const override { return ".omp_outlined."; } 217 218 void emitUntiedSwitch(CodeGenFunction &CGF) override { 219 Action.emitUntiedSwitch(CGF); 220 } 221 222 static bool classof(const CGCapturedStmtInfo *Info) { 223 return CGOpenMPRegionInfo::classof(Info) && 224 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 225 TaskOutlinedRegion; 226 } 227 228 private: 229 /// A variable or parameter storing global thread id for OpenMP 230 /// constructs. 231 const VarDecl *ThreadIDVar; 232 /// Action for emitting code for untied tasks. 233 const UntiedTaskActionTy &Action; 234 }; 235 236 /// API for inlined captured statement code generation in OpenMP 237 /// constructs. 238 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 239 public: 240 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 241 const RegionCodeGenTy &CodeGen, 242 OpenMPDirectiveKind Kind, bool HasCancel) 243 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 244 OldCSI(OldCSI), 245 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 246 247 // Retrieve the value of the context parameter. 
248 llvm::Value *getContextValue() const override { 249 if (OuterRegionInfo) 250 return OuterRegionInfo->getContextValue(); 251 llvm_unreachable("No context value for inlined OpenMP region"); 252 } 253 254 void setContextValue(llvm::Value *V) override { 255 if (OuterRegionInfo) { 256 OuterRegionInfo->setContextValue(V); 257 return; 258 } 259 llvm_unreachable("No context value for inlined OpenMP region"); 260 } 261 262 /// Lookup the captured field decl for a variable. 263 const FieldDecl *lookup(const VarDecl *VD) const override { 264 if (OuterRegionInfo) 265 return OuterRegionInfo->lookup(VD); 266 // If there is no outer outlined region,no need to lookup in a list of 267 // captured variables, we can use the original one. 268 return nullptr; 269 } 270 271 FieldDecl *getThisFieldDecl() const override { 272 if (OuterRegionInfo) 273 return OuterRegionInfo->getThisFieldDecl(); 274 return nullptr; 275 } 276 277 /// Get a variable or parameter for storing global thread id 278 /// inside OpenMP construct. 279 const VarDecl *getThreadIDVariable() const override { 280 if (OuterRegionInfo) 281 return OuterRegionInfo->getThreadIDVariable(); 282 return nullptr; 283 } 284 285 /// Get an LValue for the current ThreadID variable. 286 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 287 if (OuterRegionInfo) 288 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 289 llvm_unreachable("No LValue for inlined OpenMP construct"); 290 } 291 292 /// Get the name of the capture helper. 
293 StringRef getHelperName() const override { 294 if (auto *OuterRegionInfo = getOldCSI()) 295 return OuterRegionInfo->getHelperName(); 296 llvm_unreachable("No helper name for inlined OpenMP construct"); 297 } 298 299 void emitUntiedSwitch(CodeGenFunction &CGF) override { 300 if (OuterRegionInfo) 301 OuterRegionInfo->emitUntiedSwitch(CGF); 302 } 303 304 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 305 306 static bool classof(const CGCapturedStmtInfo *Info) { 307 return CGOpenMPRegionInfo::classof(Info) && 308 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 309 } 310 311 ~CGOpenMPInlinedRegionInfo() override = default; 312 313 private: 314 /// CodeGen info about outer OpenMP region. 315 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 316 CGOpenMPRegionInfo *OuterRegionInfo; 317 }; 318 319 /// API for captured statement code generation in OpenMP target 320 /// constructs. For this captures, implicit parameters are used instead of the 321 /// captured fields. The name of the target region has to be unique in a given 322 /// application so it is provided by the client, because only the client has 323 /// the information to generate that. 324 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 325 public: 326 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 327 const RegionCodeGenTy &CodeGen, StringRef HelperName) 328 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 329 /*HasCancel=*/false), 330 HelperName(HelperName) {} 331 332 /// This is unused for target regions because each starts executing 333 /// with a single thread. 334 const VarDecl *getThreadIDVariable() const override { return nullptr; } 335 336 /// Get the name of the capture helper. 
337 StringRef getHelperName() const override { return HelperName; } 338 339 static bool classof(const CGCapturedStmtInfo *Info) { 340 return CGOpenMPRegionInfo::classof(Info) && 341 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 342 } 343 344 private: 345 StringRef HelperName; 346 }; 347 348 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 349 llvm_unreachable("No codegen for expressions"); 350 } 351 /// API for generation of expressions captured in a innermost OpenMP 352 /// region. 353 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 354 public: 355 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 356 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 357 OMPD_unknown, 358 /*HasCancel=*/false), 359 PrivScope(CGF) { 360 // Make sure the globals captured in the provided statement are local by 361 // using the privatization logic. We assume the same variable is not 362 // captured more than once. 363 for (const auto &C : CS.captures()) { 364 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 365 continue; 366 367 const VarDecl *VD = C.getCapturedVar(); 368 if (VD->isLocalVarDeclOrParm()) 369 continue; 370 371 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 372 /*RefersToEnclosingVariableOrCapture=*/false, 373 VD->getType().getNonReferenceType(), VK_LValue, 374 C.getLocation()); 375 PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF)); 376 } 377 (void)PrivScope.Privatize(); 378 } 379 380 /// Lookup the captured field decl for a variable. 381 const FieldDecl *lookup(const VarDecl *VD) const override { 382 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 383 return FD; 384 return nullptr; 385 } 386 387 /// Emit the captured statement body. 
388 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 389 llvm_unreachable("No body for expressions"); 390 } 391 392 /// Get a variable or parameter for storing global thread id 393 /// inside OpenMP construct. 394 const VarDecl *getThreadIDVariable() const override { 395 llvm_unreachable("No thread id for expressions"); 396 } 397 398 /// Get the name of the capture helper. 399 StringRef getHelperName() const override { 400 llvm_unreachable("No helper name for expressions"); 401 } 402 403 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 404 405 private: 406 /// Private scope to capture global variables. 407 CodeGenFunction::OMPPrivateScope PrivScope; 408 }; 409 410 /// RAII for emitting code of OpenMP constructs. 411 class InlinedOpenMPRegionRAII { 412 CodeGenFunction &CGF; 413 llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields; 414 FieldDecl *LambdaThisCaptureField = nullptr; 415 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 416 bool NoInheritance = false; 417 418 public: 419 /// Constructs region for combined constructs. 420 /// \param CodeGen Code generation sequence for combined directives. Includes 421 /// a list of functions used for code generation of implicitly inlined 422 /// regions. 423 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 424 OpenMPDirectiveKind Kind, bool HasCancel, 425 bool NoInheritance = true) 426 : CGF(CGF), NoInheritance(NoInheritance) { 427 // Start emission for the construct. 428 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 429 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 430 if (NoInheritance) { 431 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 432 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 433 CGF.LambdaThisCaptureField = nullptr; 434 BlockInfo = CGF.BlockInfo; 435 CGF.BlockInfo = nullptr; 436 } 437 } 438 439 ~InlinedOpenMPRegionRAII() { 440 // Restore original CapturedStmtInfo only if we're done with code emission. 
441 auto *OldCSI = 442 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 443 delete CGF.CapturedStmtInfo; 444 CGF.CapturedStmtInfo = OldCSI; 445 if (NoInheritance) { 446 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 447 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 448 CGF.BlockInfo = BlockInfo; 449 } 450 } 451 }; 452 453 /// Values for bit flags used in the ident_t to describe the fields. 454 /// All enumeric elements are named and described in accordance with the code 455 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 456 enum OpenMPLocationFlags : unsigned { 457 /// Use trampoline for internal microtask. 458 OMP_IDENT_IMD = 0x01, 459 /// Use c-style ident structure. 460 OMP_IDENT_KMPC = 0x02, 461 /// Atomic reduction option for kmpc_reduce. 462 OMP_ATOMIC_REDUCE = 0x10, 463 /// Explicit 'barrier' directive. 464 OMP_IDENT_BARRIER_EXPL = 0x20, 465 /// Implicit barrier in code. 466 OMP_IDENT_BARRIER_IMPL = 0x40, 467 /// Implicit barrier in 'for' directive. 468 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 469 /// Implicit barrier in 'sections' directive. 470 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 471 /// Implicit barrier in 'single' directive. 472 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 473 /// Call of __kmp_for_static_init for static loop. 474 OMP_IDENT_WORK_LOOP = 0x200, 475 /// Call of __kmp_for_static_init for sections. 476 OMP_IDENT_WORK_SECTIONS = 0x400, 477 /// Call of __kmp_for_static_init for distribute. 478 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 479 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 480 }; 481 482 namespace { 483 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 484 /// Values for bit flags for marking which requires clauses have been used. 485 enum OpenMPOffloadingRequiresDirFlags : int64_t { 486 /// flag undefined. 487 OMP_REQ_UNDEFINED = 0x000, 488 /// no requires clause present. 489 OMP_REQ_NONE = 0x001, 490 /// reverse_offload clause. 
491 OMP_REQ_REVERSE_OFFLOAD = 0x002, 492 /// unified_address clause. 493 OMP_REQ_UNIFIED_ADDRESS = 0x004, 494 /// unified_shared_memory clause. 495 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 496 /// dynamic_allocators clause. 497 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 498 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 499 }; 500 501 enum OpenMPOffloadingReservedDeviceIDs { 502 /// Device ID if the device was not defined, runtime should get it 503 /// from environment variables in the spec. 504 OMP_DEVICEID_UNDEF = -1, 505 }; 506 } // anonymous namespace 507 508 /// Describes ident structure that describes a source location. 509 /// All descriptions are taken from 510 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 511 /// Original structure: 512 /// typedef struct ident { 513 /// kmp_int32 reserved_1; /**< might be used in Fortran; 514 /// see above */ 515 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 516 /// KMP_IDENT_KMPC identifies this union 517 /// member */ 518 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 519 /// see above */ 520 ///#if USE_ITT_BUILD 521 /// /* but currently used for storing 522 /// region-specific ITT */ 523 /// /* contextual information. */ 524 ///#endif /* USE_ITT_BUILD */ 525 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 526 /// C++ */ 527 /// char const *psource; /**< String describing the source location. 528 /// The string is composed of semi-colon separated 529 // fields which describe the source file, 530 /// the function and a pair of line numbers that 531 /// delimit the construct. 532 /// */ 533 /// } ident_t; 534 enum IdentFieldIndex { 535 /// might be used in Fortran 536 IdentField_Reserved_1, 537 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
538 IdentField_Flags, 539 /// Not really used in Fortran any more 540 IdentField_Reserved_2, 541 /// Source[4] in Fortran, do not use for C++ 542 IdentField_Reserved_3, 543 /// String describing the source location. The string is composed of 544 /// semi-colon separated fields which describe the source file, the function 545 /// and a pair of line numbers that delimit the construct. 546 IdentField_PSource 547 }; 548 549 /// Schedule types for 'omp for' loops (these enumerators are taken from 550 /// the enum sched_type in kmp.h). 551 enum OpenMPSchedType { 552 /// Lower bound for default (unordered) versions. 553 OMP_sch_lower = 32, 554 OMP_sch_static_chunked = 33, 555 OMP_sch_static = 34, 556 OMP_sch_dynamic_chunked = 35, 557 OMP_sch_guided_chunked = 36, 558 OMP_sch_runtime = 37, 559 OMP_sch_auto = 38, 560 /// static with chunk adjustment (e.g., simd) 561 OMP_sch_static_balanced_chunked = 45, 562 /// Lower bound for 'ordered' versions. 563 OMP_ord_lower = 64, 564 OMP_ord_static_chunked = 65, 565 OMP_ord_static = 66, 566 OMP_ord_dynamic_chunked = 67, 567 OMP_ord_guided_chunked = 68, 568 OMP_ord_runtime = 69, 569 OMP_ord_auto = 70, 570 OMP_sch_default = OMP_sch_static, 571 /// dist_schedule types 572 OMP_dist_sch_static_chunked = 91, 573 OMP_dist_sch_static = 92, 574 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 575 /// Set if the monotonic schedule modifier was present. 576 OMP_sch_modifier_monotonic = (1 << 29), 577 /// Set if the nonmonotonic schedule modifier was present. 578 OMP_sch_modifier_nonmonotonic = (1 << 30), 579 }; 580 581 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 582 /// region. 
583 class CleanupTy final : public EHScopeStack::Cleanup { 584 PrePostActionTy *Action; 585 586 public: 587 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 588 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 589 if (!CGF.HaveInsertPoint()) 590 return; 591 Action->Exit(CGF); 592 } 593 }; 594 595 } // anonymous namespace 596 597 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 598 CodeGenFunction::RunCleanupsScope Scope(CGF); 599 if (PrePostAction) { 600 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 601 Callback(CodeGen, CGF, *PrePostAction); 602 } else { 603 PrePostActionTy Action; 604 Callback(CodeGen, CGF, Action); 605 } 606 } 607 608 /// Check if the combiner is a call to UDR combiner and if it is so return the 609 /// UDR decl used for reduction. 610 static const OMPDeclareReductionDecl * 611 getReductionInit(const Expr *ReductionOp) { 612 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 613 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 614 if (const auto *DRE = 615 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 616 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 617 return DRD; 618 return nullptr; 619 } 620 621 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 622 const OMPDeclareReductionDecl *DRD, 623 const Expr *InitOp, 624 Address Private, Address Original, 625 QualType Ty) { 626 if (DRD->getInitializer()) { 627 std::pair<llvm::Function *, llvm::Function *> Reduction = 628 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 629 const auto *CE = cast<CallExpr>(InitOp); 630 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 631 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 632 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 633 const auto *LHSDRE = 634 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 635 const auto *RHSDRE = 636 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 637 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 638 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private); 639 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original); 640 (void)PrivateScope.Privatize(); 641 RValue Func = RValue::get(Reduction.second); 642 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 643 CGF.EmitIgnoredExpr(InitOp); 644 } else { 645 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 646 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 647 auto *GV = new llvm::GlobalVariable( 648 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 649 llvm::GlobalValue::PrivateLinkage, Init, Name); 650 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 651 RValue InitRVal; 652 switch (CGF.getEvaluationKind(Ty)) { 653 case TEK_Scalar: 654 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 655 break; 656 case TEK_Complex: 657 InitRVal = 658 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 659 break; 660 case TEK_Aggregate: { 661 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue); 662 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV); 663 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 664 /*IsInitializer=*/false); 665 return; 666 } 667 } 668 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue); 669 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 670 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 671 /*IsInitializer=*/false); 672 } 673 } 674 675 /// Emit initialization of arrays of complex types. 676 /// \param DestAddr Address of the array. 677 /// \param Type Type of array. 678 /// \param Init Initial expression of array. 679 /// \param SrcAddr Address of the original array. 
680 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 681 QualType Type, bool EmitDeclareReductionInit, 682 const Expr *Init, 683 const OMPDeclareReductionDecl *DRD, 684 Address SrcAddr = Address::invalid()) { 685 // Perform element-by-element initialization. 686 QualType ElementTy; 687 688 // Drill down to the base element type on both arrays. 689 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 690 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 691 if (DRD) 692 SrcAddr = 693 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 694 695 llvm::Value *SrcBegin = nullptr; 696 if (DRD) 697 SrcBegin = SrcAddr.getPointer(); 698 llvm::Value *DestBegin = DestAddr.getPointer(); 699 // Cast from pointer to array type to pointer to single element. 700 llvm::Value *DestEnd = 701 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements); 702 // The basic structure here is a while-do loop. 703 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 704 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 705 llvm::Value *IsEmpty = 706 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 707 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 708 709 // Enter the loop body, making that address the current address. 
710 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 711 CGF.EmitBlock(BodyBB); 712 713 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 714 715 llvm::PHINode *SrcElementPHI = nullptr; 716 Address SrcElementCurrent = Address::invalid(); 717 if (DRD) { 718 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 719 "omp.arraycpy.srcElementPast"); 720 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 721 SrcElementCurrent = 722 Address(SrcElementPHI, SrcAddr.getElementType(), 723 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 724 } 725 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 726 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 727 DestElementPHI->addIncoming(DestBegin, EntryBB); 728 Address DestElementCurrent = 729 Address(DestElementPHI, DestAddr.getElementType(), 730 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 731 732 // Emit copy. 733 { 734 CodeGenFunction::RunCleanupsScope InitScope(CGF); 735 if (EmitDeclareReductionInit) { 736 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 737 SrcElementCurrent, ElementTy); 738 } else 739 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 740 /*IsInitializer=*/false); 741 } 742 743 if (DRD) { 744 // Shift the address forward by one element. 745 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 746 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, 747 "omp.arraycpy.dest.element"); 748 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 749 } 750 751 // Shift the address forward by one element. 752 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 753 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, 754 "omp.arraycpy.dest.element"); 755 // Check whether we've reached the end. 
756 llvm::Value *Done = 757 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 758 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 759 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 760 761 // Done. 762 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 763 } 764 765 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 766 return CGF.EmitOMPSharedLValue(E); 767 } 768 769 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 770 const Expr *E) { 771 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 772 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 773 return LValue(); 774 } 775 776 void ReductionCodeGen::emitAggregateInitialization( 777 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, 778 const OMPDeclareReductionDecl *DRD) { 779 // Emit VarDecl with copy init for arrays. 780 // Get the address of the original variable captured in current 781 // captured region. 782 const auto *PrivateVD = 783 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 784 bool EmitDeclareReductionInit = 785 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 786 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 787 EmitDeclareReductionInit, 788 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 789 : PrivateVD->getInit(), 790 DRD, SharedAddr); 791 } 792 793 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 794 ArrayRef<const Expr *> Origs, 795 ArrayRef<const Expr *> Privates, 796 ArrayRef<const Expr *> ReductionOps) { 797 ClausesData.reserve(Shareds.size()); 798 SharedAddresses.reserve(Shareds.size()); 799 Sizes.reserve(Shareds.size()); 800 BaseDecls.reserve(Shareds.size()); 801 const auto *IOrig = Origs.begin(); 802 const auto *IPriv = Privates.begin(); 803 const auto *IRed = ReductionOps.begin(); 804 for (const Expr *Ref : Shareds) { 805 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 806 std::advance(IOrig, 1); 807 std::advance(IPriv, 1); 808 std::advance(IRed, 1); 809 } 810 } 811 812 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 813 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 814 "Number of generated lvalues must be exactly N."); 815 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 816 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 817 SharedAddresses.emplace_back(First, Second); 818 if (ClausesData[N].Shared == ClausesData[N].Ref) { 819 OrigAddresses.emplace_back(First, Second); 820 } else { 821 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 822 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 823 OrigAddresses.emplace_back(First, Second); 824 } 825 } 826 827 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 828 QualType PrivateType = getPrivateType(N); 829 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 830 if (!PrivateType->isVariablyModifiedType()) { 831 Sizes.emplace_back( 832 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 833 nullptr); 834 return; 835 } 836 llvm::Value *Size; 837 llvm::Value *SizeInChars; 838 auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType(); 839 auto *ElemSizeOf = 
llvm::ConstantExpr::getSizeOf(ElemType); 840 if (AsArraySection) { 841 Size = CGF.Builder.CreatePtrDiff(ElemType, 842 OrigAddresses[N].second.getPointer(CGF), 843 OrigAddresses[N].first.getPointer(CGF)); 844 Size = CGF.Builder.CreateNUWAdd( 845 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 846 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 847 } else { 848 SizeInChars = 849 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); 850 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 851 } 852 Sizes.emplace_back(SizeInChars, Size); 853 CodeGenFunction::OpaqueValueMapping OpaqueMap( 854 CGF, 855 cast<OpaqueValueExpr>( 856 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 857 RValue::get(Size)); 858 CGF.EmitVariablyModifiedType(PrivateType); 859 } 860 861 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 862 llvm::Value *Size) { 863 QualType PrivateType = getPrivateType(N); 864 if (!PrivateType->isVariablyModifiedType()) { 865 assert(!Size && !Sizes[N].second && 866 "Size should be nullptr for non-variably modified reduction " 867 "items."); 868 return; 869 } 870 CodeGenFunction::OpaqueValueMapping OpaqueMap( 871 CGF, 872 cast<OpaqueValueExpr>( 873 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 874 RValue::get(Size)); 875 CGF.EmitVariablyModifiedType(PrivateType); 876 } 877 878 void ReductionCodeGen::emitInitialization( 879 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, 880 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 881 assert(SharedAddresses.size() > N && "No variable was generated"); 882 const auto *PrivateVD = 883 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 884 const OMPDeclareReductionDecl *DRD = 885 getReductionInit(ClausesData[N].ReductionOp); 886 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 887 if (DRD && DRD->getInitializer()) 888 (void)DefaultInit(CGF); 889 
emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD); 890 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 891 (void)DefaultInit(CGF); 892 QualType SharedType = SharedAddresses[N].first.getType(); 893 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 894 PrivateAddr, SharedAddr, SharedType); 895 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 896 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 897 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 898 PrivateVD->getType().getQualifiers(), 899 /*IsInitializer=*/false); 900 } 901 } 902 903 bool ReductionCodeGen::needCleanups(unsigned N) { 904 QualType PrivateType = getPrivateType(N); 905 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 906 return DTorKind != QualType::DK_none; 907 } 908 909 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 910 Address PrivateAddr) { 911 QualType PrivateType = getPrivateType(N); 912 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 913 if (needCleanups(N)) { 914 PrivateAddr = CGF.Builder.CreateElementBitCast( 915 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 916 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 917 } 918 } 919 920 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 921 LValue BaseLV) { 922 BaseTy = BaseTy.getNonReferenceType(); 923 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 924 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 925 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 926 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 927 } else { 928 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 929 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 930 } 931 BaseTy = BaseTy->getPointeeType(); 932 } 933 return CGF.MakeAddrLValue( 934 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 935 CGF.ConvertTypeForMem(ElTy)), 936 
BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Rebuild the pointer/reference indirection chain of \p BaseTy around
/// \p Addr.
///
/// For every pointer or reference level of \p BaseTy (until it matches
/// \p ElTy) a memory temporary is created; each temporary's pointer is stored
/// into the temporary of the enclosing level, and \p Addr is stored into the
/// innermost one. The outermost temporary is returned. If \p BaseTy has no
/// such level, \p Addr is cast to the type of \p OriginalBaseAddress and
/// returned as that address with its pointer replaced.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // temporary of the current level
  Address TopTmp = Address::invalid();     // temporary of the previous level
  Address MostTopTmp = Address::invalid(); // temporary of the outermost level
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    // At least one level was peeled: terminate the chain with Addr.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr);
}

/// Find the variable underlying an array section or array subscript
/// expression.
///
/// Nested array sections and array subscripts are stripped (ignoring parens
/// and implicit casts) until the base DeclRefExpr is reached.
/// \param [out] DE Set to the base DeclRefExpr; left untouched when \p Ref is
/// neither an array section nor an array subscript.
/// \return The referenced VarDecl, or nullptr when \p Ref is neither an array
/// section nor an array subscript.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// Compute the address that stands in for the base variable of the N-th
/// reduction item and record that base variable in BaseDecls.
///
/// For array sections/subscripts the private pointer is shifted by the element
/// distance between the start of the base variable and the shared item, and
/// the result is wrapped via castToBase. For any other item \p PrivateAddr is
/// returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  // Only assigned by getBaseDecl when it returns a non-null declaration.
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Adjustment = (base begin) - (shared item), measured in elements.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// \return true if the N-th item has a user-defined reduction declaration
/// that provides an initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

/// Default implementation: the thread id variable has pointer type, so load
/// the pointer from the variable and return the lvalue it points to.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

/// Emit the region body through the stored CodeGen callback, guarded by a
/// terminate scope on the EH stack. Does nothing without an insert point.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
1034 // The point of exit cannot be a branch out of the structured block. 1035 // longjmp() and throw() must not violate the entry/exit criteria. 1036 CGF.EHStack.pushTerminate(); 1037 if (S) 1038 CGF.incrementProfileCounter(S); 1039 CodeGen(CGF); 1040 CGF.EHStack.popTerminate(); 1041 } 1042 1043 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1044 CodeGenFunction &CGF) { 1045 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1046 getThreadIDVariable()->getType(), 1047 AlignmentSource::Decl); 1048 } 1049 1050 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1051 QualType FieldTy) { 1052 auto *Field = FieldDecl::Create( 1053 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1054 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1055 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1056 Field->setAccess(AS_public); 1057 DC->addDecl(Field); 1058 return Field; 1059 } 1060 1061 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) 1062 : CGM(CGM), OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager() { 1063 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1064 llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, false, 1065 hasRequiresUnifiedSharedMemory(), 1066 CGM.getLangOpts().OpenMPOffloadMandatory); 1067 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def 1068 OMPBuilder.initialize(); 1069 OMPBuilder.setConfig(Config); 1070 OffloadEntriesInfoManager.setConfig(Config); 1071 loadOffloadInfoMetadata(); 1072 } 1073 1074 void CGOpenMPRuntime::clear() { 1075 InternalVars.clear(); 1076 // Clean non-target variable declarations possibly used only in debug info. 
1077 for (const auto &Data : EmittedNonTargetVariables) { 1078 if (!Data.getValue().pointsToAliveValue()) 1079 continue; 1080 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1081 if (!GV) 1082 continue; 1083 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1084 continue; 1085 GV->eraseFromParent(); 1086 } 1087 } 1088 1089 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1090 return OMPBuilder.createPlatformSpecificName(Parts); 1091 } 1092 1093 static llvm::Function * 1094 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1095 const Expr *CombinerInitializer, const VarDecl *In, 1096 const VarDecl *Out, bool IsCombiner) { 1097 // void .omp_combiner.(Ty *in, Ty *out); 1098 ASTContext &C = CGM.getContext(); 1099 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1100 FunctionArgList Args; 1101 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1102 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1103 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1104 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1105 Args.push_back(&OmpOutParm); 1106 Args.push_back(&OmpInParm); 1107 const CGFunctionInfo &FnInfo = 1108 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1109 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1110 std::string Name = CGM.getOpenMPRuntime().getName( 1111 {IsCombiner ? "omp_combiner" : "omp_initializer", ""}); 1112 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1113 Name, &CGM.getModule()); 1114 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1115 if (CGM.getLangOpts().Optimize) { 1116 Fn->removeFnAttr(llvm::Attribute::NoInline); 1117 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1118 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1119 } 1120 CodeGenFunction CGF(CGM); 1121 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 
// Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  // Initializer functions first emit the non-trivial initializer attached to
  // the 'Out' variable itself, if there is one.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  // The combiner/initializer expression is evaluated for its side effects.
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit the combiner function and, if \p D has an initializer, the initializer
/// function of user-defined reduction \p D, and cache both in UDRMap.
///
/// Does nothing if \p D is already cached. When \p CGF is non-null, \p D is
/// also recorded in FunctionUDRMap under the function currently being emitted.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only a call-style initializer is passed on as an expression to emit;
    // for the other kinds the helper receives nullptr and emits just the
    // initializer attached to the private variable.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Return the cached (combiner, initializer) pair for \p D, emitting the
/// functions first if they have not been emitted yet. The initializer is null
/// when \p D has no initializer.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  // Not cached yet: emit without associating D with any particular function.
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      // The callback expects IP to sit at the end of a block that has no
      // terminator yet.
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Emit at IP, restoring the builder's previous position on exit.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  // Pops the finalization callback pushed by the constructor, if any.
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Builder the callback was pushed on; null disables both push and pop.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

/// Outline captured statement \p CS of directive \p D into a function.
///
/// \param ThreadIDVar Thread id variable; must have pointer type.
/// \param InnermostKind Directive kind handed to the region info and to the
/// OpenMPIRBuilder finalization stack.
/// \param OutlinedHelperName Name passed to the outlined region info.
/// \param CodeGen Callback that emits the region body.
/// \return The function generated for \p CS.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Query the cancel flag from the concrete directive class; directives not
  // listed below are treated as having no cancel region.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

/// Outline the OMPD_parallel captured statement of \p D.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Outline the OMPD_teams captured statement of \p D.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Outline the task or taskloop captured statement of \p D.
/// \param ThreadIDVar Thread id variable; must NOT have pointer type.
/// \param [out] NumberOfParts Written only for untied tasks (\p Tied false),
/// with the part count reported by the untied-task action.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
PrePostActionTy &) {
    // Emits a call to __kmpc_omp_task(loc, tid, task) where 'task' is the
    // pointer loaded from TaskTVar.
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop directives capture under OMPD_taskloop, all others under
  // OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Directives not listed below are treated as having no cancel region.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Create the placeholder "svcpt" instruction that marks where service code
/// (such as the thread id call) is inserted for the current function.
///
/// The placeholder is a no-op bitcast of an undef i32. It is appended to the
/// current insert block when \p AtCurrentPoint is true, and placed right after
/// the alloca insertion point otherwise. It must not be set already.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Erase the "svcpt" placeholder of the current function, if one exists.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    // Reset the map entry before the instruction is destroyed.
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

/// Format \p Loc as ";file;function;line;column;;" into \p Buffer.
///
/// The function part is the qualified name of the current function
/// declaration and is left empty when there is none.
/// \return A reference to the text stored in \p Buffer; it is only valid while
/// \p Buffer is alive and unmodified.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

/// Get or create the ident_t value describing \p Loc.
///
/// The default source-location string is used when \p Loc is invalid, or when
/// \p EmitLoc is false and no debug info is requested; otherwise the string is
/// built from the current function name and the presumed file/line/column.
/// \param Flags Flags stored in the ident, converted to llvm::omp::IdentFlag.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc &&
       CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned
Line = PLoc.getLine(); 1382 unsigned Column = PLoc.getColumn(); 1383 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, 1384 Column, SrcLocStrSize); 1385 } 1386 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1387 return OMPBuilder.getOrCreateIdent( 1388 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags); 1389 } 1390 1391 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1392 SourceLocation Loc) { 1393 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1394 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as 1395 // the clang invariants used below might be broken. 1396 if (CGM.getLangOpts().OpenMPIRBuilder) { 1397 SmallString<128> Buffer; 1398 OMPBuilder.updateToLocation(CGF.Builder.saveIP()); 1399 uint32_t SrcLocStrSize; 1400 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( 1401 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize); 1402 return OMPBuilder.getOrCreateThreadID( 1403 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize)); 1404 } 1405 1406 llvm::Value *ThreadID = nullptr; 1407 // Check whether we've already cached a load of the thread id in this 1408 // function. 1409 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1410 if (I != OpenMPLocThreadIDMap.end()) { 1411 ThreadID = I->second.ThreadID; 1412 if (ThreadID != nullptr) 1413 return ThreadID; 1414 } 1415 // If exceptions are enabled, do not use parameter to avoid possible crash. 1416 if (auto *OMPRegionInfo = 1417 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1418 if (OMPRegionInfo->getThreadIDVariable()) { 1419 // Check if this an outlined function with thread id passed as argument. 
1420 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1421 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1422 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1423 !CGF.getLangOpts().CXXExceptions || 1424 CGF.Builder.GetInsertBlock() == TopBlock || 1425 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1426 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1427 TopBlock || 1428 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1429 CGF.Builder.GetInsertBlock()) { 1430 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1431 // If value loaded in entry block, cache it and use it everywhere in 1432 // function. 1433 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1434 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1435 Elem.second.ThreadID = ThreadID; 1436 } 1437 return ThreadID; 1438 } 1439 } 1440 } 1441 1442 // This is not an outlined function region - need to call __kmpc_int32 1443 // kmpc_global_thread_num(ident_t *loc). 1444 // Generate thread id value and cache this value for use across the 1445 // function. 
1446 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1447 if (!Elem.second.ServiceInsertPt) 1448 setLocThreadIdInsertPt(CGF); 1449 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1450 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1451 llvm::CallInst *Call = CGF.Builder.CreateCall( 1452 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1453 OMPRTL___kmpc_global_thread_num), 1454 emitUpdateLocation(CGF, Loc)); 1455 Call->setCallingConv(CGF.getRuntimeCC()); 1456 Elem.second.ThreadID = Call; 1457 return Call; 1458 } 1459 1460 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1461 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1462 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1463 clearLocThreadIdInsertPt(CGF); 1464 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1465 } 1466 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1467 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1468 UDRMap.erase(D); 1469 FunctionUDRMap.erase(CGF.CurFn); 1470 } 1471 auto I = FunctionUDMMap.find(CGF.CurFn); 1472 if (I != FunctionUDMMap.end()) { 1473 for(const auto *D : I->second) 1474 UDMMap.erase(D); 1475 FunctionUDMMap.erase(I); 1476 } 1477 LastprivateConditionalToTypes.erase(CGF.CurFn); 1478 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1479 } 1480 1481 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1482 return OMPBuilder.IdentPtr; 1483 } 1484 1485 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1486 if (!Kmpc_MicroTy) { 1487 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1488 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1489 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1490 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1491 } 1492 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1493 } 1494 1495 llvm::FunctionCallee 1496 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned, 1497 bool IsGPUDistribute) { 1498 assert((IVSize == 32 || IVSize == 64) && 1499 "IV size is not compatible with the omp runtime"); 1500 StringRef Name; 1501 if (IsGPUDistribute) 1502 Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4" 1503 : "__kmpc_distribute_static_init_4u") 1504 : (IVSigned ? "__kmpc_distribute_static_init_8" 1505 : "__kmpc_distribute_static_init_8u"); 1506 else 1507 Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1508 : "__kmpc_for_static_init_4u") 1509 : (IVSigned ? "__kmpc_for_static_init_8" 1510 : "__kmpc_for_static_init_8u"); 1511 1512 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1513 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1514 llvm::Type *TypeParams[] = { 1515 getIdentTyPointerTy(), // loc 1516 CGM.Int32Ty, // tid 1517 CGM.Int32Ty, // schedtype 1518 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1519 PtrTy, // p_lower 1520 PtrTy, // p_upper 1521 PtrTy, // p_stride 1522 ITy, // incr 1523 ITy // chunk 1524 }; 1525 auto *FnTy = 1526 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1527 return CGM.CreateRuntimeFunction(FnTy, Name); 1528 } 1529 1530 llvm::FunctionCallee 1531 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1532 assert((IVSize == 32 || IVSize == 64) && 1533 "IV size is not compatible with the omp runtime"); 1534 StringRef Name = 1535 IVSize == 32 1536 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1537 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1538 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1539 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1540 CGM.Int32Ty, // tid 1541 CGM.Int32Ty, // schedtype 1542 ITy, // lower 1543 ITy, // upper 1544 ITy, // stride 1545 ITy // chunk 1546 }; 1547 auto *FnTy = 1548 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1549 return CGM.CreateRuntimeFunction(FnTy, Name); 1550 } 1551 1552 llvm::FunctionCallee 1553 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1554 assert((IVSize == 32 || IVSize == 64) && 1555 "IV size is not compatible with the omp runtime"); 1556 StringRef Name = 1557 IVSize == 32 1558 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1559 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1560 llvm::Type *TypeParams[] = { 1561 getIdentTyPointerTy(), // loc 1562 CGM.Int32Ty, // tid 1563 }; 1564 auto *FnTy = 1565 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1566 return CGM.CreateRuntimeFunction(FnTy, Name); 1567 } 1568 1569 llvm::FunctionCallee 1570 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1571 assert((IVSize == 32 || IVSize == 64) && 1572 "IV size is not compatible with the omp runtime"); 1573 StringRef Name = 1574 IVSize == 32 1575 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1576 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1577 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1578 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1579 llvm::Type *TypeParams[] = { 1580 getIdentTyPointerTy(), // loc 1581 CGM.Int32Ty, // tid 1582 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1583 PtrTy, // p_lower 1584 PtrTy, // p_upper 1585 PtrTy // p_stride 1586 }; 1587 auto *FnTy = 1588 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1589 return CGM.CreateRuntimeFunction(FnTy, Name); 1590 } 1591 1592 /// Obtain information that uniquely identifies a target entry. This 1593 /// consists of the file and device IDs as well as line number associated with 1594 /// the relevant entry source location. 1595 static llvm::TargetRegionEntryInfo 1596 getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1597 StringRef ParentName = "") { 1598 SourceManager &SM = C.getSourceManager(); 1599 1600 // The loc should be always valid and have a file ID (the user cannot use 1601 // #pragma directives in macros) 1602 1603 assert(Loc.isValid() && "Source location is expected to be always valid."); 1604 1605 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1606 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1607 1608 llvm::sys::fs::UniqueID ID; 1609 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { 1610 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); 1611 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1612 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1613 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1614 << PLoc.getFilename() << EC.message(); 1615 } 1616 1617 return llvm::TargetRegionEntryInfo(ParentName, ID.getDevice(), ID.getFile(), 1618 PLoc.getLine()); 1619 } 1620 1621 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1622 if (CGM.getLangOpts().OpenMPSimd) 1623 return Address::invalid(); 1624 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1625 
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1626 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1627 ((*Res == OMPDeclareTargetDeclAttr::MT_To || 1628 *Res == OMPDeclareTargetDeclAttr::MT_Enter) && 1629 HasRequiresUnifiedSharedMemory))) { 1630 SmallString<64> PtrName; 1631 { 1632 llvm::raw_svector_ostream OS(PtrName); 1633 OS << CGM.getMangledName(GlobalDecl(VD)); 1634 if (!VD->isExternallyVisible()) { 1635 auto EntryInfo = getTargetEntryUniqueInfo( 1636 CGM.getContext(), VD->getCanonicalDecl()->getBeginLoc()); 1637 OS << llvm::format("_%x", EntryInfo.FileID); 1638 } 1639 OS << "_decl_tgt_ref_ptr"; 1640 } 1641 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1642 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1643 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy); 1644 if (!Ptr) { 1645 Ptr = OMPBuilder.getOrCreateInternalVariable(LlvmPtrTy, PtrName); 1646 1647 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1648 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1649 1650 if (!CGM.getLangOpts().OpenMPIsDevice) 1651 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1652 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1653 } 1654 return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD)); 1655 } 1656 return Address::invalid(); 1657 } 1658 1659 llvm::Constant * 1660 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1661 assert(!CGM.getLangOpts().OpenMPUseTLS || 1662 !CGM.getContext().getTargetInfo().isTLSSupported()); 1663 // Lookup the entry, lazily creating it if necessary. 
1664 std::string Suffix = getName({"cache", ""}); 1665 return OMPBuilder.getOrCreateInternalVariable( 1666 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str()); 1667 } 1668 1669 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1670 const VarDecl *VD, 1671 Address VDAddr, 1672 SourceLocation Loc) { 1673 if (CGM.getLangOpts().OpenMPUseTLS && 1674 CGM.getContext().getTargetInfo().isTLSSupported()) 1675 return VDAddr; 1676 1677 llvm::Type *VarTy = VDAddr.getElementType(); 1678 llvm::Value *Args[] = { 1679 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1680 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy), 1681 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1682 getOrCreateThreadPrivateCache(VD)}; 1683 return Address( 1684 CGF.EmitRuntimeCall( 1685 OMPBuilder.getOrCreateRuntimeFunction( 1686 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1687 Args), 1688 CGF.Int8Ty, VDAddr.getAlignment()); 1689 } 1690 1691 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1692 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1693 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1694 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1695 // library. 1696 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1697 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1698 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1699 OMPLoc); 1700 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1701 // to register constructor/destructor for variable. 
1702 llvm::Value *Args[] = { 1703 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1704 Ctor, CopyCtor, Dtor}; 1705 CGF.EmitRuntimeCall( 1706 OMPBuilder.getOrCreateRuntimeFunction( 1707 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1708 Args); 1709 } 1710 1711 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1712 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1713 bool PerformInit, CodeGenFunction *CGF) { 1714 if (CGM.getLangOpts().OpenMPUseTLS && 1715 CGM.getContext().getTargetInfo().isTLSSupported()) 1716 return nullptr; 1717 1718 VD = VD->getDefinition(CGM.getContext()); 1719 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1720 QualType ASTTy = VD->getType(); 1721 1722 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1723 const Expr *Init = VD->getAnyInitializer(); 1724 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1725 // Generate function that re-emits the declaration's initializer into the 1726 // threadprivate copy of the variable VD 1727 CodeGenFunction CtorCGF(CGM); 1728 FunctionArgList Args; 1729 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1730 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1731 ImplicitParamDecl::Other); 1732 Args.push_back(&Dst); 1733 1734 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1735 CGM.getContext().VoidPtrTy, Args); 1736 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1737 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1738 llvm::Function *Fn = 1739 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1740 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1741 Args, Loc, Loc); 1742 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1743 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1744 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1745 Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment()); 1746 Arg = 
/// Emit the helper functions needed to construct and destroy the per-thread
/// copies of threadprivate variable \p VD and register them with the runtime.
///
/// Nothing is emitted when TLS is requested and supported, when \p VD has no
/// definition, or when the mangled name of \p VD was already recorded in
/// ThreadPrivateWithDefinition.
///
/// \param VD Threadprivate variable; replaced by its definition internally.
/// \param VDAddr Address of the original variable.
/// \param Loc Location used for the generated functions and runtime calls.
/// \param PerformInit Whether a constructor helper re-running the initializer
/// must be generated (only honored in C++).
/// \param CGF If non-null, the registration calls are emitted into this
/// function; if null, a standalone `__omp_threadprivate_init_` function is
/// created instead.
/// \return The standalone init function when one was created, nullptr in all
/// other cases.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // The set insertion succeeds only the first time a given mangled name is
  // seen, so the helpers are generated at most once per variable.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // NOTE(review): Init is dereferenced below without a null check;
      // presumably callers pass PerformInit only for variables that have an
      // initializer -- confirm.
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // Signature of the helper: void *ctor(void *dst).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the incoming pointer and hand it back as the return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // Signature of the helper: void dtor(void *dst).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever of ctor/dtor was not generated is passed as a typed null
    // function pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function was supplied: wrap the registration calls in a
      // dedicated nullary init function and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
/// Emit and register the offload constructor/destructor entries for a
/// declare-target variable.
///
/// \param VD Declare-target variable; replaced by its definition internally.
/// \param Addr Global variable holding \p VD.
/// \param PerformInit Whether a constructor entry re-running the initializer
/// must be generated (only honored in C++).
/// \return false when no offload target triples are configured and this is
/// not a device compilation; otherwise the value of the OpenMPIsDevice
/// language option.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Nothing to emit for variables that are not declare target, that are
  // mapped with 'link', or that are mapped with 'to'/'enter' while unified
  // shared memory is required.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Each variable (keyed by mangled name) is processed only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  auto EntryInfo =
      getTargetEntryUniqueInfo(CGM.getContext(), Loc, VD->getName());
  SmallString<128> Buffer, Out;
  OffloadEntriesInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    // NOTE(review): Init is dereferenced in the device branch without a null
    // check; presumably PerformInit implies an initializer -- confirm.
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Device side: generate a function that re-emits the declaration's
      // initializer into the storage of the variable (Addr).
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      // The initializer is emitted through an address-space-0 pointer; cast
      // the global if it lives in a different address space.
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(AddrInAS0, Addr->getValueType(),
                                       CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      // Host side: no function body is generated; a private constant i8
      // global with the same "_ctor" name serves as both address and ID of
      // the entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    auto CtorEntryInfo = EntryInfo;
    CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        CtorEntryInfo, Ctor, ID,
        llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Device side: generate a function that emits the destructor call for
      // the storage of the variable (Addr).
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
                                  CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      // Host side: placeholder global named "<prefix>_dtor", used as both
      // address and ID of the entry.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    auto DtorEntryInfo = EntryInfo;
    DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DtorEntryInfo, Dtor, ID,
        llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
1933 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1934 llvm::Constant *AddrInAS0 = Addr; 1935 if (Addr->getAddressSpace() != 0) 1936 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( 1937 Addr, llvm::PointerType::getWithSamePointeeType( 1938 cast<llvm::PointerType>(Addr->getType()), 0)); 1939 DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(), 1940 CGM.getContext().getDeclAlign(VD)), 1941 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1942 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1943 DtorCGF.FinishFunction(); 1944 Dtor = Fn; 1945 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1946 } else { 1947 Dtor = new llvm::GlobalVariable( 1948 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1949 llvm::GlobalValue::PrivateLinkage, 1950 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 1951 ID = Dtor; 1952 } 1953 // Register the information for the entry associated with the destructor. 1954 Out.clear(); 1955 auto DtorEntryInfo = EntryInfo; 1956 DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out); 1957 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1958 DtorEntryInfo, Dtor, ID, 1959 llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor); 1960 } 1961 return CGM.getLangOpts().OpenMPIsDevice; 1962 } 1963 1964 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 1965 QualType VarType, 1966 StringRef Name) { 1967 std::string Suffix = getName({"artificial", ""}); 1968 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 1969 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable( 1970 VarLVType, Twine(Name).concat(Suffix).str()); 1971 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 1972 CGM.getTarget().isTLSSupported()) { 1973 GAddr->setThreadLocal(/*Val=*/true); 1974 return Address(GAddr, GAddr->getValueType(), 1975 CGM.getContext().getTypeAlignInChars(VarType)); 1976 } 1977 std::string CacheSuffix = getName({"cache", ""}); 1978 
llvm::Value *Args[] = { 1979 emitUpdateLocation(CGF, SourceLocation()), 1980 getThreadID(CGF, SourceLocation()), 1981 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 1982 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 1983 /*isSigned=*/false), 1984 OMPBuilder.getOrCreateInternalVariable( 1985 CGM.VoidPtrPtrTy, 1986 Twine(Name).concat(Suffix).concat(CacheSuffix).str())}; 1987 return Address( 1988 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1989 CGF.EmitRuntimeCall( 1990 OMPBuilder.getOrCreateRuntimeFunction( 1991 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1992 Args), 1993 VarLVType->getPointerTo(/*AddrSpace=*/0)), 1994 VarLVType, CGM.getContext().getTypeAlignInChars(VarType)); 1995 } 1996 1997 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 1998 const RegionCodeGenTy &ThenGen, 1999 const RegionCodeGenTy &ElseGen) { 2000 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2001 2002 // If the condition constant folds and can be elided, try to avoid emitting 2003 // the condition and the dead arm of the if/else. 2004 bool CondConstant; 2005 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2006 if (CondConstant) 2007 ThenGen(CGF); 2008 else 2009 ElseGen(CGF); 2010 return; 2011 } 2012 2013 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2014 // emit the conditional branch. 2015 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2016 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2017 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2018 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2019 2020 // Emit the 'then' code. 2021 CGF.EmitBlock(ThenBlock); 2022 ThenGen(CGF); 2023 CGF.EmitBranch(ContBlock); 2024 // Emit the 'else' code if present. 2025 // There is no need to emit line number for unconditional branch. 
/// Emit the code that runs the outlined parallel region \p OutlinedFn.
///
/// Without an 'if' clause a `__kmpc_fork_call` is emitted. With an 'if'
/// clause the fork call is the 'then' arm; the 'else' arm runs the region
/// serially: `__kmpc_serialized_parallel`, a direct call of \p OutlinedFn,
/// then `__kmpc_end_serialized_parallel`.
///
/// \param CGF Function to emit into; nothing is emitted if it has no insert
/// point.
/// \param Loc Location used for the runtime calls.
/// \param OutlinedFn Outlined function of the parallel region. On the serial
/// path its AlwaysInline attribute is removed and NoInline is added.
/// \param CapturedVars Captured variables forwarded to the outlined function.
/// \param IfCond Condition of the 'if' clause, or null if there is none.
/// \param NumThreads Not referenced by this implementation.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // The first two arguments are the address of the current thread id and
    // the address of the zero-initialized bound temporary.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not perfect, but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    // handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No 'if' clause: the region always runs through the fork call.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
Otherwise, if we're not inside parallel region, but in 2111 // regular serial code region, get thread ID by calling kmp_int32 2112 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2113 // return the address of that temp. 2114 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2115 SourceLocation Loc) { 2116 if (auto *OMPRegionInfo = 2117 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2118 if (OMPRegionInfo->getThreadIDVariable()) 2119 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2120 2121 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2122 QualType Int32Ty = 2123 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2124 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2125 CGF.EmitStoreOfScalar(ThreadID, 2126 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2127 2128 return ThreadIDTemp; 2129 } 2130 2131 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2132 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2133 std::string Name = getName({Prefix, "var"}); 2134 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2135 } 2136 2137 namespace { 2138 /// Common pre(post)-action for different OpenMP constructs. 
2139 class CommonActionTy final : public PrePostActionTy { 2140 llvm::FunctionCallee EnterCallee; 2141 ArrayRef<llvm::Value *> EnterArgs; 2142 llvm::FunctionCallee ExitCallee; 2143 ArrayRef<llvm::Value *> ExitArgs; 2144 bool Conditional; 2145 llvm::BasicBlock *ContBlock = nullptr; 2146 2147 public: 2148 CommonActionTy(llvm::FunctionCallee EnterCallee, 2149 ArrayRef<llvm::Value *> EnterArgs, 2150 llvm::FunctionCallee ExitCallee, 2151 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2152 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2153 ExitArgs(ExitArgs), Conditional(Conditional) {} 2154 void Enter(CodeGenFunction &CGF) override { 2155 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2156 if (Conditional) { 2157 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2158 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2159 ContBlock = CGF.createBasicBlock("omp_if.end"); 2160 // Generate the branch (If-stmt) 2161 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2162 CGF.EmitBlock(ThenBlock); 2163 } 2164 } 2165 void Done(CodeGenFunction &CGF) { 2166 // Emit the rest of blocks/branches 2167 CGF.EmitBranch(ContBlock); 2168 CGF.EmitBlock(ContBlock, true); 2169 } 2170 void Exit(CodeGenFunction &CGF) override { 2171 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2172 } 2173 }; 2174 } // anonymous namespace 2175 2176 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2177 StringRef CriticalName, 2178 const RegionCodeGenTy &CriticalOpGen, 2179 SourceLocation Loc, const Expr *Hint) { 2180 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2181 // CriticalOpGen(); 2182 // __kmpc_end_critical(ident_t *, gtid, Lock); 2183 // Prepare arguments and build a call to __kmpc_critical 2184 if (!CGF.HaveInsertPoint()) 2185 return; 2186 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2187 getCriticalRegionLock(CriticalName)}; 2188 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2189 std::end(Args)); 2190 if (Hint) { 2191 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2192 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2193 } 2194 CommonActionTy Action( 2195 OMPBuilder.getOrCreateRuntimeFunction( 2196 CGM.getModule(), 2197 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), 2198 EnterArgs, 2199 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2200 OMPRTL___kmpc_end_critical), 2201 Args); 2202 CriticalOpGen.setAction(Action); 2203 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2204 } 2205 2206 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2207 const RegionCodeGenTy &MasterOpGen, 2208 SourceLocation Loc) { 2209 if (!CGF.HaveInsertPoint()) 2210 return; 2211 // if(__kmpc_master(ident_t *, gtid)) { 2212 // MasterOpGen(); 2213 // __kmpc_end_master(ident_t *, gtid); 2214 // } 2215 // Prepare arguments and build a call to __kmpc_master 2216 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2217 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2218 CGM.getModule(), OMPRTL___kmpc_master), 2219 Args, 2220 OMPBuilder.getOrCreateRuntimeFunction( 2221 CGM.getModule(), OMPRTL___kmpc_end_master), 2222 Args, 2223 /*Conditional=*/true); 2224 MasterOpGen.setAction(Action); 2225 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2226 Action.Done(CGF); 2227 } 2228 2229 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF, 2230 const RegionCodeGenTy &MaskedOpGen, 2231 SourceLocation Loc, const Expr *Filter) { 2232 if (!CGF.HaveInsertPoint()) 2233 return; 2234 // if(__kmpc_masked(ident_t *, gtid, filter)) { 2235 // MaskedOpGen(); 2236 // __kmpc_end_masked(iden_t *, gtid); 2237 // } 2238 // Prepare arguments and build a call to __kmpc_masked 2239 llvm::Value *FilterVal = Filter 2240 ? 
CGF.EmitScalarExpr(Filter, CGF.Int32Ty) 2241 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); 2242 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2243 FilterVal}; 2244 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc), 2245 getThreadID(CGF, Loc)}; 2246 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2247 CGM.getModule(), OMPRTL___kmpc_masked), 2248 Args, 2249 OMPBuilder.getOrCreateRuntimeFunction( 2250 CGM.getModule(), OMPRTL___kmpc_end_masked), 2251 ArgsEnd, 2252 /*Conditional=*/true); 2253 MaskedOpGen.setAction(Action); 2254 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen); 2255 Action.Done(CGF); 2256 } 2257 2258 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2259 SourceLocation Loc) { 2260 if (!CGF.HaveInsertPoint()) 2261 return; 2262 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2263 OMPBuilder.createTaskyield(CGF.Builder); 2264 } else { 2265 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2266 llvm::Value *Args[] = { 2267 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2268 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2269 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2270 CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2271 Args); 2272 } 2273 2274 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2275 Region->emitUntiedSwitch(CGF); 2276 } 2277 2278 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2279 const RegionCodeGenTy &TaskgroupOpGen, 2280 SourceLocation Loc) { 2281 if (!CGF.HaveInsertPoint()) 2282 return; 2283 // __kmpc_taskgroup(ident_t *, gtid); 2284 // TaskgroupOpGen(); 2285 // __kmpc_end_taskgroup(ident_t *, gtid); 2286 // Prepare arguments and build a call to __kmpc_taskgroup 2287 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2288 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2289 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2290 Args, 2291 
OMPBuilder.getOrCreateRuntimeFunction( 2292 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2293 Args); 2294 TaskgroupOpGen.setAction(Action); 2295 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2296 } 2297 2298 /// Given an array of pointers to variables, project the address of a 2299 /// given variable. 2300 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2301 unsigned Index, const VarDecl *Var) { 2302 // Pull out the pointer to the variable. 2303 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2304 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2305 2306 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType()); 2307 return Address( 2308 CGF.Builder.CreateBitCast( 2309 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())), 2310 ElemTy, CGF.getContext().getDeclAlign(Var)); 2311 } 2312 2313 static llvm::Value *emitCopyprivateCopyFunction( 2314 CodeGenModule &CGM, llvm::Type *ArgsElemType, 2315 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2316 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2317 SourceLocation Loc) { 2318 ASTContext &C = CGM.getContext(); 2319 // void copy_func(void *LHSArg, void *RHSArg); 2320 FunctionArgList Args; 2321 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2322 ImplicitParamDecl::Other); 2323 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2324 ImplicitParamDecl::Other); 2325 Args.push_back(&LHSArg); 2326 Args.push_back(&RHSArg); 2327 const auto &CGFI = 2328 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2329 std::string Name = 2330 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2331 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2332 llvm::GlobalValue::InternalLinkage, Name, 2333 &CGM.getModule()); 2334 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2335 
/// Build the internal helper `void copy_func(void *LHSArg, void *RHSArg)`
/// handed to `__kmpc_copyprivate`. Both arguments are treated as arrays of
/// pointers to variables; for every index I the helper copies the variable
/// referenced by RHSArg[I] into the one referenced by LHSArg[I] using
/// AssignmentOps[I].
///
/// \param ArgsElemType LLVM type of the pointer arrays the two arguments
/// point to.
/// \param CopyprivateVars Expressions whose declaration types give the type
/// of each copied element.
/// \param DestExprs DeclRefExprs naming the destination helper variables.
/// \param SrcExprs DeclRefExprs naming the source helper variables.
/// \param AssignmentOps Copy operation for each element; its size determines
/// the number of copies emitted.
/// \return The generated function.
///
/// NOTE(review): the only caller visible in this part of the file,
/// emitSingleRegion, passes its parameters named SrcExprs/DstExprs
/// positionally into DestExprs/SrcExprs here -- confirm which naming is the
/// intended one.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The element type comes from the copyprivate variable itself, not from
    // the helper variables.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
if(__kmpc_single(ident_t *, gtid)) { 2384 // SingleOpGen(); 2385 // __kmpc_end_single(ident_t *, gtid); 2386 // did_it = 1; 2387 // } 2388 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2389 // <copy_func>, did_it); 2390 2391 Address DidIt = Address::invalid(); 2392 if (!CopyprivateVars.empty()) { 2393 // int32 did_it = 0; 2394 QualType KmpInt32Ty = 2395 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2396 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2397 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2398 } 2399 // Prepare arguments and build a call to __kmpc_single 2400 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2401 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2402 CGM.getModule(), OMPRTL___kmpc_single), 2403 Args, 2404 OMPBuilder.getOrCreateRuntimeFunction( 2405 CGM.getModule(), OMPRTL___kmpc_end_single), 2406 Args, 2407 /*Conditional=*/true); 2408 SingleOpGen.setAction(Action); 2409 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2410 if (DidIt.isValid()) { 2411 // did_it = 1; 2412 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2413 } 2414 Action.Done(CGF); 2415 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2416 // <copy_func>, did_it); 2417 if (DidIt.isValid()) { 2418 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2419 QualType CopyprivateArrayTy = C.getConstantArrayType( 2420 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2421 /*IndexTypeQuals=*/0); 2422 // Create a list of all private variables for copyprivate. 
2423 Address CopyprivateList = 2424 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2425 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2426 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2427 CGF.Builder.CreateStore( 2428 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2429 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2430 CGF.VoidPtrTy), 2431 Elem); 2432 } 2433 // Build function that copies private values from single region to all other 2434 // threads in the corresponding parallel region. 2435 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2436 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars, 2437 SrcExprs, DstExprs, AssignmentOps, Loc); 2438 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2439 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2440 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty); 2441 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2442 llvm::Value *Args[] = { 2443 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2444 getThreadID(CGF, Loc), // i32 <gtid> 2445 BufSize, // size_t <buf_size> 2446 CL.getPointer(), // void *<copyprivate list> 2447 CpyFn, // void (*) (void *, void *) <copy_func> 2448 DidItVal // i32 did_it 2449 }; 2450 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2451 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2452 Args); 2453 } 2454 } 2455 2456 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2457 const RegionCodeGenTy &OrderedOpGen, 2458 SourceLocation Loc, bool IsThreads) { 2459 if (!CGF.HaveInsertPoint()) 2460 return; 2461 // __kmpc_ordered(ident_t *, gtid); 2462 // OrderedOpGen(); 2463 // __kmpc_end_ordered(ident_t *, gtid); 2464 // Prepare arguments and build a call to __kmpc_ordered 2465 if (IsThreads) { 2466 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2467 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2468 CGM.getModule(), 
OMPRTL___kmpc_ordered), 2469 Args, 2470 OMPBuilder.getOrCreateRuntimeFunction( 2471 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2472 Args); 2473 OrderedOpGen.setAction(Action); 2474 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2475 return; 2476 } 2477 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2478 } 2479 2480 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2481 unsigned Flags; 2482 if (Kind == OMPD_for) 2483 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2484 else if (Kind == OMPD_sections) 2485 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2486 else if (Kind == OMPD_single) 2487 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2488 else if (Kind == OMPD_barrier) 2489 Flags = OMP_IDENT_BARRIER_EXPL; 2490 else 2491 Flags = OMP_IDENT_BARRIER_IMPL; 2492 return Flags; 2493 } 2494 2495 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2496 CodeGenFunction &CGF, const OMPLoopDirective &S, 2497 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2498 // Check if the loop directive is actually a doacross loop directive. In this 2499 // case choose static, 1 schedule. 2500 if (llvm::any_of( 2501 S.getClausesOfKind<OMPOrderedClause>(), 2502 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2503 ScheduleKind = OMPC_SCHEDULE_static; 2504 // Chunk size is 1 in this case. 
/// Emits a barrier for the directive \p Kind.
///
/// When LangOpts.OpenMPIRBuilder is set, the barrier is produced by
/// OMPBuilder.createBarrier and nothing else is emitted here. Otherwise a
/// runtime call is emitted: __kmpc_cancel_barrier if the current captured
/// statement info is an OpenMP region that reports hasCancel() and
/// \p ForceSimpleCall is false, __kmpc_barrier in every other case.
///
/// \param CGF             Function being emitted.
/// \param Loc             Location used for the runtime ident_t argument.
/// \param Kind            Directive kind; selects the barrier flags.
/// \param EmitChecks      If true, the result of __kmpc_cancel_barrier is
///                        tested and a non-null result branches through
///                        cleanups to the region's cancel destination.
/// \param ForceSimpleCall If true, never use __kmpc_cancel_barrier.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Enclosing OpenMP region info, or null when the current captured statement
  // info is absent or not an OpenMP region.
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  // Check if we should use the OMPBuilder. Note that this path is taken before
  // the insert-point check below.
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id); both take the same argument list.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      // The cancellable barrier replaces the plain one.
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2602 static OpenMPSchedType 2603 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2604 // only static is allowed for dist_schedule 2605 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2606 } 2607 2608 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2609 bool Chunked) const { 2610 OpenMPSchedType Schedule = 2611 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2612 return Schedule == OMP_sch_static; 2613 } 2614 2615 bool CGOpenMPRuntime::isStaticNonchunked( 2616 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2617 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2618 return Schedule == OMP_dist_sch_static; 2619 } 2620 2621 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2622 bool Chunked) const { 2623 OpenMPSchedType Schedule = 2624 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2625 return Schedule == OMP_sch_static_chunked; 2626 } 2627 2628 bool CGOpenMPRuntime::isStaticChunked( 2629 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2630 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2631 return Schedule == OMP_dist_sch_static_chunked; 2632 } 2633 2634 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2635 OpenMPSchedType Schedule = 2636 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2637 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2638 return Schedule != OMP_sch_static; 2639 } 2640 2641 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2642 OpenMPScheduleClauseModifier M1, 2643 OpenMPScheduleClauseModifier M2) { 2644 int Modifier = 0; 2645 switch (M1) { 2646 case OMPC_SCHEDULE_MODIFIER_monotonic: 2647 Modifier = OMP_sch_modifier_monotonic; 2648 break; 2649 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2650 Modifier = OMP_sch_modifier_nonmonotonic; 
2651 break; 2652 case OMPC_SCHEDULE_MODIFIER_simd: 2653 if (Schedule == OMP_sch_static_chunked) 2654 Schedule = OMP_sch_static_balanced_chunked; 2655 break; 2656 case OMPC_SCHEDULE_MODIFIER_last: 2657 case OMPC_SCHEDULE_MODIFIER_unknown: 2658 break; 2659 } 2660 switch (M2) { 2661 case OMPC_SCHEDULE_MODIFIER_monotonic: 2662 Modifier = OMP_sch_modifier_monotonic; 2663 break; 2664 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2665 Modifier = OMP_sch_modifier_nonmonotonic; 2666 break; 2667 case OMPC_SCHEDULE_MODIFIER_simd: 2668 if (Schedule == OMP_sch_static_chunked) 2669 Schedule = OMP_sch_static_balanced_chunked; 2670 break; 2671 case OMPC_SCHEDULE_MODIFIER_last: 2672 case OMPC_SCHEDULE_MODIFIER_unknown: 2673 break; 2674 } 2675 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2676 // If the static schedule kind is specified or if the ordered clause is 2677 // specified, and if the nonmonotonic modifier is not specified, the effect is 2678 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2679 // modifier is specified, the effect is as if the nonmonotonic modifier is 2680 // specified. 
2681 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2682 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2683 Schedule == OMP_sch_static_balanced_chunked || 2684 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2685 Schedule == OMP_dist_sch_static_chunked || 2686 Schedule == OMP_dist_sch_static)) 2687 Modifier = OMP_sch_modifier_nonmonotonic; 2688 } 2689 return Schedule | Modifier; 2690 } 2691 2692 void CGOpenMPRuntime::emitForDispatchInit( 2693 CodeGenFunction &CGF, SourceLocation Loc, 2694 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2695 bool Ordered, const DispatchRTInput &DispatchValues) { 2696 if (!CGF.HaveInsertPoint()) 2697 return; 2698 OpenMPSchedType Schedule = getRuntimeSchedule( 2699 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2700 assert(Ordered || 2701 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2702 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2703 Schedule != OMP_sch_static_balanced_chunked)); 2704 // Call __kmpc_dispatch_init( 2705 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2706 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2707 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2708 2709 // If the Chunk was not specified in the clause - use default value 1. 2710 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 2711 : CGF.Builder.getIntN(IVSize, 1); 2712 llvm::Value *Args[] = { 2713 emitUpdateLocation(CGF, Loc), 2714 getThreadID(CGF, Loc), 2715 CGF.Builder.getInt32(addMonoNonMonoModifier( 2716 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2717 DispatchValues.LB, // Lower 2718 DispatchValues.UB, // Upper 2719 CGF.Builder.getIntN(IVSize, 1), // Stride 2720 Chunk // Chunk 2721 }; 2722 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2723 } 2724 2725 static void emitForStaticInitCall( 2726 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2727 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2728 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2729 const CGOpenMPRuntime::StaticRTInput &Values) { 2730 if (!CGF.HaveInsertPoint()) 2731 return; 2732 2733 assert(!Values.Ordered); 2734 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2735 Schedule == OMP_sch_static_balanced_chunked || 2736 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2737 Schedule == OMP_dist_sch_static || 2738 Schedule == OMP_dist_sch_static_chunked); 2739 2740 // Call __kmpc_for_static_init( 2741 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2742 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2743 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2744 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2745 llvm::Value *Chunk = Values.Chunk; 2746 if (Chunk == nullptr) { 2747 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2748 Schedule == OMP_dist_sch_static) && 2749 "expected static non-chunked schedule"); 2750 // If the Chunk was not specified in the clause - use default value 1. 
2751 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2752 } else { 2753 assert((Schedule == OMP_sch_static_chunked || 2754 Schedule == OMP_sch_static_balanced_chunked || 2755 Schedule == OMP_ord_static_chunked || 2756 Schedule == OMP_dist_sch_static_chunked) && 2757 "expected static chunked schedule"); 2758 } 2759 llvm::Value *Args[] = { 2760 UpdateLocation, 2761 ThreadId, 2762 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2763 M2)), // Schedule type 2764 Values.IL.getPointer(), // &isLastIter 2765 Values.LB.getPointer(), // &LB 2766 Values.UB.getPointer(), // &UB 2767 Values.ST.getPointer(), // &Stride 2768 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2769 Chunk // Chunk 2770 }; 2771 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2772 } 2773 2774 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2775 SourceLocation Loc, 2776 OpenMPDirectiveKind DKind, 2777 const OpenMPScheduleTy &ScheduleKind, 2778 const StaticRTInput &Values) { 2779 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2780 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2781 assert(isOpenMPWorksharingDirective(DKind) && 2782 "Expected loop-based or sections-based directive."); 2783 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2784 isOpenMPLoopDirective(DKind) 2785 ? 
OMP_IDENT_WORK_LOOP 2786 : OMP_IDENT_WORK_SECTIONS); 2787 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2788 llvm::FunctionCallee StaticInitFunction = 2789 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); 2790 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2791 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2792 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2793 } 2794 2795 void CGOpenMPRuntime::emitDistributeStaticInit( 2796 CodeGenFunction &CGF, SourceLocation Loc, 2797 OpenMPDistScheduleClauseKind SchedKind, 2798 const CGOpenMPRuntime::StaticRTInput &Values) { 2799 OpenMPSchedType ScheduleNum = 2800 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2801 llvm::Value *UpdatedLocation = 2802 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2803 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2804 llvm::FunctionCallee StaticInitFunction; 2805 bool isGPUDistribute = 2806 CGM.getLangOpts().OpenMPIsDevice && 2807 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); 2808 StaticInitFunction = createForStaticInitFunction( 2809 Values.IVSize, Values.IVSigned, isGPUDistribute); 2810 2811 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2812 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2813 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2814 } 2815 2816 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2817 SourceLocation Loc, 2818 OpenMPDirectiveKind DKind) { 2819 if (!CGF.HaveInsertPoint()) 2820 return; 2821 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2822 llvm::Value *Args[] = { 2823 emitUpdateLocation(CGF, Loc, 2824 isOpenMPDistributeDirective(DKind) 2825 ? OMP_IDENT_WORK_DISTRIBUTE 2826 : isOpenMPLoopDirective(DKind) 2827 ? 
OMP_IDENT_WORK_LOOP 2828 : OMP_IDENT_WORK_SECTIONS), 2829 getThreadID(CGF, Loc)}; 2830 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2831 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice && 2832 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX())) 2833 CGF.EmitRuntimeCall( 2834 OMPBuilder.getOrCreateRuntimeFunction( 2835 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini), 2836 Args); 2837 else 2838 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2839 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2840 Args); 2841 } 2842 2843 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2844 SourceLocation Loc, 2845 unsigned IVSize, 2846 bool IVSigned) { 2847 if (!CGF.HaveInsertPoint()) 2848 return; 2849 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2850 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2851 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2852 } 2853 2854 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2855 SourceLocation Loc, unsigned IVSize, 2856 bool IVSigned, Address IL, 2857 Address LB, Address UB, 2858 Address ST) { 2859 // Call __kmpc_dispatch_next( 2860 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2861 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2862 // kmp_int[32|64] *p_stride); 2863 llvm::Value *Args[] = { 2864 emitUpdateLocation(CGF, Loc), 2865 getThreadID(CGF, Loc), 2866 IL.getPointer(), // &isLastIter 2867 LB.getPointer(), // &Lower 2868 UB.getPointer(), // &Upper 2869 ST.getPointer() // &Stride 2870 }; 2871 llvm::Value *Call = 2872 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2873 return CGF.EmitScalarConversion( 2874 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2875 CGF.getContext().BoolTy, Loc); 2876 } 2877 2878 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2879 llvm::Value 
*NumThreads, 2880 SourceLocation Loc) { 2881 if (!CGF.HaveInsertPoint()) 2882 return; 2883 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2884 llvm::Value *Args[] = { 2885 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2886 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2887 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2888 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2889 Args); 2890 } 2891 2892 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2893 ProcBindKind ProcBind, 2894 SourceLocation Loc) { 2895 if (!CGF.HaveInsertPoint()) 2896 return; 2897 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2898 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2899 llvm::Value *Args[] = { 2900 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2901 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2902 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2903 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2904 Args); 2905 } 2906 2907 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2908 SourceLocation Loc, llvm::AtomicOrdering AO) { 2909 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2910 OMPBuilder.createFlush(CGF.Builder); 2911 } else { 2912 if (!CGF.HaveInsertPoint()) 2913 return; 2914 // Build call void __kmpc_flush(ident_t *loc) 2915 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2916 CGM.getModule(), OMPRTL___kmpc_flush), 2917 emitUpdateLocation(CGF, Loc)); 2918 } 2919 } 2920 2921 namespace { 2922 /// Indexes of fields for type kmp_task_t. 2923 enum KmpTaskTFields { 2924 /// List of shared variables. 2925 KmpTaskTShareds, 2926 /// Task routine. 2927 KmpTaskTRoutine, 2928 /// Partition id for the untied tasks. 2929 KmpTaskTPartId, 2930 /// Function with call of destructors for private variables. 2931 Data1, 2932 /// Task priority. 
/// Passes the collected offload entries to
/// OMPBuilder.createOffloadEntriesAndInfoMetadata, together with a callback
/// that reports each error the builder raises as a clang error diagnostic.
///
/// Does nothing in simd-only mode or when there are no offload entries.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  // Callback invoked with the kind of error and the entry it concerns.
  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    // Loc stays default-constructed unless a matching file is found below.
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      // Look for the file whose unique ID (device, file) matches the entry and
      // point the diagnostic at column 1 of the entry's line in that file.
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      // Reported without a location and without an entry name.
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(OffloadEntriesInfoManager,
                                                 ErrorReportFn);
}
3035 ASTContext &C = CGM.getContext(); 3036 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3037 FunctionProtoType::ExtProtoInfo EPI; 3038 KmpRoutineEntryPtrQTy = C.getPointerType( 3039 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3040 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3041 } 3042 } 3043 3044 namespace { 3045 struct PrivateHelpersTy { 3046 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3047 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3048 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3049 PrivateElemInit(PrivateElemInit) {} 3050 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3051 const Expr *OriginalRef = nullptr; 3052 const VarDecl *Original = nullptr; 3053 const VarDecl *PrivateCopy = nullptr; 3054 const VarDecl *PrivateElemInit = nullptr; 3055 bool isLocalPrivate() const { 3056 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3057 } 3058 }; 3059 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3060 } // anonymous namespace 3061 3062 static bool isAllocatableDecl(const VarDecl *VD) { 3063 const VarDecl *CVD = VD->getCanonicalDecl(); 3064 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3065 return false; 3066 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3067 // Use the default allocation. 3068 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 3069 !AA->getAllocator()); 3070 } 3071 3072 static RecordDecl * 3073 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3074 if (!Privates.empty()) { 3075 ASTContext &C = CGM.getContext(); 3076 // Build struct .kmp_privates_t. 
{ 3077 // /* private vars */ 3078 // }; 3079 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3080 RD->startDefinition(); 3081 for (const auto &Pair : Privates) { 3082 const VarDecl *VD = Pair.second.Original; 3083 QualType Type = VD->getType().getNonReferenceType(); 3084 // If the private variable is a local variable with lvalue ref type, 3085 // allocate the pointer instead of the pointee type. 3086 if (Pair.second.isLocalPrivate()) { 3087 if (VD->getType()->isLValueReferenceType()) 3088 Type = C.getPointerType(Type); 3089 if (isAllocatableDecl(VD)) 3090 Type = C.getPointerType(Type); 3091 } 3092 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3093 if (VD->hasAttrs()) { 3094 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3095 E(VD->getAttrs().end()); 3096 I != E; ++I) 3097 FD->addAttr(*I); 3098 } 3099 } 3100 RD->completeDefinition(); 3101 return RD; 3102 } 3103 return nullptr; 3104 } 3105 3106 static RecordDecl * 3107 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3108 QualType KmpInt32Ty, 3109 QualType KmpRoutineEntryPointerQTy) { 3110 ASTContext &C = CGM.getContext(); 3111 // Build struct kmp_task_t { 3112 // void * shareds; 3113 // kmp_routine_entry_t routine; 3114 // kmp_int32 part_id; 3115 // kmp_cmplrdata_t data1; 3116 // kmp_cmplrdata_t data2; 3117 // For taskloops additional fields: 3118 // kmp_uint64 lb; 3119 // kmp_uint64 ub; 3120 // kmp_int64 st; 3121 // kmp_int32 liter; 3122 // void * reductions; 3123 // }; 3124 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3125 UD->startDefinition(); 3126 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3127 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3128 UD->completeDefinition(); 3129 QualType KmpCmplrdataTy = C.getRecordType(UD); 3130 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3131 RD->startDefinition(); 3132 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3133 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3134 
addFieldToRecordDecl(C, RD, KmpInt32Ty); 3135 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3136 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3137 if (isOpenMPTaskLoopDirective(Kind)) { 3138 QualType KmpUInt64Ty = 3139 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3140 QualType KmpInt64Ty = 3141 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3142 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3143 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3144 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3145 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3146 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3147 } 3148 RD->completeDefinition(); 3149 return RD; 3150 } 3151 3152 static RecordDecl * 3153 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3154 ArrayRef<PrivateDataTy> Privates) { 3155 ASTContext &C = CGM.getContext(); 3156 // Build struct kmp_task_t_with_privates { 3157 // kmp_task_t task_data; 3158 // .kmp_privates_t. privates; 3159 // }; 3160 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3161 RD->startDefinition(); 3162 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3163 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3164 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3165 RD->completeDefinition(); 3166 return RD; 3167 } 3168 3169 /// Emit a proxy function which accepts kmp_task_t as the second 3170 /// argument. 
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t_with_privates *tt) {
///   TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
///   task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->task_data.reductions,
///   tt->task_data.shareds);
///   return 0;
/// }
/// \endcode
///
/// \param Kind            Directive kind; taskloop directives add the lb, ub,
///                        st, liter and reductions arguments.
/// \param SharedsPtrTy    Type the loaded shareds pointer is cast to.
/// \param TaskFunction    Outlined task function called by the proxy.
/// \param TaskPrivatesMap Value forwarded unchanged as the fourth argument.
/// \returns The newly created internal-linkage proxy function.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Proxy signature: kmp_int32 (kmp_int32 gtid, <task-with-privates> *restrict).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.reductions,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is *tt; Base is its first field, the embedded kmp_task_t.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // shareds is loaded here but appended to the argument list last.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record is the optional second field of the wrapper struct;
  // pass a null pointer when the wrapper has no such field.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to all task kinds; the last one is tt itself as void*.
  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloop extras, loaded by value from the embedded kmp_task_t.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // return 0;
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
ImplicitParamDecl::Other); 3296 Args.push_back(&GtidArg); 3297 Args.push_back(&TaskTypeArg); 3298 const auto &DestructorFnInfo = 3299 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3300 llvm::FunctionType *DestructorFnTy = 3301 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3302 std::string Name = 3303 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3304 auto *DestructorFn = 3305 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3306 Name, &CGM.getModule()); 3307 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3308 DestructorFnInfo); 3309 DestructorFn->setDoesNotRecurse(); 3310 CodeGenFunction CGF(CGM); 3311 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3312 Args, Loc, Loc); 3313 3314 LValue Base = CGF.EmitLoadOfPointerLValue( 3315 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3316 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3317 const auto *KmpTaskTWithPrivatesQTyRD = 3318 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3319 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3320 Base = CGF.EmitLValueForField(Base, *FI); 3321 for (const auto *Field : 3322 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3323 if (QualType::DestructionKind DtorKind = 3324 Field->getType().isDestructedType()) { 3325 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3326 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3327 } 3328 } 3329 CGF.FinishFunction(); 3330 return DestructorFn; 3331 } 3332 3333 /// Emit a privates mapping function for correct handling of private and 3334 /// firstprivate variables. 3335 /// \code 3336 /// void .omp_task_privates_map.(const .privates. 
/// *noalias privs, <ty1>
/// **noalias priv1,..., <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
///
/// \param Data Supplies the private, firstprivate, lastprivate and local
/// private variables; one out-parameter is created per variable.
/// \param PrivatesQTy Type of the privates record passed as first argument.
/// \param Privates Private copies, indexed in step with the fields of
/// \p PrivatesQTy.
/// \return The emitted internal-linkage mapping function.
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Maps each original variable to the index of its out-parameter in Args.
  // Counting starts at 1 because Args[0] is the privates record pointer.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    // Local privates: references and allocatable declarations each add one
    // more pointer level to the mapped type.
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  // When optimizing, request that the mapping function always be inlined.
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  // Counter is reused as the index into Privates; the i-th field of the
  // privates record is paired with Privates[i].
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
///
/// \param D Directive whose 'task' or 'taskloop' captured statement supplies
/// the capture information.
/// \param KmpTaskSharedsPtr Address of the shareds block; only used as the
/// source of firstprivate values in the cases selected below.
/// \param TDBase LValue of the task-with-privates record being initialized.
/// \param KmpTaskTWithPrivatesQTyRD Record whose second field is the privates
/// record.
/// \param Privates Private copies, walked in step with the privates fields.
/// \param ForDup True when emitting into the task duplication function; then
/// only copies with a non-trivial constructor initializer are emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  // From here on FI walks the fields of the privates record itself, advancing
  // once per entry of Privates (including skipped ones).
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit marks a copy that is initialized from the
      // original (shared) variable, so its address has to be located first.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // In the duplication function the source is the captured field of
          // the shareds block, re-aligned to the original declaration.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda captures and block bodies: emit the reference directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array copy: remap Elem to the shared address, then emit the
          // initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element initializer: the copy's own initializer is emitted as-is.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
3563 static bool checkInitIsRequired(CodeGenFunction &CGF, 3564 ArrayRef<PrivateDataTy> Privates) { 3565 bool InitRequired = false; 3566 for (const PrivateDataTy &Pair : Privates) { 3567 if (Pair.second.isLocalPrivate()) 3568 continue; 3569 const VarDecl *VD = Pair.second.PrivateCopy; 3570 const Expr *Init = VD->getAnyInitializer(); 3571 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) && 3572 !CGF.isTrivialInitializer(Init)); 3573 if (InitRequired) 3574 break; 3575 } 3576 return InitRequired; 3577 } 3578 3579 3580 /// Emit task_dup function (for initialization of 3581 /// private/firstprivate/lastprivate vars and last_iter flag) 3582 /// \code 3583 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3584 /// lastpriv) { 3585 /// // setup lastprivate flag 3586 /// task_dst->last = lastpriv; 3587 /// // could be constructor calls here... 3588 /// } 3589 /// \endcode 3590 static llvm::Value * 3591 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3592 const OMPExecutableDirective &D, 3593 QualType KmpTaskTWithPrivatesPtrQTy, 3594 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3595 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3596 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3597 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3598 ASTContext &C = CGM.getContext(); 3599 FunctionArgList Args; 3600 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3601 KmpTaskTWithPrivatesPtrQTy, 3602 ImplicitParamDecl::Other); 3603 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3604 KmpTaskTWithPrivatesPtrQTy, 3605 ImplicitParamDecl::Other); 3606 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3607 ImplicitParamDecl::Other); 3608 Args.push_back(&DstArg); 3609 Args.push_back(&SrcArg); 3610 Args.push_back(&LastprivArg); 3611 const auto &TaskDupFnInfo = 3612 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3613 llvm::FunctionType 
*TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3614 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3615 auto *TaskDup = llvm::Function::Create( 3616 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3617 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3618 TaskDup->setDoesNotRecurse(); 3619 CodeGenFunction CGF(CGM); 3620 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3621 Loc); 3622 3623 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3624 CGF.GetAddrOfLocalVar(&DstArg), 3625 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3626 // task_dst->liter = lastpriv; 3627 if (WithLastIter) { 3628 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3629 LValue Base = CGF.EmitLValueForField( 3630 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3631 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3632 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3633 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3634 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3635 } 3636 3637 // Emit initial values for private copies (if any). 
3638 assert(!Privates.empty()); 3639 Address KmpTaskSharedsPtr = Address::invalid(); 3640 if (!Data.FirstprivateVars.empty()) { 3641 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3642 CGF.GetAddrOfLocalVar(&SrcArg), 3643 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3644 LValue Base = CGF.EmitLValueForField( 3645 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3646 KmpTaskSharedsPtr = Address( 3647 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3648 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3649 KmpTaskTShareds)), 3650 Loc), 3651 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy)); 3652 } 3653 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 3654 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 3655 CGF.FinishFunction(); 3656 return TaskDup; 3657 } 3658 3659 /// Checks if destructor function is required to be generated. 3660 /// \return true if cleanups are required, false otherwise. 3661 static bool 3662 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3663 ArrayRef<PrivateDataTy> Privates) { 3664 for (const PrivateDataTy &P : Privates) { 3665 if (P.second.isLocalPrivate()) 3666 continue; 3667 QualType Ty = P.second.Original->getType().getNonReferenceType(); 3668 if (Ty.isDestructedType()) 3669 return true; 3670 } 3671 return false; 3672 } 3673 3674 namespace { 3675 /// Loop generator for OpenMP iterator expression. 
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  // Iterator expression being expanded; null makes the scope a no-op.
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator jump destinations for the loop header ("iter.cont") and the
  // loop exit ("iter.exit"), indexed like the iterators of E.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Opens one loop per iterator of \p E. Code emitted while the scope is
  /// alive lands in the body of the innermost loop; the destructor closes the
  /// loops again.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate all upper bounds first and create private storage for every
    // iterator variable and its counter before any loop is opened.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The comparison signedness follows the counter's declared type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loops opened by the constructor, innermost first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      // Only the outermost loop's exit block (I == 1) is emitted as finished.
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Compute the address and the size in bytes of the object designated by
/// \p E.
///
/// Three forms are distinguished: an array shaping expression (address of the
/// base, pointee size multiplied by every dimension), an array section
/// (distance from the lower bound to one element past the upper bound), and
/// any other lvalue (size of its type).
/// \return Pair of (address, size).
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    // Step one element past the upper bound so that the pointer difference
    // below covers the last element as well.
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds the kmp_task_affinity_info_t record type, if it is not built yet.
/// The record has three fields: an intptr-sized field, a size_t field and an
/// unsigned 32-bit flags field.
/// \param KmpTaskAffinityInfoTy In/out cache for the record type; left
/// untouched when it is already non-null.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
3809 const auto *I = Data.PrivateCopies.begin(); 3810 for (const Expr *E : Data.PrivateVars) { 3811 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3812 Privates.emplace_back( 3813 C.getDeclAlign(VD), 3814 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 3815 /*PrivateElemInit=*/nullptr)); 3816 ++I; 3817 } 3818 I = Data.FirstprivateCopies.begin(); 3819 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 3820 for (const Expr *E : Data.FirstprivateVars) { 3821 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3822 Privates.emplace_back( 3823 C.getDeclAlign(VD), 3824 PrivateHelpersTy( 3825 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 3826 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 3827 ++I; 3828 ++IElemInitRef; 3829 } 3830 I = Data.LastprivateCopies.begin(); 3831 for (const Expr *E : Data.LastprivateVars) { 3832 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3833 Privates.emplace_back( 3834 C.getDeclAlign(VD), 3835 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 3836 /*PrivateElemInit=*/nullptr)); 3837 ++I; 3838 } 3839 for (const VarDecl *VD : Data.PrivateLocals) { 3840 if (isAllocatableDecl(VD)) 3841 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); 3842 else 3843 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 3844 } 3845 llvm::stable_sort(Privates, 3846 [](const PrivateDataTy &L, const PrivateDataTy &R) { 3847 return L.first > R.first; 3848 }); 3849 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3850 // Build type kmp_routine_entry_t (if not built yet). 3851 emitKmpRoutineEntryT(KmpInt32Ty); 3852 // Build type kmp_task_t (if not built yet). 
3853 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 3854 if (SavedKmpTaskloopTQTy.isNull()) { 3855 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 3856 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 3857 } 3858 KmpTaskTQTy = SavedKmpTaskloopTQTy; 3859 } else { 3860 assert((D.getDirectiveKind() == OMPD_task || 3861 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 3862 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 3863 "Expected taskloop, task or target directive"); 3864 if (SavedKmpTaskTQTy.isNull()) { 3865 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 3866 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 3867 } 3868 KmpTaskTQTy = SavedKmpTaskTQTy; 3869 } 3870 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3871 // Build particular struct kmp_task_t for the given task. 3872 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 3873 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 3874 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 3875 QualType KmpTaskTWithPrivatesPtrQTy = 3876 C.getPointerType(KmpTaskTWithPrivatesQTy); 3877 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 3878 llvm::Type *KmpTaskTWithPrivatesPtrTy = 3879 KmpTaskTWithPrivatesTy->getPointerTo(); 3880 llvm::Value *KmpTaskTWithPrivatesTySize = 3881 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 3882 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 3883 3884 // Emit initial values for private copies (if any). 
3885 llvm::Value *TaskPrivatesMap = nullptr; 3886 llvm::Type *TaskPrivatesMapTy = 3887 std::next(TaskFunction->arg_begin(), 3)->getType(); 3888 if (!Privates.empty()) { 3889 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3890 TaskPrivatesMap = 3891 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); 3892 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3893 TaskPrivatesMap, TaskPrivatesMapTy); 3894 } else { 3895 TaskPrivatesMap = llvm::ConstantPointerNull::get( 3896 cast<llvm::PointerType>(TaskPrivatesMapTy)); 3897 } 3898 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 3899 // kmp_task_t *tt); 3900 llvm::Function *TaskEntry = emitProxyTaskFunction( 3901 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 3902 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 3903 TaskPrivatesMap); 3904 3905 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 3906 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 3907 // kmp_routine_entry_t *task_entry); 3908 // Task flags. Format is taken from 3909 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h, 3910 // description of kmp_tasking_flags struct. 3911 enum { 3912 TiedFlag = 0x1, 3913 FinalFlag = 0x2, 3914 DestructorsFlag = 0x8, 3915 PriorityFlag = 0x20, 3916 DetachableFlag = 0x40, 3917 }; 3918 unsigned Flags = Data.Tied ? TiedFlag : 0; 3919 bool NeedsCleanup = false; 3920 if (!Privates.empty()) { 3921 NeedsCleanup = 3922 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); 3923 if (NeedsCleanup) 3924 Flags = Flags | DestructorsFlag; 3925 } 3926 if (Data.Priority.getInt()) 3927 Flags = Flags | PriorityFlag; 3928 if (D.hasClausesOfKind<OMPDetachClause>()) 3929 Flags = Flags | DetachableFlag; 3930 llvm::Value *TaskFlags = 3931 Data.Final.getPointer() 3932 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 3933 CGF.Builder.getInt32(FinalFlag), 3934 CGF.Builder.getInt32(/*C=*/0)) 3935 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 3936 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 3937 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 3938 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 3939 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 3940 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3941 TaskEntry, KmpRoutineEntryPtrTy)}; 3942 llvm::Value *NewTask; 3943 if (D.hasClausesOfKind<OMPNowaitClause>()) { 3944 // Check if we have any device clause associated with the directive. 3945 const Expr *Device = nullptr; 3946 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 3947 Device = C->getDevice(); 3948 // Emit device ID if any otherwise use default value. 3949 llvm::Value *DeviceID; 3950 if (Device) 3951 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 3952 CGF.Int64Ty, /*isSigned=*/true); 3953 else 3954 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 3955 AllocArgs.push_back(DeviceID); 3956 NewTask = CGF.EmitRuntimeCall( 3957 OMPBuilder.getOrCreateRuntimeFunction( 3958 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 3959 AllocArgs); 3960 } else { 3961 NewTask = 3962 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 3963 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 3964 AllocArgs); 3965 } 3966 // Emit detach clause initialization. 
3967 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 3968 // task_descriptor); 3969 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 3970 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 3971 LValue EvtLVal = CGF.EmitLValue(Evt); 3972 3973 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 3974 // int gtid, kmp_task_t *task); 3975 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 3976 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 3977 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 3978 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 3979 OMPBuilder.getOrCreateRuntimeFunction( 3980 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 3981 {Loc, Tid, NewTask}); 3982 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 3983 Evt->getExprLoc()); 3984 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 3985 } 3986 // Process affinity clauses. 3987 if (D.hasClausesOfKind<OMPAffinityClause>()) { 3988 // Process list of affinity data. 3989 ASTContext &C = CGM.getContext(); 3990 Address AffinitiesArray = Address::invalid(); 3991 // Calculate number of elements to form the array of affinity data. 3992 llvm::Value *NumOfElements = nullptr; 3993 unsigned NumAffinities = 0; 3994 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 3995 if (const Expr *Modifier = C->getModifier()) { 3996 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 3997 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 3998 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 3999 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4000 NumOfElements = 4001 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 4002 } 4003 } else { 4004 NumAffinities += C->varlist_size(); 4005 } 4006 } 4007 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 4008 // Fields ids in kmp_task_affinity_info record. 
4009 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 4010 4011 QualType KmpTaskAffinityInfoArrayTy; 4012 if (NumOfElements) { 4013 NumOfElements = CGF.Builder.CreateNUWAdd( 4014 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 4015 auto *OVE = new (C) OpaqueValueExpr( 4016 Loc, 4017 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 4018 VK_PRValue); 4019 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4020 RValue::get(NumOfElements)); 4021 KmpTaskAffinityInfoArrayTy = 4022 C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal, 4023 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4024 // Properly emit variable-sized array. 4025 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 4026 ImplicitParamDecl::Other); 4027 CGF.EmitVarDecl(*PD); 4028 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 4029 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4030 /*isSigned=*/false); 4031 } else { 4032 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 4033 KmpTaskAffinityInfoTy, 4034 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 4035 ArrayType::Normal, /*IndexTypeQuals=*/0); 4036 AffinitiesArray = 4037 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 4038 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 4039 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 4040 /*isSigned=*/false); 4041 } 4042 4043 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 4044 // Fill array by elements without iterators. 
4045 unsigned Pos = 0; 4046 bool HasIterator = false; 4047 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4048 if (C->getModifier()) { 4049 HasIterator = true; 4050 continue; 4051 } 4052 for (const Expr *E : C->varlists()) { 4053 llvm::Value *Addr; 4054 llvm::Value *Size; 4055 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4056 LValue Base = 4057 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 4058 KmpTaskAffinityInfoTy); 4059 // affs[i].base_addr = &<Affinities[i].second>; 4060 LValue BaseAddrLVal = CGF.EmitLValueForField( 4061 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4062 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4063 BaseAddrLVal); 4064 // affs[i].len = sizeof(<Affinities[i].second>); 4065 LValue LenLVal = CGF.EmitLValueForField( 4066 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4067 CGF.EmitStoreOfScalar(Size, LenLVal); 4068 ++Pos; 4069 } 4070 } 4071 LValue PosLVal; 4072 if (HasIterator) { 4073 PosLVal = CGF.MakeAddrLValue( 4074 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 4075 C.getSizeType()); 4076 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4077 } 4078 // Process elements with iterators. 
4079 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4080 const Expr *Modifier = C->getModifier(); 4081 if (!Modifier) 4082 continue; 4083 OMPIteratorGeneratorScope IteratorScope( 4084 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4085 for (const Expr *E : C->varlists()) { 4086 llvm::Value *Addr; 4087 llvm::Value *Size; 4088 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4089 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4090 LValue Base = CGF.MakeAddrLValue( 4091 CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy); 4092 // affs[i].base_addr = &<Affinities[i].second>; 4093 LValue BaseAddrLVal = CGF.EmitLValueForField( 4094 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4095 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4096 BaseAddrLVal); 4097 // affs[i].len = sizeof(<Affinities[i].second>); 4098 LValue LenLVal = CGF.EmitLValueForField( 4099 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4100 CGF.EmitStoreOfScalar(Size, LenLVal); 4101 Idx = CGF.Builder.CreateNUWAdd( 4102 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4103 CGF.EmitStoreOfScalar(Idx, PosLVal); 4104 } 4105 } 4106 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4107 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4108 // naffins, kmp_task_affinity_info_t *affin_list); 4109 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4110 llvm::Value *GTid = getThreadID(CGF, Loc); 4111 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4112 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4113 // FIXME: Emit the function and ignore its result for now unless the 4114 // runtime function is properly implemented. 
4115 (void)CGF.EmitRuntimeCall( 4116 OMPBuilder.getOrCreateRuntimeFunction( 4117 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4118 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4119 } 4120 llvm::Value *NewTaskNewTaskTTy = 4121 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4122 NewTask, KmpTaskTWithPrivatesPtrTy); 4123 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4124 KmpTaskTWithPrivatesQTy); 4125 LValue TDBase = 4126 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4127 // Fill the data in the resulting kmp_task_t record. 4128 // Copy shareds if there are any. 4129 Address KmpTaskSharedsPtr = Address::invalid(); 4130 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4131 KmpTaskSharedsPtr = Address( 4132 CGF.EmitLoadOfScalar( 4133 CGF.EmitLValueForField( 4134 TDBase, 4135 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), 4136 Loc), 4137 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy)); 4138 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4139 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4140 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4141 } 4142 // Emit initial values for private copies (if any). 4143 TaskResultTy Result; 4144 if (!Privates.empty()) { 4145 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4146 SharedsTy, SharedsPtrTy, Data, Privates, 4147 /*ForDup=*/false); 4148 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4149 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4150 Result.TaskDupFn = emitTaskDupFunction( 4151 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4152 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4153 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4154 } 4155 } 4156 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 
4157 enum { Priority = 0, Destructors = 1 }; 4158 // Provide pointer to function with destructors for privates. 4159 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4160 const RecordDecl *KmpCmplrdataUD = 4161 (*FI)->getType()->getAsUnionType()->getDecl(); 4162 if (NeedsCleanup) { 4163 llvm::Value *DestructorFn = emitDestructorsFunction( 4164 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4165 KmpTaskTWithPrivatesQTy); 4166 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4167 LValue DestructorsLV = CGF.EmitLValueForField( 4168 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4169 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4170 DestructorFn, KmpRoutineEntryPtrTy), 4171 DestructorsLV); 4172 } 4173 // Set priority. 4174 if (Data.Priority.getInt()) { 4175 LValue Data2LV = CGF.EmitLValueForField( 4176 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4177 LValue PriorityLV = CGF.EmitLValueForField( 4178 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4179 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4180 } 4181 Result.NewTask = NewTask; 4182 Result.TaskEntry = TaskEntry; 4183 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4184 Result.TDBase = TDBase; 4185 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4186 return Result; 4187 } 4188 4189 /// Translates internal dependency kind into the runtime kind. 4190 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 4191 RTLDependenceKindTy DepKind; 4192 switch (K) { 4193 case OMPC_DEPEND_in: 4194 DepKind = RTLDependenceKindTy::DepIn; 4195 break; 4196 // Out and InOut dependencies must use the same code. 
4197 case OMPC_DEPEND_out: 4198 case OMPC_DEPEND_inout: 4199 DepKind = RTLDependenceKindTy::DepInOut; 4200 break; 4201 case OMPC_DEPEND_mutexinoutset: 4202 DepKind = RTLDependenceKindTy::DepMutexInOutSet; 4203 break; 4204 case OMPC_DEPEND_inoutset: 4205 DepKind = RTLDependenceKindTy::DepInOutSet; 4206 break; 4207 case OMPC_DEPEND_outallmemory: 4208 DepKind = RTLDependenceKindTy::DepOmpAllMem; 4209 break; 4210 case OMPC_DEPEND_source: 4211 case OMPC_DEPEND_sink: 4212 case OMPC_DEPEND_depobj: 4213 case OMPC_DEPEND_inoutallmemory: 4214 case OMPC_DEPEND_unknown: 4215 llvm_unreachable("Unknown task dependence type"); 4216 } 4217 return DepKind; 4218 } 4219 4220 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4221 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4222 QualType &FlagsTy) { 4223 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4224 if (KmpDependInfoTy.isNull()) { 4225 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4226 KmpDependInfoRD->startDefinition(); 4227 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4228 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4229 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4230 KmpDependInfoRD->completeDefinition(); 4231 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4232 } 4233 } 4234 4235 std::pair<llvm::Value *, LValue> 4236 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4237 SourceLocation Loc) { 4238 ASTContext &C = CGM.getContext(); 4239 QualType FlagsTy; 4240 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4241 RecordDecl *KmpDependInfoRD = 4242 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4243 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4244 LValue Base = CGF.EmitLoadOfPointerLValue( 4245 CGF.Builder.CreateElementBitCast( 4246 DepobjLVal.getAddress(CGF), 4247 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)), 4248 
KmpDependInfoPtrTy->castAs<PointerType>()); 4249 Address DepObjAddr = CGF.Builder.CreateGEP( 4250 Base.getAddress(CGF), 4251 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4252 LValue NumDepsBase = CGF.MakeAddrLValue( 4253 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); 4254 // NumDeps = deps[i].base_addr; 4255 LValue BaseAddrLVal = CGF.EmitLValueForField( 4256 NumDepsBase, 4257 *std::next(KmpDependInfoRD->field_begin(), 4258 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr))); 4259 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4260 return std::make_pair(NumDeps, Base); 4261 } 4262 4263 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4264 llvm::PointerUnion<unsigned *, LValue *> Pos, 4265 const OMPTaskDataTy::DependData &Data, 4266 Address DependenciesArray) { 4267 CodeGenModule &CGM = CGF.CGM; 4268 ASTContext &C = CGM.getContext(); 4269 QualType FlagsTy; 4270 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4271 RecordDecl *KmpDependInfoRD = 4272 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4273 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4274 4275 OMPIteratorGeneratorScope IteratorScope( 4276 CGF, cast_or_null<OMPIteratorExpr>( 4277 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4278 : nullptr)); 4279 for (const Expr *E : Data.DepExprs) { 4280 llvm::Value *Addr; 4281 llvm::Value *Size; 4282 4283 // The expression will be a nullptr in the 'omp_all_memory' case. 
4284 if (E) { 4285 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4286 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy); 4287 } else { 4288 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4289 Size = llvm::ConstantInt::get(CGF.SizeTy, 0); 4290 } 4291 LValue Base; 4292 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4293 Base = CGF.MakeAddrLValue( 4294 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); 4295 } else { 4296 assert(E && "Expected a non-null expression"); 4297 LValue &PosLVal = *Pos.get<LValue *>(); 4298 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4299 Base = CGF.MakeAddrLValue( 4300 CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy); 4301 } 4302 // deps[i].base_addr = &<Dependencies[i].second>; 4303 LValue BaseAddrLVal = CGF.EmitLValueForField( 4304 Base, 4305 *std::next(KmpDependInfoRD->field_begin(), 4306 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr))); 4307 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal); 4308 // deps[i].len = sizeof(<Dependencies[i].second>); 4309 LValue LenLVal = CGF.EmitLValueForField( 4310 Base, *std::next(KmpDependInfoRD->field_begin(), 4311 static_cast<unsigned int>(RTLDependInfoFields::Len))); 4312 CGF.EmitStoreOfScalar(Size, LenLVal); 4313 // deps[i].flags = <Dependencies[i].first>; 4314 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); 4315 LValue FlagsLVal = CGF.EmitLValueForField( 4316 Base, 4317 *std::next(KmpDependInfoRD->field_begin(), 4318 static_cast<unsigned int>(RTLDependInfoFields::Flags))); 4319 CGF.EmitStoreOfScalar( 4320 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)), 4321 FlagsLVal); 4322 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4323 ++(*P); 4324 } else { 4325 LValue &PosLVal = *Pos.get<LValue *>(); 4326 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4327 Idx = CGF.Builder.CreateNUWAdd(Idx, 4328 llvm::ConstantInt::get(Idx->getType(), 1)); 4329 CGF.EmitStoreOfScalar(Idx, 
PosLVal); 4330 } 4331 } 4332 } 4333 4334 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes( 4335 CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4336 const OMPTaskDataTy::DependData &Data) { 4337 assert(Data.DepKind == OMPC_DEPEND_depobj && 4338 "Expected depobj dependency kind."); 4339 SmallVector<llvm::Value *, 4> Sizes; 4340 SmallVector<LValue, 4> SizeLVals; 4341 ASTContext &C = CGF.getContext(); 4342 { 4343 OMPIteratorGeneratorScope IteratorScope( 4344 CGF, cast_or_null<OMPIteratorExpr>( 4345 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4346 : nullptr)); 4347 for (const Expr *E : Data.DepExprs) { 4348 llvm::Value *NumDeps; 4349 LValue Base; 4350 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4351 std::tie(NumDeps, Base) = 4352 getDepobjElements(CGF, DepobjLVal, E->getExprLoc()); 4353 LValue NumLVal = CGF.MakeAddrLValue( 4354 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 4355 C.getUIntPtrType()); 4356 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0), 4357 NumLVal.getAddress(CGF)); 4358 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 4359 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 4360 CGF.EmitStoreOfScalar(Add, NumLVal); 4361 SizeLVals.push_back(NumLVal); 4362 } 4363 } 4364 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 4365 llvm::Value *Size = 4366 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 4367 Sizes.push_back(Size); 4368 } 4369 return Sizes; 4370 } 4371 4372 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF, 4373 QualType &KmpDependInfoTy, 4374 LValue PosLVal, 4375 const OMPTaskDataTy::DependData &Data, 4376 Address DependenciesArray) { 4377 assert(Data.DepKind == OMPC_DEPEND_depobj && 4378 "Expected depobj dependency kind."); 4379 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 4380 { 4381 OMPIteratorGeneratorScope IteratorScope( 4382 CGF, cast_or_null<OMPIteratorExpr>( 4383 
Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4384 : nullptr)); 4385 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4386 const Expr *E = Data.DepExprs[I]; 4387 llvm::Value *NumDeps; 4388 LValue Base; 4389 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4390 std::tie(NumDeps, Base) = 4391 getDepobjElements(CGF, DepobjLVal, E->getExprLoc()); 4392 4393 // memcopy dependency data. 4394 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4395 ElSize, 4396 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4397 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4398 Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos); 4399 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4400 4401 // Increase pos. 4402 // pos += size; 4403 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4404 CGF.EmitStoreOfScalar(Add, PosLVal); 4405 } 4406 } 4407 } 4408 4409 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4410 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4411 SourceLocation Loc) { 4412 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4413 return D.DepExprs.empty(); 4414 })) 4415 return std::make_pair(nullptr, Address::invalid()); 4416 // Process list of dependencies. 4417 ASTContext &C = CGM.getContext(); 4418 Address DependenciesArray = Address::invalid(); 4419 llvm::Value *NumOfElements = nullptr; 4420 unsigned NumDependencies = std::accumulate( 4421 Dependencies.begin(), Dependencies.end(), 0, 4422 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4423 return D.DepKind == OMPC_DEPEND_depobj 4424 ? V 4425 : (V + (D.IteratorExpr ? 
0 : D.DepExprs.size())); 4426 }); 4427 QualType FlagsTy; 4428 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4429 bool HasDepobjDeps = false; 4430 bool HasRegularWithIterators = false; 4431 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4432 llvm::Value *NumOfRegularWithIterators = 4433 llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4434 // Calculate number of depobj dependencies and regular deps with the 4435 // iterators. 4436 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4437 if (D.DepKind == OMPC_DEPEND_depobj) { 4438 SmallVector<llvm::Value *, 4> Sizes = 4439 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4440 for (llvm::Value *Size : Sizes) { 4441 NumOfDepobjElements = 4442 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4443 } 4444 HasDepobjDeps = true; 4445 continue; 4446 } 4447 // Include number of iterations, if any. 4448 4449 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4450 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4451 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4452 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4453 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul( 4454 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size())); 4455 NumOfRegularWithIterators = 4456 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps); 4457 } 4458 HasRegularWithIterators = true; 4459 continue; 4460 } 4461 } 4462 4463 QualType KmpDependInfoArrayTy; 4464 if (HasDepobjDeps || HasRegularWithIterators) { 4465 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4466 /*isSigned=*/false); 4467 if (HasDepobjDeps) { 4468 NumOfElements = 4469 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4470 } 4471 if (HasRegularWithIterators) { 4472 NumOfElements = 4473 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4474 } 4475 auto *OVE = new (C) OpaqueValueExpr( 4476 Loc, 
C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4477 VK_PRValue); 4478 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4479 RValue::get(NumOfElements)); 4480 KmpDependInfoArrayTy = 4481 C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal, 4482 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4483 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4484 // Properly emit variable-sized array. 4485 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4486 ImplicitParamDecl::Other); 4487 CGF.EmitVarDecl(*PD); 4488 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4489 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4490 /*isSigned=*/false); 4491 } else { 4492 KmpDependInfoArrayTy = C.getConstantArrayType( 4493 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4494 ArrayType::Normal, /*IndexTypeQuals=*/0); 4495 DependenciesArray = 4496 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4497 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4498 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4499 /*isSigned=*/false); 4500 } 4501 unsigned Pos = 0; 4502 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4503 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4504 Dependencies[I].IteratorExpr) 4505 continue; 4506 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4507 DependenciesArray); 4508 } 4509 // Copy regular dependencies with iterators. 
4510 LValue PosLVal = CGF.MakeAddrLValue( 4511 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4512 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4513 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4514 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4515 !Dependencies[I].IteratorExpr) 4516 continue; 4517 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4518 DependenciesArray); 4519 } 4520 // Copy final depobj arrays without iterators. 4521 if (HasDepobjDeps) { 4522 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4523 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4524 continue; 4525 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4526 DependenciesArray); 4527 } 4528 } 4529 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4530 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty); 4531 return std::make_pair(NumOfElements, DependenciesArray); 4532 } 4533 4534 Address CGOpenMPRuntime::emitDepobjDependClause( 4535 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4536 SourceLocation Loc) { 4537 if (Dependencies.DepExprs.empty()) 4538 return Address::invalid(); 4539 // Process list of dependencies. 4540 ASTContext &C = CGM.getContext(); 4541 Address DependenciesArray = Address::invalid(); 4542 unsigned NumDependencies = Dependencies.DepExprs.size(); 4543 QualType FlagsTy; 4544 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4545 RecordDecl *KmpDependInfoRD = 4546 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4547 4548 llvm::Value *Size; 4549 // Define type kmp_depend_info[<Dependencies.size()>]; 4550 // For depobj reserve one extra element to store the number of elements. 4551 // It is required to handle depobj(x) update(in) construct. 
4552 // kmp_depend_info[<Dependencies.size()>] deps; 4553 llvm::Value *NumDepsVal; 4554 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4555 if (const auto *IE = 4556 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4557 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4558 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4559 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4560 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4561 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4562 } 4563 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4564 NumDepsVal); 4565 CharUnits SizeInBytes = 4566 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4567 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4568 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4569 NumDepsVal = 4570 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4571 } else { 4572 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4573 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4574 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 4575 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4576 Size = CGM.getSize(Sz.alignTo(Align)); 4577 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4578 } 4579 // Need to allocate on the dynamic memory. 4580 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4581 // Use default allocator. 
4582 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4583 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 4584 4585 llvm::Value *Addr = 4586 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4587 CGM.getModule(), OMPRTL___kmpc_alloc), 4588 Args, ".dep.arr.addr"); 4589 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy); 4590 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4591 Addr, KmpDependInfoLlvmTy->getPointerTo()); 4592 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align); 4593 // Write number of elements in the first element of array for depobj. 4594 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 4595 // deps[i].base_addr = NumDependencies; 4596 LValue BaseAddrLVal = CGF.EmitLValueForField( 4597 Base, 4598 *std::next(KmpDependInfoRD->field_begin(), 4599 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr))); 4600 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 4601 llvm::PointerUnion<unsigned *, LValue *> Pos; 4602 unsigned Idx = 1; 4603 LValue PosLVal; 4604 if (Dependencies.IteratorExpr) { 4605 PosLVal = CGF.MakeAddrLValue( 4606 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 4607 C.getSizeType()); 4608 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 4609 /*IsInit=*/true); 4610 Pos = &PosLVal; 4611 } else { 4612 Pos = &Idx; 4613 } 4614 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 4615 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4616 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy, 4617 CGF.Int8Ty); 4618 return DependenciesArray; 4619 } 4620 4621 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 4622 SourceLocation Loc) { 4623 ASTContext &C = CGM.getContext(); 4624 QualType FlagsTy; 4625 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4626 LValue Base = CGF.EmitLoadOfPointerLValue( 4627 DepobjLVal.getAddress(CGF), 
C.VoidPtrTy.castAs<PointerType>()); 4628 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4629 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4630 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy), 4631 CGF.ConvertTypeForMem(KmpDependInfoTy)); 4632 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4633 Addr.getElementType(), Addr.getPointer(), 4634 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4635 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 4636 CGF.VoidPtrTy); 4637 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4638 // Use default allocator. 4639 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4640 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 4641 4642 // _kmpc_free(gtid, addr, nullptr); 4643 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4644 CGM.getModule(), OMPRTL___kmpc_free), 4645 Args); 4646 } 4647 4648 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 4649 OpenMPDependClauseKind NewDepKind, 4650 SourceLocation Loc) { 4651 ASTContext &C = CGM.getContext(); 4652 QualType FlagsTy; 4653 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4654 RecordDecl *KmpDependInfoRD = 4655 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4656 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4657 llvm::Value *NumDeps; 4658 LValue Base; 4659 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 4660 4661 Address Begin = Base.getAddress(CGF); 4662 // Cast from pointer to array type to pointer to single element. 4663 llvm::Value *End = CGF.Builder.CreateGEP( 4664 Begin.getElementType(), Begin.getPointer(), NumDeps); 4665 // The basic structure here is a while-do loop. 
4666 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 4667 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 4668 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 4669 CGF.EmitBlock(BodyBB); 4670 llvm::PHINode *ElementPHI = 4671 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 4672 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 4673 Begin = Begin.withPointer(ElementPHI); 4674 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 4675 Base.getTBAAInfo()); 4676 // deps[i].flags = NewDepKind; 4677 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 4678 LValue FlagsLVal = CGF.EmitLValueForField( 4679 Base, *std::next(KmpDependInfoRD->field_begin(), 4680 static_cast<unsigned int>(RTLDependInfoFields::Flags))); 4681 CGF.EmitStoreOfScalar( 4682 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)), 4683 FlagsLVal); 4684 4685 // Shift the address forward by one element. 4686 Address ElementNext = 4687 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 4688 ElementPHI->addIncoming(ElementNext.getPointer(), 4689 CGF.Builder.GetInsertBlock()); 4690 llvm::Value *IsEmpty = 4691 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 4692 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 4693 // Done. 
4694 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 4695 } 4696 4697 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 4698 const OMPExecutableDirective &D, 4699 llvm::Function *TaskFunction, 4700 QualType SharedsTy, Address Shareds, 4701 const Expr *IfCond, 4702 const OMPTaskDataTy &Data) { 4703 if (!CGF.HaveInsertPoint()) 4704 return; 4705 4706 TaskResultTy Result = 4707 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 4708 llvm::Value *NewTask = Result.NewTask; 4709 llvm::Function *TaskEntry = Result.TaskEntry; 4710 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 4711 LValue TDBase = Result.TDBase; 4712 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 4713 // Process list of dependences. 4714 Address DependenciesArray = Address::invalid(); 4715 llvm::Value *NumOfElements; 4716 std::tie(NumOfElements, DependenciesArray) = 4717 emitDependClause(CGF, Data.Dependences, Loc); 4718 4719 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 4720 // libcall. 
4721 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 4722 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 4723 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 4724 // list is not empty 4725 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4726 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 4727 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 4728 llvm::Value *DepTaskArgs[7]; 4729 if (!Data.Dependences.empty()) { 4730 DepTaskArgs[0] = UpLoc; 4731 DepTaskArgs[1] = ThreadID; 4732 DepTaskArgs[2] = NewTask; 4733 DepTaskArgs[3] = NumOfElements; 4734 DepTaskArgs[4] = DependenciesArray.getPointer(); 4735 DepTaskArgs[5] = CGF.Builder.getInt32(0); 4736 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4737 } 4738 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 4739 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 4740 if (!Data.Tied) { 4741 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4742 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 4743 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 4744 } 4745 if (!Data.Dependences.empty()) { 4746 CGF.EmitRuntimeCall( 4747 OMPBuilder.getOrCreateRuntimeFunction( 4748 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 4749 DepTaskArgs); 4750 } else { 4751 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4752 CGM.getModule(), OMPRTL___kmpc_omp_task), 4753 TaskArgs); 4754 } 4755 // Check if parent region is untied and build return for untied task; 4756 if (auto *Region = 4757 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 4758 Region->emitUntiedSwitch(CGF); 4759 }; 4760 4761 llvm::Value *DepWaitTaskArgs[7]; 4762 if (!Data.Dependences.empty()) { 4763 DepWaitTaskArgs[0] = UpLoc; 4764 DepWaitTaskArgs[1] = ThreadID; 4765 DepWaitTaskArgs[2] = NumOfElements; 4766 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 4767 DepWaitTaskArgs[4] 
= CGF.Builder.getInt32(0); 4768 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4769 DepWaitTaskArgs[6] = 4770 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause); 4771 } 4772 auto &M = CGM.getModule(); 4773 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 4774 TaskEntry, &Data, &DepWaitTaskArgs, 4775 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 4776 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 4777 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 4778 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 4779 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 4780 // is specified. 4781 if (!Data.Dependences.empty()) 4782 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4783 M, OMPRTL___kmpc_omp_taskwait_deps_51), 4784 DepWaitTaskArgs); 4785 // Call proxy_task_entry(gtid, new_task); 4786 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 4787 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 4788 Action.Enter(CGF); 4789 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 4790 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 4791 OutlinedFnArgs); 4792 }; 4793 4794 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 4795 // kmp_task_t *new_task); 4796 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 4797 // kmp_task_t *new_task); 4798 RegionCodeGenTy RCG(CodeGen); 4799 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 4800 M, OMPRTL___kmpc_omp_task_begin_if0), 4801 TaskArgs, 4802 OMPBuilder.getOrCreateRuntimeFunction( 4803 M, OMPRTL___kmpc_omp_task_complete_if0), 4804 TaskArgs); 4805 RCG.setAction(Action); 4806 RCG(CGF); 4807 }; 4808 4809 if (IfCond) { 4810 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 4811 } else { 4812 RegionCodeGenTy ThenRCG(ThenCodeGen); 4813 ThenRCG(CGF); 4814 } 4815 } 4816 4817 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, 
SourceLocation Loc, 4818 const OMPLoopDirective &D, 4819 llvm::Function *TaskFunction, 4820 QualType SharedsTy, Address Shareds, 4821 const Expr *IfCond, 4822 const OMPTaskDataTy &Data) { 4823 if (!CGF.HaveInsertPoint()) 4824 return; 4825 TaskResultTy Result = 4826 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 4827 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 4828 // libcall. 4829 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 4830 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 4831 // sched, kmp_uint64 grainsize, void *task_dup); 4832 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4833 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 4834 llvm::Value *IfVal; 4835 if (IfCond) { 4836 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 4837 /*isSigned=*/true); 4838 } else { 4839 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 4840 } 4841 4842 LValue LBLVal = CGF.EmitLValueForField( 4843 Result.TDBase, 4844 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 4845 const auto *LBVar = 4846 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 4847 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 4848 LBLVal.getQuals(), 4849 /*IsInitializer=*/true); 4850 LValue UBLVal = CGF.EmitLValueForField( 4851 Result.TDBase, 4852 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 4853 const auto *UBVar = 4854 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 4855 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 4856 UBLVal.getQuals(), 4857 /*IsInitializer=*/true); 4858 LValue StLVal = CGF.EmitLValueForField( 4859 Result.TDBase, 4860 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 4861 const auto *StVar = 4862 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 4863 
CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 4864 StLVal.getQuals(), 4865 /*IsInitializer=*/true); 4866 // Store reductions address. 4867 LValue RedLVal = CGF.EmitLValueForField( 4868 Result.TDBase, 4869 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 4870 if (Data.Reductions) { 4871 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 4872 } else { 4873 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 4874 CGF.getContext().VoidPtrTy); 4875 } 4876 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 4877 llvm::Value *TaskArgs[] = { 4878 UpLoc, 4879 ThreadID, 4880 Result.NewTask, 4881 IfVal, 4882 LBLVal.getPointer(CGF), 4883 UBLVal.getPointer(CGF), 4884 CGF.EmitLoadOfScalar(StLVal, Loc), 4885 llvm::ConstantInt::getSigned( 4886 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 4887 llvm::ConstantInt::getSigned( 4888 CGF.IntTy, Data.Schedule.getPointer() 4889 ? Data.Schedule.getInt() ? NumTasks : Grainsize 4890 : NoSchedule), 4891 Data.Schedule.getPointer() 4892 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 4893 /*isSigned=*/false) 4894 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 4895 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4896 Result.TaskDupFn, CGF.VoidPtrTy) 4897 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 4898 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4899 CGM.getModule(), OMPRTL___kmpc_taskloop), 4900 TaskArgs); 4901 } 4902 4903 /// Emit reduction operation for each element of array (required for 4904 /// array sections) LHS op = RHS. 4905 /// \param Type Type of array. 4906 /// \param LHSVar Variable on the left side of the reduction operation 4907 /// (references element of array in original variable). 4908 /// \param RHSVar Variable on the right side of the reduction operation 4909 /// (references element of array in original variable). 
4910 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 4911 /// RHSVar. 4912 static void EmitOMPAggregateReduction( 4913 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 4914 const VarDecl *RHSVar, 4915 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 4916 const Expr *, const Expr *)> &RedOpGen, 4917 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 4918 const Expr *UpExpr = nullptr) { 4919 // Perform element-by-element initialization. 4920 QualType ElementTy; 4921 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 4922 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 4923 4924 // Drill down to the base element type on both arrays. 4925 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 4926 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 4927 4928 llvm::Value *RHSBegin = RHSAddr.getPointer(); 4929 llvm::Value *LHSBegin = LHSAddr.getPointer(); 4930 // Cast from pointer to array type to pointer to single element. 4931 llvm::Value *LHSEnd = 4932 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements); 4933 // The basic structure here is a while-do loop. 4934 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 4935 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 4936 llvm::Value *IsEmpty = 4937 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 4938 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 4939 4940 // Enter the loop body, making that address the current address. 
4941 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 4942 CGF.EmitBlock(BodyBB); 4943 4944 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 4945 4946 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 4947 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 4948 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 4949 Address RHSElementCurrent( 4950 RHSElementPHI, RHSAddr.getElementType(), 4951 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 4952 4953 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 4954 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 4955 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 4956 Address LHSElementCurrent( 4957 LHSElementPHI, LHSAddr.getElementType(), 4958 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 4959 4960 // Emit copy. 4961 CodeGenFunction::OMPPrivateScope Scope(CGF); 4962 Scope.addPrivate(LHSVar, LHSElementCurrent); 4963 Scope.addPrivate(RHSVar, RHSElementCurrent); 4964 Scope.Privatize(); 4965 RedOpGen(CGF, XExpr, EExpr, UpExpr); 4966 Scope.ForceCleanup(); 4967 4968 // Shift the address forward by one element. 4969 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 4970 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1, 4971 "omp.arraycpy.dest.element"); 4972 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 4973 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1, 4974 "omp.arraycpy.src.element"); 4975 // Check whether we've reached the end. 4976 llvm::Value *Done = 4977 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 4978 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 4979 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 4980 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 4981 4982 // Done. 4983 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 4984 } 4985 4986 /// Emit reduction combiner. 
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
/// \param ReductionOp Reduction operation expression to emit; its value is
/// ignored.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // Recognize a call whose callee is an opaque value wrapping a reference to
  // an OMPDeclareReductionDecl.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          // Bind the opaque callee to the first function of the pair returned
          // by getUserDefinedReduction() while the call is emitted.
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  // Anything else is emitted as a plain expression.
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emits the internal-linkage function
/// \code
/// void reduction_func(void *LHSArg, void *RHSArg);
/// \endcode
/// that applies every operation in \p ReductionOps. Both arguments are cast
/// to pointers to \p ArgsElemType; the variables referenced by \p LHSExprs and
/// \p RHSExprs are remapped to the addresses obtained from those two lists via
/// emitAddrOfVarFromArray().
/// \return The emitted function.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // The body is emitted with a fresh CodeGenFunction, independent of the
  // function that requested the reduction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  // Idx is the slot in the argument lists. It runs ahead of I because each
  // variably modified item uses one additional slot (its size).
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size is read from the next slot of the LHS list, where it is
      // stored as a pointer-sized value, and bound to the VLA size expression.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Second pass: emit the combiners. The three iterators advance in lockstep
  // with ReductionOps.
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits the combiner \p ReductionOp for one reduction item. When the type of
/// \p PrivateRef is an array type the combiner is emitted once per element
/// through EmitOMPAggregateReduction(), with the variables referenced by
/// \p LHS and \p RHS standing for the current elements; otherwise it is
/// emitted once.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emits the code that combines the private reduction copies into the
/// original items.
///
/// With Options.SimpleReduction set only the combiners are emitted. Otherwise
/// a list of the reduction items is built, reduce_func is emitted, and the
/// result of __kmpc_reduce{_nowait} selects between the non-atomic combiners
/// (case 1) and the atomic / critical-region combiners (case 2). The exact
/// shape of the emitted code is shown in the comment at the top of the body.
/// \param Privates Private copies of the reduction items; their types decide
/// whether an item is handled element by element and whether an extra list
/// slot for a variable size is needed.
/// \param LHSExprs References to the LHS helper variables of the combiners.
/// \param RHSExprs References to the RHS helper variables of the combiners.
/// \param ReductionOps The combiner expressions, one per reduction item.
/// \param Options Supplies WithNowait and SimpleReduction.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime calls: just emit every combiner in place.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  // Idx is the slot in RedList. It runs ahead of I because each variably
  // modified item is followed by a slot holding its element count.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The count is stored as an integer converted to void*.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn =
      emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
                            Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The action has no first callee (nullptr / std::nullopt); the end-reduce
  // call is supplied as its second callee.
  CommonActionTy Action(
      nullptr, std::nullopt,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Decompose a combiner of the form 'x = <update>' into x (XExpr), the
      // update expression (UpExpr) and, when the update (or the condition of
      // a conditional update) is a binary operator, its opcode (BO) and
      // right operand (EExpr).
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // NOTE: the pointer declared in the condition below shadows the
      // BinaryOperatorKind 'BO' above inside this 'if' statement only.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        // The combiner is an assignment: emit it through
        // EmitOMPAtomicSimpleUpdateExpr().
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              // Callback computing the updated value from a given value of x:
              // that value is stored to a temporary, VD is remapped to the
              // temporary, and the update expression is evaluated.
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, std::nullopt,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    // With 'nowait' the atomic combiners are emitted without an end-reduce
    // action.
    AtomicRCG(CGF);
  }

  // Both cases fall through to the default block, which ends the construct.
  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5423 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5424 const Expr *Ref) { 5425 SmallString<256> Buffer; 5426 llvm::raw_svector_ostream Out(Buffer); 5427 const clang::DeclRefExpr *DE; 5428 const VarDecl *D = ::getBaseDecl(Ref, DE); 5429 if (!D) 5430 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5431 D = D->getCanonicalDecl(); 5432 std::string Name = CGM.getOpenMPRuntime().getName( 5433 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5434 Out << Prefix << Name << "_" 5435 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5436 return std::string(Out.str()); 5437 } 5438 5439 /// Emits reduction initializer function: 5440 /// \code 5441 /// void @.red_init(void* %arg, void* %orig) { 5442 /// %0 = bitcast void* %arg to <type>* 5443 /// store <type> <init>, <type>* %0 5444 /// ret void 5445 /// } 5446 /// \endcode 5447 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5448 SourceLocation Loc, 5449 ReductionCodeGen &RCG, unsigned N) { 5450 ASTContext &C = CGM.getContext(); 5451 QualType VoidPtrTy = C.VoidPtrTy; 5452 VoidPtrTy.addRestrict(); 5453 FunctionArgList Args; 5454 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5455 ImplicitParamDecl::Other); 5456 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5457 ImplicitParamDecl::Other); 5458 Args.emplace_back(&Param); 5459 Args.emplace_back(&ParamOrig); 5460 const auto &FnInfo = 5461 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5462 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5463 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5464 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5465 Name, &CGM.getModule()); 5466 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5467 Fn->setDoesNotRecurse(); 5468 CodeGenFunction CGF(CGM); 5469 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5470 QualType PrivateType = RCG.getPrivateType(N); 5471 Address PrivateAddr = CGF.EmitLoadOfPointer( 5472 CGF.Builder.CreateElementBitCast( 5473 CGF.GetAddrOfLocalVar(&Param), 5474 CGF.ConvertTypeForMem(PrivateType)->getPointerTo()), 5475 C.getPointerType(PrivateType)->castAs<PointerType>()); 5476 llvm::Value *Size = nullptr; 5477 // If the size of the reduction item is non-constant, load it from global 5478 // threadprivate variable. 5479 if (RCG.getSizes(N).second) { 5480 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5481 CGF, CGM.getContext().getSizeType(), 5482 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5483 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5484 CGM.getContext().getSizeType(), Loc); 5485 } 5486 RCG.emitAggregateType(CGF, N, Size); 5487 Address OrigAddr = Address::invalid(); 5488 // If initializer uses initializer from declare reduction construct, emit a 5489 // pointer to the address of the original reduction item (reuired by reduction 5490 // initializer) 5491 if (RCG.usesReductionInitializer(N)) { 5492 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); 5493 OrigAddr = CGF.EmitLoadOfPointer( 5494 SharedAddr, 5495 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 5496 } 5497 // Emit the initializer: 5498 // %0 = bitcast void* %arg to <type>* 5499 // store <type> <init>, <type>* %0 5500 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr, 5501 [](CodeGenFunction &) { return false; }); 5502 CGF.FinishFunction(); 5503 return Fn; 5504 } 5505 5506 /// Emits reduction combiner function: 5507 /// \code 5508 /// void @.red_comb(void* %arg0, void* %arg1) { 5509 /// %lhs = bitcast void* %arg0 to <type>* 5510 /// %rhs = bitcast void* %arg1 to <type>* 5511 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 5512 /// store <type> %2, <type>* %lhs 5513 /// ret void 5514 /// } 5515 /// \endcode 5516 static 
llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 5517 SourceLocation Loc, 5518 ReductionCodeGen &RCG, unsigned N, 5519 const Expr *ReductionOp, 5520 const Expr *LHS, const Expr *RHS, 5521 const Expr *PrivateRef) { 5522 ASTContext &C = CGM.getContext(); 5523 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 5524 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 5525 FunctionArgList Args; 5526 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 5527 C.VoidPtrTy, ImplicitParamDecl::Other); 5528 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5529 ImplicitParamDecl::Other); 5530 Args.emplace_back(&ParamInOut); 5531 Args.emplace_back(&ParamIn); 5532 const auto &FnInfo = 5533 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5534 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5535 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 5536 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5537 Name, &CGM.getModule()); 5538 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5539 Fn->setDoesNotRecurse(); 5540 CodeGenFunction CGF(CGM); 5541 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5542 llvm::Value *Size = nullptr; 5543 // If the size of the reduction item is non-constant, load it from global 5544 // threadprivate variable. 5545 if (RCG.getSizes(N).second) { 5546 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5547 CGF, CGM.getContext().getSizeType(), 5548 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5549 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5550 CGM.getContext().getSizeType(), Loc); 5551 } 5552 RCG.emitAggregateType(CGF, N, Size); 5553 // Remap lhs and rhs variables to the addresses of the function arguments. 
5554 // %lhs = bitcast void* %arg0 to <type>* 5555 // %rhs = bitcast void* %arg1 to <type>* 5556 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5557 PrivateScope.addPrivate( 5558 LHSVD, 5559 // Pull out the pointer to the variable. 5560 CGF.EmitLoadOfPointer( 5561 CGF.Builder.CreateElementBitCast( 5562 CGF.GetAddrOfLocalVar(&ParamInOut), 5563 CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()), 5564 C.getPointerType(LHSVD->getType())->castAs<PointerType>())); 5565 PrivateScope.addPrivate( 5566 RHSVD, 5567 // Pull out the pointer to the variable. 5568 CGF.EmitLoadOfPointer( 5569 CGF.Builder.CreateElementBitCast( 5570 CGF.GetAddrOfLocalVar(&ParamIn), 5571 CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()), 5572 C.getPointerType(RHSVD->getType())->castAs<PointerType>())); 5573 PrivateScope.Privatize(); 5574 // Emit the combiner body: 5575 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5576 // store <type> %2, <type>* %lhs 5577 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5578 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5579 cast<DeclRefExpr>(RHS)); 5580 CGF.FinishFunction(); 5581 return Fn; 5582 } 5583 5584 /// Emits reduction finalizer function: 5585 /// \code 5586 /// void @.red_fini(void* %arg) { 5587 /// %0 = bitcast void* %arg to <type>* 5588 /// <destroy>(<type>* %0) 5589 /// ret void 5590 /// } 5591 /// \endcode 5592 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5593 SourceLocation Loc, 5594 ReductionCodeGen &RCG, unsigned N) { 5595 if (!RCG.needCleanups(N)) 5596 return nullptr; 5597 ASTContext &C = CGM.getContext(); 5598 FunctionArgList Args; 5599 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5600 ImplicitParamDecl::Other); 5601 Args.emplace_back(&Param); 5602 const auto &FnInfo = 5603 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5604 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5605 std::string Name = 
CGM.getOpenMPRuntime().getName({"red_fini", ""}); 5606 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5607 Name, &CGM.getModule()); 5608 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5609 Fn->setDoesNotRecurse(); 5610 CodeGenFunction CGF(CGM); 5611 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5612 Address PrivateAddr = CGF.EmitLoadOfPointer( 5613 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>()); 5614 llvm::Value *Size = nullptr; 5615 // If the size of the reduction item is non-constant, load it from global 5616 // threadprivate variable. 5617 if (RCG.getSizes(N).second) { 5618 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5619 CGF, CGM.getContext().getSizeType(), 5620 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5621 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5622 CGM.getContext().getSizeType(), Loc); 5623 } 5624 RCG.emitAggregateType(CGF, N, Size); 5625 // Emit the finalizer body: 5626 // <destroy>(<type>* %0) 5627 RCG.emitCleanups(CGF, N, PrivateAddr); 5628 CGF.FinishFunction(Loc); 5629 return Fn; 5630 } 5631 5632 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 5633 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 5634 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 5635 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 5636 return nullptr; 5637 5638 // Build typedef struct: 5639 // kmp_taskred_input { 5640 // void *reduce_shar; // shared reduction item 5641 // void *reduce_orig; // original reduction item used for initialization 5642 // size_t reduce_size; // size of data item 5643 // void *reduce_init; // data initialization routine 5644 // void *reduce_fini; // data finalization routine 5645 // void *reduce_comb; // data combiner routine 5646 // kmp_task_red_flags_t flags; // flags for additional info from compiler 5647 // } kmp_taskred_input_t; 5648 ASTContext 
&C = CGM.getContext(); 5649 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 5650 RD->startDefinition(); 5651 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5652 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5653 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 5654 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5655 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5656 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5657 const FieldDecl *FlagsFD = addFieldToRecordDecl( 5658 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 5659 RD->completeDefinition(); 5660 QualType RDType = C.getRecordType(RD); 5661 unsigned Size = Data.ReductionVars.size(); 5662 llvm::APInt ArraySize(/*numBits=*/64, Size); 5663 QualType ArrayRDType = C.getConstantArrayType( 5664 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 5665 // kmp_task_red_input_t .rd_input.[Size]; 5666 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 5667 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 5668 Data.ReductionCopies, Data.ReductionOps); 5669 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 5670 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 5671 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 5672 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 5673 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 5674 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs, 5675 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 5676 ".rd_input.gep."); 5677 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 5678 // ElemLVal.reduce_shar = &Shareds[Cnt]; 5679 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 5680 RCG.emitSharedOrigLValue(CGF, Cnt); 5681 llvm::Value *CastedShared = 5682 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 5683 
CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 5684 // ElemLVal.reduce_orig = &Origs[Cnt]; 5685 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 5686 llvm::Value *CastedOrig = 5687 CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF)); 5688 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal); 5689 RCG.emitAggregateType(CGF, Cnt); 5690 llvm::Value *SizeValInChars; 5691 llvm::Value *SizeVal; 5692 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 5693 // We use delayed creation/initialization for VLAs and array sections. It is 5694 // required because runtime does not provide the way to pass the sizes of 5695 // VLAs/array sections to initializer/combiner/finalizer functions. Instead 5696 // threadprivate global variables are used to store these values and use 5697 // them in the functions. 5698 bool DelayedCreation = !!SizeVal; 5699 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 5700 /*isSigned=*/false); 5701 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 5702 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 5703 // ElemLVal.reduce_init = init; 5704 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 5705 llvm::Value *InitAddr = 5706 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 5707 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 5708 // ElemLVal.reduce_fini = fini; 5709 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 5710 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 5711 llvm::Value *FiniAddr = Fini 5712 ? 
CGF.EmitCastToVoidPtr(Fini) 5713 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 5714 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 5715 // ElemLVal.reduce_comb = comb; 5716 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 5717 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 5718 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 5719 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 5720 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 5721 // ElemLVal.flags = 0; 5722 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 5723 if (DelayedCreation) { 5724 CGF.EmitStoreOfScalar( 5725 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 5726 FlagsLVal); 5727 } else 5728 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), 5729 FlagsLVal.getType()); 5730 } 5731 if (Data.IsReductionWithTaskMod) { 5732 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 5733 // is_ws, int num, void *data); 5734 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 5735 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 5736 CGM.IntTy, /*isSigned=*/true); 5737 llvm::Value *Args[] = { 5738 IdentTLoc, GTid, 5739 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 
1 : 0, 5740 /*isSigned=*/true), 5741 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 5742 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5743 TaskRedInput.getPointer(), CGM.VoidPtrTy)}; 5744 return CGF.EmitRuntimeCall( 5745 OMPBuilder.getOrCreateRuntimeFunction( 5746 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init), 5747 Args); 5748 } 5749 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data); 5750 llvm::Value *Args[] = { 5751 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 5752 /*isSigned=*/true), 5753 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 5754 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 5755 CGM.VoidPtrTy)}; 5756 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5757 CGM.getModule(), OMPRTL___kmpc_taskred_init), 5758 Args); 5759 } 5760 5761 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 5762 SourceLocation Loc, 5763 bool IsWorksharingReduction) { 5764 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 5765 // is_ws, int num, void *data); 5766 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 5767 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 5768 CGM.IntTy, /*isSigned=*/true); 5769 llvm::Value *Args[] = {IdentTLoc, GTid, 5770 llvm::ConstantInt::get(CGM.IntTy, 5771 IsWorksharingReduction ? 1 : 0, 5772 /*isSigned=*/true)}; 5773 (void)CGF.EmitRuntimeCall( 5774 OMPBuilder.getOrCreateRuntimeFunction( 5775 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini), 5776 Args); 5777 } 5778 5779 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 5780 SourceLocation Loc, 5781 ReductionCodeGen &RCG, 5782 unsigned N) { 5783 auto Sizes = RCG.getSizes(N); 5784 // Emit threadprivate global variable if the type is non-constant 5785 // (Sizes.second = nullptr). 
5786 if (Sizes.second) { 5787 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 5788 /*isSigned=*/false); 5789 Address SizeAddr = getAddrOfArtificialThreadPrivate( 5790 CGF, CGM.getContext().getSizeType(), 5791 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5792 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 5793 } 5794 } 5795 5796 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 5797 SourceLocation Loc, 5798 llvm::Value *ReductionsPtr, 5799 LValue SharedLVal) { 5800 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 5801 // *d); 5802 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 5803 CGM.IntTy, 5804 /*isSigned=*/true), 5805 ReductionsPtr, 5806 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5807 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 5808 return Address( 5809 CGF.EmitRuntimeCall( 5810 OMPBuilder.getOrCreateRuntimeFunction( 5811 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 5812 Args), 5813 CGF.Int8Ty, SharedLVal.getAlignment()); 5814 } 5815 5816 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, 5817 const OMPTaskDataTy &Data) { 5818 if (!CGF.HaveInsertPoint()) 5819 return; 5820 5821 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) { 5822 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder. 
5823 OMPBuilder.createTaskwait(CGF.Builder); 5824 } else { 5825 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5826 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5827 auto &M = CGM.getModule(); 5828 Address DependenciesArray = Address::invalid(); 5829 llvm::Value *NumOfElements; 5830 std::tie(NumOfElements, DependenciesArray) = 5831 emitDependClause(CGF, Data.Dependences, Loc); 5832 if (!Data.Dependences.empty()) { 5833 llvm::Value *DepWaitTaskArgs[7]; 5834 DepWaitTaskArgs[0] = UpLoc; 5835 DepWaitTaskArgs[1] = ThreadID; 5836 DepWaitTaskArgs[2] = NumOfElements; 5837 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5838 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5839 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5840 DepWaitTaskArgs[6] = 5841 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause); 5842 5843 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5844 5845 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid, 5846 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5847 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list, 5848 // kmp_int32 has_no_wait); if dependence info is specified. 5849 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5850 M, OMPRTL___kmpc_omp_taskwait_deps_51), 5851 DepWaitTaskArgs); 5852 5853 } else { 5854 5855 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 5856 // global_tid); 5857 llvm::Value *Args[] = {UpLoc, ThreadID}; 5858 // Ignore return result until untied tasks are supported. 
5859 CGF.EmitRuntimeCall( 5860 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), 5861 Args); 5862 } 5863 } 5864 5865 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5866 Region->emitUntiedSwitch(CGF); 5867 } 5868 5869 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 5870 OpenMPDirectiveKind InnerKind, 5871 const RegionCodeGenTy &CodeGen, 5872 bool HasCancel) { 5873 if (!CGF.HaveInsertPoint()) 5874 return; 5875 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 5876 InnerKind != OMPD_critical && 5877 InnerKind != OMPD_master && 5878 InnerKind != OMPD_masked); 5879 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 5880 } 5881 5882 namespace { 5883 enum RTCancelKind { 5884 CancelNoreq = 0, 5885 CancelParallel = 1, 5886 CancelLoop = 2, 5887 CancelSections = 3, 5888 CancelTaskgroup = 4 5889 }; 5890 } // anonymous namespace 5891 5892 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 5893 RTCancelKind CancelKind = CancelNoreq; 5894 if (CancelRegion == OMPD_parallel) 5895 CancelKind = CancelParallel; 5896 else if (CancelRegion == OMPD_for) 5897 CancelKind = CancelLoop; 5898 else if (CancelRegion == OMPD_sections) 5899 CancelKind = CancelSections; 5900 else { 5901 assert(CancelRegion == OMPD_taskgroup); 5902 CancelKind = CancelTaskgroup; 5903 } 5904 return CancelKind; 5905 } 5906 5907 void CGOpenMPRuntime::emitCancellationPointCall( 5908 CodeGenFunction &CGF, SourceLocation Loc, 5909 OpenMPDirectiveKind CancelRegion) { 5910 if (!CGF.HaveInsertPoint()) 5911 return; 5912 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 5913 // global_tid, kmp_int32 cncl_kind); 5914 if (auto *OMPRegionInfo = 5915 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 5916 // For 'cancellation point taskgroup', the task region info may not have a 5917 // cancel. This may instead happen in another adjacent task. 
5918 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 5919 llvm::Value *Args[] = { 5920 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 5921 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 5922 // Ignore return result until untied tasks are supported. 5923 llvm::Value *Result = CGF.EmitRuntimeCall( 5924 OMPBuilder.getOrCreateRuntimeFunction( 5925 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 5926 Args); 5927 // if (__kmpc_cancellationpoint()) { 5928 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 5929 // exit from construct; 5930 // } 5931 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 5932 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 5933 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 5934 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 5935 CGF.EmitBlock(ExitBB); 5936 if (CancelRegion == OMPD_parallel) 5937 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 5938 // exit from construct; 5939 CodeGenFunction::JumpDest CancelDest = 5940 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 5941 CGF.EmitBranchThroughCleanup(CancelDest); 5942 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 5943 } 5944 } 5945 } 5946 5947 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 5948 const Expr *IfCond, 5949 OpenMPDirectiveKind CancelRegion) { 5950 if (!CGF.HaveInsertPoint()) 5951 return; 5952 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 5953 // kmp_int32 cncl_kind); 5954 auto &M = CGM.getModule(); 5955 if (auto *OMPRegionInfo = 5956 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 5957 auto &&ThenGen = [this, &M, Loc, CancelRegion, 5958 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 5959 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5960 llvm::Value *Args[] = { 5961 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 5962 
CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 5963 // Ignore return result until untied tasks are supported. 5964 llvm::Value *Result = CGF.EmitRuntimeCall( 5965 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 5966 // if (__kmpc_cancel()) { 5967 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 5968 // exit from construct; 5969 // } 5970 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 5971 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 5972 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 5973 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 5974 CGF.EmitBlock(ExitBB); 5975 if (CancelRegion == OMPD_parallel) 5976 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 5977 // exit from construct; 5978 CodeGenFunction::JumpDest CancelDest = 5979 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 5980 CGF.EmitBranchThroughCleanup(CancelDest); 5981 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 5982 }; 5983 if (IfCond) { 5984 emitIfClause(CGF, IfCond, ThenGen, 5985 [](CodeGenFunction &, PrePostActionTy &) {}); 5986 } else { 5987 RegionCodeGenTy ThenRCG(ThenGen); 5988 ThenRCG(CGF); 5989 } 5990 } 5991 } 5992 5993 namespace { 5994 /// Cleanup action for uses_allocators support. 
5995 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 5996 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 5997 5998 public: 5999 OMPUsesAllocatorsActionTy( 6000 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6001 : Allocators(Allocators) {} 6002 void Enter(CodeGenFunction &CGF) override { 6003 if (!CGF.HaveInsertPoint()) 6004 return; 6005 for (const auto &AllocatorData : Allocators) { 6006 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6007 CGF, AllocatorData.first, AllocatorData.second); 6008 } 6009 } 6010 void Exit(CodeGenFunction &CGF) override { 6011 if (!CGF.HaveInsertPoint()) 6012 return; 6013 for (const auto &AllocatorData : Allocators) { 6014 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6015 AllocatorData.first); 6016 } 6017 } 6018 }; 6019 } // namespace 6020 6021 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6022 const OMPExecutableDirective &D, StringRef ParentName, 6023 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6024 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6025 assert(!ParentName.empty() && "Invalid target entry parent name!"); 6026 HasEmittedTargetRegion = true; 6027 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6028 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6029 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6030 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6031 if (!D.AllocatorTraits) 6032 continue; 6033 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6034 } 6035 } 6036 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6037 CodeGen.setAction(UsesAllocatorAction); 6038 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6039 IsOffloadEntry, CodeGen); 6040 } 6041 6042 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6043 const Expr *Allocator, 6044 const Expr *AllocatorTraits) { 6045 llvm::Value *ThreadId = 
getThreadID(CGF, Allocator->getExprLoc()); 6046 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6047 // Use default memspace handle. 6048 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6049 llvm::Value *NumTraits = llvm::ConstantInt::get( 6050 CGF.IntTy, cast<ConstantArrayType>( 6051 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6052 ->getSize() 6053 .getLimitedValue()); 6054 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6055 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6056 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy); 6057 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6058 AllocatorTraitsLVal.getBaseInfo(), 6059 AllocatorTraitsLVal.getTBAAInfo()); 6060 llvm::Value *Traits = 6061 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6062 6063 llvm::Value *AllocatorVal = 6064 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6065 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6066 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6067 // Store to allocator. 
6068 CGF.EmitVarDecl(*cast<VarDecl>( 6069 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6070 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6071 AllocatorVal = 6072 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6073 Allocator->getType(), Allocator->getExprLoc()); 6074 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6075 } 6076 6077 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6078 const Expr *Allocator) { 6079 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6080 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6081 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6082 llvm::Value *AllocatorVal = 6083 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6084 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6085 CGF.getContext().VoidPtrTy, 6086 Allocator->getExprLoc()); 6087 (void)CGF.EmitRuntimeCall( 6088 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6089 OMPRTL___kmpc_destroy_allocator), 6090 {ThreadId, AllocatorVal}); 6091 } 6092 6093 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6094 const OMPExecutableDirective &D, StringRef ParentName, 6095 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6096 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6097 6098 auto EntryInfo = 6099 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), ParentName); 6100 6101 CodeGenFunction CGF(CGM, true); 6102 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction = 6103 [&CGF, &D, &CodeGen](StringRef EntryFnName) { 6104 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6105 6106 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6107 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6108 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6109 }; 6110 6111 // Get NumTeams and ThreadLimit 
attributes 6112 int32_t DefaultValTeams = -1; 6113 int32_t DefaultValThreads = -1; 6114 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); 6115 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); 6116 6117 OMPBuilder.emitTargetRegionFunction(OffloadEntriesInfoManager, EntryInfo, 6118 GenerateOutlinedFunction, DefaultValTeams, 6119 DefaultValThreads, IsOffloadEntry, 6120 OutlinedFn, OutlinedFnID); 6121 6122 if (OutlinedFn != nullptr) 6123 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); 6124 } 6125 6126 /// Checks if the expression is constant or does not have non-trivial function 6127 /// calls. 6128 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6129 // We can skip constant expressions. 6130 // We can skip expressions with trivial calls or simple expressions. 6131 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6132 !E->hasNonTrivialCall(Ctx)) && 6133 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6134 } 6135 6136 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6137 const Stmt *Body) { 6138 const Stmt *Child = Body->IgnoreContainers(); 6139 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6140 Child = nullptr; 6141 for (const Stmt *S : C->body()) { 6142 if (const auto *E = dyn_cast<Expr>(S)) { 6143 if (isTrivial(Ctx, E)) 6144 continue; 6145 } 6146 // Some of the statements can be ignored. 6147 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6148 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6149 continue; 6150 // Analyze declarations. 
6151 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6152 if (llvm::all_of(DS->decls(), [](const Decl *D) { 6153 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6154 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6155 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6156 isa<UsingDirectiveDecl>(D) || 6157 isa<OMPDeclareReductionDecl>(D) || 6158 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6159 return true; 6160 const auto *VD = dyn_cast<VarDecl>(D); 6161 if (!VD) 6162 return false; 6163 return VD->hasGlobalStorage() || !VD->isUsed(); 6164 })) 6165 continue; 6166 } 6167 // Found multiple children - cannot get the one child only. 6168 if (Child) 6169 return nullptr; 6170 Child = S; 6171 } 6172 if (Child) 6173 Child = Child->IgnoreContainers(); 6174 } 6175 return Child; 6176 } 6177 6178 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( 6179 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6180 int32_t &DefaultVal) { 6181 6182 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6183 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6184 "Expected target-based executable directive."); 6185 switch (DirectiveKind) { 6186 case OMPD_target: { 6187 const auto *CS = D.getInnermostCapturedStmt(); 6188 const auto *Body = 6189 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6190 const Stmt *ChildStmt = 6191 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6192 if (const auto *NestedDir = 6193 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6194 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6195 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6196 const Expr *NumTeams = 6197 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6198 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6199 if (auto Constant = 6200 NumTeams->getIntegerConstantExpr(CGF.getContext())) 6201 DefaultVal = Constant->getExtValue(); 6202 return NumTeams; 6203 } 6204 DefaultVal 
= 0; 6205 return nullptr; 6206 } 6207 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6208 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { 6209 DefaultVal = 1; 6210 return nullptr; 6211 } 6212 DefaultVal = 1; 6213 return nullptr; 6214 } 6215 // A value of -1 is used to check if we need to emit no teams region 6216 DefaultVal = -1; 6217 return nullptr; 6218 } 6219 case OMPD_target_teams: 6220 case OMPD_target_teams_distribute: 6221 case OMPD_target_teams_distribute_simd: 6222 case OMPD_target_teams_distribute_parallel_for: 6223 case OMPD_target_teams_distribute_parallel_for_simd: { 6224 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6225 const Expr *NumTeams = 6226 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6227 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6228 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) 6229 DefaultVal = Constant->getExtValue(); 6230 return NumTeams; 6231 } 6232 DefaultVal = 0; 6233 return nullptr; 6234 } 6235 case OMPD_target_parallel: 6236 case OMPD_target_parallel_for: 6237 case OMPD_target_parallel_for_simd: 6238 case OMPD_target_simd: 6239 DefaultVal = 1; 6240 return nullptr; 6241 case OMPD_parallel: 6242 case OMPD_for: 6243 case OMPD_parallel_for: 6244 case OMPD_parallel_master: 6245 case OMPD_parallel_sections: 6246 case OMPD_for_simd: 6247 case OMPD_parallel_for_simd: 6248 case OMPD_cancel: 6249 case OMPD_cancellation_point: 6250 case OMPD_ordered: 6251 case OMPD_threadprivate: 6252 case OMPD_allocate: 6253 case OMPD_task: 6254 case OMPD_simd: 6255 case OMPD_tile: 6256 case OMPD_unroll: 6257 case OMPD_sections: 6258 case OMPD_section: 6259 case OMPD_single: 6260 case OMPD_master: 6261 case OMPD_critical: 6262 case OMPD_taskyield: 6263 case OMPD_barrier: 6264 case OMPD_taskwait: 6265 case OMPD_taskgroup: 6266 case OMPD_atomic: 6267 case OMPD_flush: 6268 case OMPD_depobj: 6269 case OMPD_scan: 6270 case OMPD_teams: 6271 case OMPD_target_data: 6272 case 
OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Emit the number of teams for target directive \p D as an i32 value.
///
/// The expression is obtained from getNumTeamsExprForTargetDirective, which is
/// also handed the local default (initialized to -1) by reference. When an
/// expression comes back and \p D is one of the directive kinds handled in the
/// switch below, the expression is emitted and sign-cast to Int32. In every
/// other case the (possibly updated) default is returned as an Int32 constant.
/// Must only be called when compiling for the host (see the assert).
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t DefaultNT = -1;
  const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // For a plain 'target' the expression is emitted with the innermost
      // captured statement installed as the current captured-statement info.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      // Combined directives: emit the expression inside its own cleanup scope.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
}

/// Compute a thread count from the single directive nested in \p CS, if any.
///
/// \param CS captured statement whose single compound child is inspected.
/// \param DefaultThreadLimitVal limit already emitted by the caller; may be
/// null.
/// \returns
///  - for a nested parallel directive: the num_threads value (unsigned i32),
///    replaced by \p DefaultThreadLimitVal when that is unsigned-smaller; if
///    there is no num_threads clause, \p DefaultThreadLimitVal or i32 0 when
///    it is null. When an applicable 'if' clause has a non-constant
///    condition the result is select(cond, that value, 1); when the condition
///    folds to false, i32 1 is returned right away;
///  - i32 1 for a nested simd directive that is not a parallel directive;
///  - \p DefaultThreadLimitVal unchanged (possibly null) otherwise.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Only an 'if' clause without a name modifier, or with the 'parallel'
        // modifier, is considered; the first such clause wins.
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to a constant: false means one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit the clause's pre-init declarations before evaluating the
            // condition; declarations marked OMPCaptureNoInitAttr only get an
            // alloca and cleanups, no initializer.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Same pre-init handling as for the 'if' clause above.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Unsigned minimum of the caller's limit and num_threads.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
  }
  return DefaultThreadLimitVal;
}

/// Return the clause expression (if any) that bounds the number of threads for
/// target directive \p D, and update \p DefaultVal when a bound is a
/// compile-time integer constant.
///
///  - 'target': always null; \p DefaultVal is untouched.
///  - 'target teams' / 'target teams distribute': the thread_limit expression
///    or null; a constant thread_limit is stored in \p DefaultVal.
///  - combined parallel forms: the thread_limit expression, replaced by the
///    num_threads expression when num_threads is a constant smaller than
///    \p DefaultVal (which is then updated to it).
///  - simd-only combined forms: null, with \p DefaultVal set to 1.
/// Any other directive kind reaches llvm_unreachable.
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // Teams have no clause thread_limit
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          // NOTE(review): this compares against whatever DefaultVal holds at
          // this point. If no constant thread_limit was seen above, that is
          // the caller's initial value, so the outcome depends on how callers
          // initialize it -- confirm this is intended.
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  // NOTE(review): unlike the sibling switch in
  // emitNumThreadsForTargetDirective, OMPD_metadirective is not listed below;
  // it falls into 'default', which behaves the same.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

/// Emit the number of threads for target directive \p D as an i32 value.
///
/// Depending on the directive kind, the value is derived from thread_limit,
/// num_threads and 'if' clauses found on \p D itself and, for 'target',
/// 'target teams' and 'target teams distribute', on directives nested in its
/// captured statement (via getNumThreads). i32 1 is produced for simd-only
/// forms and for an 'if' clause that folds to false; i32 0 is produced when
/// nothing constrains the count (presumably "let the runtime decide" --
/// confirm against the offload runtime's contract). Must only be called when
/// compiling for the host; non-target directive kinds reach llvm_unreachable.
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    // ThreadLimitVal is still null here, so a non-null result can only come
    // from a directly nested parallel or simd directive.
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear how to resolve two thread limit clauses,
    // let's pick the teams one if it's present, otherwise the target one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit the nested clause's pre-init declarations; declarations marked
        // OMPCaptureNoInitAttr only get an alloca and cleanups.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
    if (ThreadLimitClause) {
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Step through a nested teams (non-distribute) directive to its child.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Also look one level deeper through a nested plain 'distribute'.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (llvm::Value *NumThreads =
            getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal))
      return NumThreads;
    return Bld.getInt32(0);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Only an 'if' clause without a name modifier, or with the 'parallel'
      // modifier, is considered; the first such clause wins.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Unsigned minimum of num_threads and thread_limit when both exist.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  ///
  /// Computed as the number of zero bits below the lowest set bit of
  /// OMP_MAP_MEMBER_OF.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    // Shift right until bit 0 is set, counting the shifts. This terminates
    // only because the OMP_MAP_MEMBER_OF mask is non-zero.
    for (uint64_t Remain =
             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
         !(Remain & 1); Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Dereferencing yields the stored base pointer value.
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
    ///
    /// Every array of \a CurInfo is appended to the matching array of this
    /// object. NonContigInfo.IsNonContiguous is not copied or merged here.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Kind that defines how a device pointer has to be returned.
struct MapInfo {
  // Component list of the mappable expression this entry describes.
  OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
  // Map type of the originating clause; OMPC_MAP_unknown until set.
  OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
  // Map-type modifiers and motion modifiers attached to the clause.
  ArrayRef<OpenMPMapModifierKind> MapModifiers;
  ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
  // NOTE(review): presumably requests that the device pointer be handed back
  // for this entry (use_device_ptr / use_device_addr) -- confirm at the use
  // sites.
  bool ReturnDevicePointer = false;
  // Whether the mapping is implicit rather than spelled out by the user.
  bool IsImplicit = false;
  // User-defined mapper declaration, or null.
  const ValueDecl *Mapper = nullptr;
  // Expression associated with the entry, or null.
  const Expr *VarRef = nullptr;
  bool ForDeviceAddr = false;

  MapInfo() = default;
  // Member-wise constructor; the last three parameters are optional and
  // default to the same values as the in-class initializers.
  MapInfo(
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      bool ReturnDevicePointer, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
      bool ForDeviceAddr = false)
      : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
        MotionModifiers(MotionModifiers),
        ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
        Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
};

/// If use_device_ptr or use_device_addr is used on a decl which is a struct
/// member and there is no map information about it, then emission of that
/// entry is deferred until the whole struct has been processed.
struct DeferredDevicePtrEntryTy {
  // Expression and declaration of the deferred entry.
  const Expr *IE = nullptr;
  const ValueDecl *VD = nullptr;
  // Presumably true for use_device_addr and false for use_device_ptr --
  // confirm at the construction sites.
  bool ForDeviceAddr = false;

  DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                           bool ForDeviceAddr)
      : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
};

/// The target directive from where the mappable clauses were extracted. It
/// is either an executable directive or a user-defined mapper directive.
llvm::PointerUnion<const OMPExecutableDirective *,
                   const OMPDeclareMapperDecl *>
    CurDir;

/// Function the directive is being generated for.
CodeGenFunction &CGF;

/// Set of all first private variables in the current directive.
/// bool data is set to true if the variable is implicitly marked as
/// firstprivate, false otherwise.
llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

/// Map between device pointer declarations and their expression components.
/// The key value for declarations in 'this' is null.
llvm::DenseMap<
    const ValueDecl *,
    SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
    DevPointersMap;

/// Map between device addr declarations and their expression components.
/// The key value for declarations in 'this' is null.
llvm::DenseMap<
    const ValueDecl *,
    SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
    HasDevAddrsMap;

/// Map between lambda declarations and their map type.
llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

/// Emit the size in bytes of the storage designated by mappable expression
/// \p E.
///
///  - Array shaping expression: pointee size of the base multiplied by every
///    dimension (each converted to the size type).
///  - Array section: derived from the section's bounds, see the cases below.
///  - Anything else: the size of the expression's canonical type, looking
///    through a reference type to its pointee.
llvm::Value *getExprTypeSize(const Expr *E) const {
  QualType ExprTy = E->getType().getCanonicalType();

  // Calculate the size for array shaping expression.
  if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
    llvm::Value *Size =
        CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OAE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                    CGF.getContext().getSizeType(),
                                    SE->getExprLoc());
      Size = CGF.Builder.CreateNUWMul(Size, Sz);
    }
    return Size;
  }

  // Reference types are ignored for mapping purposes.
  if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
    ExprTy = RefTy->getPointeeType().getCanonicalType();

  // Given that an array section is considered a built-in type, we need to
  // do the calculation based on the length of the section instead of relying
  // on CGF.getTypeSize(E->getType()).
  if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
    QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                          OAE->getBase()->IgnoreParenImpCasts())
                          .getCanonicalType();

    // If there is no length associated with the expression and lower bound is
    // not specified too, that means we are using the whole length of the
    // base.
    if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
        !OAE->getLowerBound())
      return CGF.getTypeSize(BaseTy);

    // Element size comes from the pointee type for a pointer base and from
    // the element type for an array base.
    llvm::Value *ElemSize;
    if (const auto *PTy = BaseTy->getAs<PointerType>()) {
      ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
    } else {
      const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
      assert(ATy && "Expecting array type if not a pointer type.");
      ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
    }

    // If we don't have a length at this point, that is because we have an
    // array section with a single element.
    if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
      return ElemSize;

    // Explicit length: size = length * element size.
    if (const Expr *LenExpr = OAE->getLength()) {
      llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
      LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                           CGF.getContext().getSizeType(),
                                           LenExpr->getExprLoc());
      return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
    }
    assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
           OAE->getLowerBound() && "expected array_section[lb:].");
    // Size = sizeof(base) - lb * sizeof(element), clamped to 0 when the lower
    // bound offset is not smaller than the size of the base.
    llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
    llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
    LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                     CGF.getContext().getSizeType(),
                                     OAE->getLowerBound()->getExprLoc());
    LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
    llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
    llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
    LengthVal = CGF.Builder.CreateSelect(
        Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
    return LengthVal;
  }
  return CGF.getTypeSize(ExprTy);
}

/// Return the corresponding bits for a given map clause modifier. Add
/// a flag marking the map as a pointer if requested. Add a flag marking the
/// map as the first one of a series of maps that relate to the same map
/// expression.
///
/// The result starts as OMP_MAP_IMPLICIT or OMP_MAP_NONE depending on
/// \p IsImplicit; the map type, the two Add* flags, the recognized map/motion
/// modifiers and \p IsNonContiguous each OR in their own bit(s).
/// \p MapType must not be OMPC_MAP_unknown.
OpenMPOffloadMappingFlags getMapTypeBits(
    OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
    bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
  OpenMPOffloadMappingFlags Bits =
      IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
                 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
  switch (MapType) {
  case OMPC_MAP_alloc:
  case OMPC_MAP_release:
    // alloc and release is the default behavior in the runtime library, i.e.
    // if we don't pass any bits alloc/release that is what the runtime is
    // going to do. Therefore, we don't need to signal anything for these two
    // type modifiers.
    break;
  case OMPC_MAP_to:
    Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
    break;
  case OMPC_MAP_from:
    Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
    break;
  case OMPC_MAP_tofrom:
    Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
            OpenMPOffloadMappingFlags::OMP_MAP_FROM;
    break;
  case OMPC_MAP_delete:
    Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
    break;
  case OMPC_MAP_unknown:
    llvm_unreachable("Unexpected map type!");
  }
  if (AddPtrFlag)
    Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
  if (AddIsTargetParamFlag)
    Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
  if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
    Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
  if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
    Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
  // 'present' may come either from a map modifier or from a motion modifier.
  if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
      llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
    Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
  if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
    Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
  if (IsNonContiguous)
    Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
  return Bits;
}

/// Return true if the provided expression is a final array section. A
/// final array section is one whose length can't be proved to be one.
7106 bool isFinalArraySectionExpression(const Expr *E) const { 7107 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7108 7109 // It is not an array section and therefore not a unity-size one. 7110 if (!OASE) 7111 return false; 7112 7113 // An array section with no colon always refer to a single element. 7114 if (OASE->getColonLocFirst().isInvalid()) 7115 return false; 7116 7117 const Expr *Length = OASE->getLength(); 7118 7119 // If we don't have a length we have to check if the array has size 1 7120 // for this dimension. Also, we should always expect a length if the 7121 // base type is pointer. 7122 if (!Length) { 7123 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7124 OASE->getBase()->IgnoreParenImpCasts()) 7125 .getCanonicalType(); 7126 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7127 return ATy->getSize().getSExtValue() != 1; 7128 // If we don't have a constant dimension length, we have to consider 7129 // the current section as having any size, so it is not necessarily 7130 // unitary. If it happen to be unity size, that's user fault. 7131 return true; 7132 } 7133 7134 // Check if the length evaluates to 1. 7135 Expr::EvalResult Result; 7136 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7137 return true; // Can have more that size 1. 7138 7139 llvm::APSInt ConstLength = Result.Val.getInt(); 7140 return ConstLength.getSExtValue() != 1; 7141 } 7142 7143 /// Generate the base pointers, section pointers, sizes, map type bits, and 7144 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7145 /// map type, map or motion modifiers, and expression components. 7146 /// \a IsFirstComponent should be set to true if the provided set of 7147 /// components is the first associated with a capture. 
7148 void generateInfoForComponentList( 7149 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7150 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7151 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7152 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7153 bool IsFirstComponentList, bool IsImplicit, 7154 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7155 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7156 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7157 OverlappedElements = std::nullopt) const { 7158 // The following summarizes what has to be generated for each map and the 7159 // types below. The generated information is expressed in this order: 7160 // base pointer, section pointer, size, flags 7161 // (to add to the ones that come from the map type and modifier). 7162 // 7163 // double d; 7164 // int i[100]; 7165 // float *p; 7166 // 7167 // struct S1 { 7168 // int i; 7169 // float f[50]; 7170 // } 7171 // struct S2 { 7172 // int i; 7173 // float f[50]; 7174 // S1 s; 7175 // double *p; 7176 // struct S2 *ps; 7177 // int &ref; 7178 // } 7179 // S2 s; 7180 // S2 *ps; 7181 // 7182 // map(d) 7183 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7184 // 7185 // map(i) 7186 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7187 // 7188 // map(i[1:23]) 7189 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7190 // 7191 // map(p) 7192 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7193 // 7194 // map(p[1:24]) 7195 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7196 // in unified shared memory mode or for local pointers 7197 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7198 // 7199 // map(s) 7200 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7201 // 7202 // map(s.i) 7203 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7204 // 7205 // map(s.s.f) 7206 // &s, &(s.s.f[0]), 
50*sizeof(float), TARGET_PARAM | TO | FROM 7207 // 7208 // map(s.p) 7209 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7210 // 7211 // map(to: s.p[:22]) 7212 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7213 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7214 // &(s.p), &(s.p[0]), 22*sizeof(double), 7215 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7216 // (*) alloc space for struct members, only this is a target parameter 7217 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7218 // optimizes this entry out, same in the examples below) 7219 // (***) map the pointee (map: to) 7220 // 7221 // map(to: s.ref) 7222 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7223 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7224 // (*) alloc space for struct members, only this is a target parameter 7225 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7226 // optimizes this entry out, same in the examples below) 7227 // (***) map the pointee (map: to) 7228 // 7229 // map(s.ps) 7230 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7231 // 7232 // map(from: s.ps->s.i) 7233 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7234 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7235 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7236 // 7237 // map(to: s.ps->ps) 7238 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7239 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7240 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7241 // 7242 // map(s.ps->ps->ps) 7243 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7244 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7245 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7246 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7247 // 7248 // map(to: s.ps->ps->s.f[:22]) 7249 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7250 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7251 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ 7252 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7253 // 7254 // map(ps) 7255 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7256 // 7257 // map(ps->i) 7258 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7259 // 7260 // map(ps->s.f) 7261 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7262 // 7263 // map(from: ps->p) 7264 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7265 // 7266 // map(to: ps->p[:22]) 7267 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7268 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7269 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7270 // 7271 // map(ps->ps) 7272 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7273 // 7274 // map(from: ps->ps->s.i) 7275 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7276 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7277 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7278 // 7279 // map(from: ps->ps->ps) 7280 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7281 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7282 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7283 // 7284 // map(ps->ps->ps->ps) 7285 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7286 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7287 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7288 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7289 // 7290 // map(to: ps->ps->ps->s.f[:22]) 7291 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7292 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7293 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7294 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7295 // 7296 // map(to: s.f[:22]) map(from: s.p[:33]) 7297 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7298 // sizeof(double*) (**), TARGET_PARAM 7299 // &s, &(s.f[0]), 22*sizeof(float), 
MEMBER_OF(1) | TO 7300 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7301 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7302 // (*) allocate contiguous space needed to fit all mapped members even if 7303 // we allocate space for members not mapped (in this example, 7304 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7305 // them as well because they fall between &s.f[0] and &s.p) 7306 // 7307 // map(from: s.f[:22]) map(to: ps->p[:33]) 7308 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7309 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7310 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7311 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7312 // (*) the struct this entry pertains to is the 2nd element in the list of 7313 // arguments, hence MEMBER_OF(2) 7314 // 7315 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7316 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7317 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7318 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7319 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7320 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7321 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7322 // (*) the struct this entry pertains to is the 4th element in the list 7323 // of arguments, hence MEMBER_OF(4) 7324 7325 // Track if the map information being generated is the first for a capture. 7326 bool IsCaptureFirstInfo = IsFirstComponentList; 7327 // When the variable is on a declare target link or in a to clause with 7328 // unified memory, a reference is needed to hold the host/device address 7329 // of the variable. 7330 bool RequiresReference = false; 7331 7332 // Scan the components from the base to the complete expression. 
7333 auto CI = Components.rbegin(); 7334 auto CE = Components.rend(); 7335 auto I = CI; 7336 7337 // Track if the map information being generated is the first for a list of 7338 // components. 7339 bool IsExpressionFirstInfo = true; 7340 bool FirstPointerInComplexData = false; 7341 Address BP = Address::invalid(); 7342 const Expr *AssocExpr = I->getAssociatedExpression(); 7343 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7344 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7345 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7346 7347 if (isa<MemberExpr>(AssocExpr)) { 7348 // The base is the 'this' pointer. The content of the pointer is going 7349 // to be the base of the field being mapped. 7350 BP = CGF.LoadCXXThisAddress(); 7351 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7352 (OASE && 7353 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7354 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7355 } else if (OAShE && 7356 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7357 BP = Address( 7358 CGF.EmitScalarExpr(OAShE->getBase()), 7359 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()), 7360 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7361 } else { 7362 // The base is the reference to the variable. 7363 // BP = &Var. 
7364 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7365 if (const auto *VD = 7366 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7367 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7368 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7369 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7370 ((*Res == OMPDeclareTargetDeclAttr::MT_To || 7371 *Res == OMPDeclareTargetDeclAttr::MT_Enter) && 7372 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7373 RequiresReference = true; 7374 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7375 } 7376 } 7377 } 7378 7379 // If the variable is a pointer and is being dereferenced (i.e. is not 7380 // the last component), the base has to be the pointer itself, not its 7381 // reference. References are ignored for mapping purposes. 7382 QualType Ty = 7383 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7384 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7385 // No need to generate individual map information for the pointer, it 7386 // can be associated with the combined storage if shared memory mode is 7387 // active or the base declaration is not global variable. 7388 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7389 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7390 !VD || VD->hasLocalStorage()) 7391 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7392 else 7393 FirstPointerInComplexData = true; 7394 ++I; 7395 } 7396 } 7397 7398 // Track whether a component of the list should be marked as MEMBER_OF some 7399 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7400 // in a component list should be marked as MEMBER_OF, all subsequent entries 7401 // do not belong to the base struct. E.g. 
7402 // struct S2 s; 7403 // s.ps->ps->ps->f[:] 7404 // (1) (2) (3) (4) 7405 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7406 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7407 // is the pointee of ps(2) which is not member of struct s, so it should not 7408 // be marked as such (it is still PTR_AND_OBJ). 7409 // The variable is initialized to false so that PTR_AND_OBJ entries which 7410 // are not struct members are not considered (e.g. array of pointers to 7411 // data). 7412 bool ShouldBeMemberOf = false; 7413 7414 // Variable keeping track of whether or not we have encountered a component 7415 // in the component list which is a member expression. Useful when we have a 7416 // pointer or a final array section, in which case it is the previous 7417 // component in the list which tells us whether we have a member expression. 7418 // E.g. X.f[:] 7419 // While processing the final array section "[:]" it is "f" which tells us 7420 // whether we are dealing with a member of a declared struct. 7421 const MemberExpr *EncounteredME = nullptr; 7422 7423 // Track for the total number of dimension. Start from one for the dummy 7424 // dimension. 7425 uint64_t DimSize = 1; 7426 7427 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7428 bool IsPrevMemberReference = false; 7429 7430 for (; I != CE; ++I) { 7431 // If the current component is member of a struct (parent struct) mark it. 7432 if (!EncounteredME) { 7433 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7434 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7435 // as MEMBER_OF the parent struct. 7436 if (EncounteredME) { 7437 ShouldBeMemberOf = true; 7438 // Do not emit as complex pointer if this is actually not array-like 7439 // expression. 
7440 if (FirstPointerInComplexData) { 7441 QualType Ty = std::prev(I) 7442 ->getAssociatedDeclaration() 7443 ->getType() 7444 .getNonReferenceType(); 7445 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7446 FirstPointerInComplexData = false; 7447 } 7448 } 7449 } 7450 7451 auto Next = std::next(I); 7452 7453 // We need to generate the addresses and sizes if this is the last 7454 // component, if the component is a pointer or if it is an array section 7455 // whose length can't be proved to be one. If this is a pointer, it 7456 // becomes the base address for the following components. 7457 7458 // A final array section, is one whose length can't be proved to be one. 7459 // If the map item is non-contiguous then we don't treat any array section 7460 // as final array section. 7461 bool IsFinalArraySection = 7462 !IsNonContiguous && 7463 isFinalArraySectionExpression(I->getAssociatedExpression()); 7464 7465 // If we have a declaration for the mapping use that, otherwise use 7466 // the base declaration of the map clause. 7467 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7468 ? I->getAssociatedDeclaration() 7469 : BaseDecl; 7470 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() 7471 : MapExpr; 7472 7473 // Get information on whether the element is a pointer. Have to do a 7474 // special treatment for array sections given that they are built-in 7475 // types. 
7476 const auto *OASE = 7477 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7478 const auto *OAShE = 7479 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7480 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7481 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7482 bool IsPointer = 7483 OAShE || 7484 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7485 .getCanonicalType() 7486 ->isAnyPointerType()) || 7487 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7488 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 7489 MapDecl && 7490 MapDecl->getType()->isLValueReferenceType(); 7491 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 7492 7493 if (OASE) 7494 ++DimSize; 7495 7496 if (Next == CE || IsMemberReference || IsNonDerefPointer || 7497 IsFinalArraySection) { 7498 // If this is not the last component, we expect the pointer to be 7499 // associated with an array expression or member expression. 7500 assert((Next == CE || 7501 isa<MemberExpr>(Next->getAssociatedExpression()) || 7502 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7503 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7504 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 7505 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7506 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7507 "Unexpected expression"); 7508 7509 Address LB = Address::invalid(); 7510 Address LowestElem = Address::invalid(); 7511 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 7512 const MemberExpr *E) { 7513 const Expr *BaseExpr = E->getBase(); 7514 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 7515 // scalar. 
7516 LValue BaseLV; 7517 if (E->isArrow()) { 7518 LValueBaseInfo BaseInfo; 7519 TBAAAccessInfo TBAAInfo; 7520 Address Addr = 7521 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 7522 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 7523 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 7524 } else { 7525 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 7526 } 7527 return BaseLV; 7528 }; 7529 if (OAShE) { 7530 LowestElem = LB = 7531 Address(CGF.EmitScalarExpr(OAShE->getBase()), 7532 CGF.ConvertTypeForMem( 7533 OAShE->getBase()->getType()->getPointeeType()), 7534 CGF.getContext().getTypeAlignInChars( 7535 OAShE->getBase()->getType())); 7536 } else if (IsMemberReference) { 7537 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 7538 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7539 LowestElem = CGF.EmitLValueForFieldInitialization( 7540 BaseLVal, cast<FieldDecl>(MapDecl)) 7541 .getAddress(CGF); 7542 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 7543 .getAddress(CGF); 7544 } else { 7545 LowestElem = LB = 7546 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7547 .getAddress(CGF); 7548 } 7549 7550 // If this component is a pointer inside the base struct then we don't 7551 // need to create any entry for it - it will be combined with the object 7552 // it is pointing to into a single PTR_AND_OBJ entry. 7553 bool IsMemberPointerOrAddr = 7554 EncounteredME && 7555 (((IsPointer || ForDeviceAddr) && 7556 I->getAssociatedExpression() == EncounteredME) || 7557 (IsPrevMemberReference && !IsPointer) || 7558 (IsMemberReference && Next != CE && 7559 !Next->getAssociatedExpression()->getType()->isPointerType())); 7560 if (!OverlappedElements.empty() && Next == CE) { 7561 // Handle base element with the info for overlapped elements. 
7562 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7563 assert(!IsPointer && 7564 "Unexpected base element with the pointer type."); 7565 // Mark the whole struct as the struct that requires allocation on the 7566 // device. 7567 PartialStruct.LowestElem = {0, LowestElem}; 7568 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7569 I->getAssociatedExpression()->getType()); 7570 Address HB = CGF.Builder.CreateConstGEP( 7571 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 7572 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty), 7573 TypeSize.getQuantity() - 1); 7574 PartialStruct.HighestElem = { 7575 std::numeric_limits<decltype( 7576 PartialStruct.HighestElem.first)>::max(), 7577 HB}; 7578 PartialStruct.Base = BP; 7579 PartialStruct.LB = LB; 7580 assert( 7581 PartialStruct.PreliminaryMapData.BasePointers.empty() && 7582 "Overlapped elements must be used only once for the variable."); 7583 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 7584 // Emit data for non-overlapped data. 7585 OpenMPOffloadMappingFlags Flags = 7586 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | 7587 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7588 /*AddPtrFlag=*/false, 7589 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 7590 llvm::Value *Size = nullptr; 7591 // Do bitcopy of all non-overlapped structure elements. 
7592 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7593 Component : OverlappedElements) { 7594 Address ComponentLB = Address::invalid(); 7595 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7596 Component) { 7597 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 7598 const auto *FD = dyn_cast<FieldDecl>(VD); 7599 if (FD && FD->getType()->isLValueReferenceType()) { 7600 const auto *ME = 7601 cast<MemberExpr>(MC.getAssociatedExpression()); 7602 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7603 ComponentLB = 7604 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 7605 .getAddress(CGF); 7606 } else { 7607 ComponentLB = 7608 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7609 .getAddress(CGF); 7610 } 7611 Size = CGF.Builder.CreatePtrDiff( 7612 CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7613 CGF.EmitCastToVoidPtr(LB.getPointer())); 7614 break; 7615 } 7616 } 7617 assert(Size && "Failed to determine structure size"); 7618 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7619 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7620 CombinedInfo.Pointers.push_back(LB.getPointer()); 7621 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7622 Size, CGF.Int64Ty, /*isSigned=*/true)); 7623 CombinedInfo.Types.push_back(Flags); 7624 CombinedInfo.Mappers.push_back(nullptr); 7625 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 7626 : 1); 7627 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7628 } 7629 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7630 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7631 CombinedInfo.Pointers.push_back(LB.getPointer()); 7632 Size = CGF.Builder.CreatePtrDiff( 7633 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 7634 CGF.EmitCastToVoidPtr(LB.getPointer())); 7635 CombinedInfo.Sizes.push_back( 7636 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7637 CombinedInfo.Types.push_back(Flags); 7638 CombinedInfo.Mappers.push_back(nullptr); 7639 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7640 : 1); 7641 break; 7642 } 7643 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7644 if (!IsMemberPointerOrAddr || 7645 (Next == CE && MapType != OMPC_MAP_unknown)) { 7646 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7647 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7648 CombinedInfo.Pointers.push_back(LB.getPointer()); 7649 CombinedInfo.Sizes.push_back( 7650 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7651 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7652 : 1); 7653 7654 // If Mapper is valid, the last component inherits the mapper. 7655 bool HasMapper = Mapper && Next == CE; 7656 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 7657 7658 // We need to add a pointer flag for each map that comes from the 7659 // same expression except for the first one. We also need to signal 7660 // this map is the first one that relates with the current capture 7661 // (there is a set of entries for each capture). 
7662 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7663 MapType, MapModifiers, MotionModifiers, IsImplicit, 7664 !IsExpressionFirstInfo || RequiresReference || 7665 FirstPointerInComplexData || IsMemberReference, 7666 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 7667 7668 if (!IsExpressionFirstInfo || IsMemberReference) { 7669 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7670 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7671 if (IsPointer || (IsMemberReference && Next != CE)) 7672 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO | 7673 OpenMPOffloadMappingFlags::OMP_MAP_FROM | 7674 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS | 7675 OpenMPOffloadMappingFlags::OMP_MAP_DELETE | 7676 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE); 7677 7678 if (ShouldBeMemberOf) { 7679 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7680 // should be later updated with the correct value of MEMBER_OF. 7681 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF; 7682 // From now on, all subsequent PTR_AND_OBJ entries should not be 7683 // marked as MEMBER_OF. 7684 ShouldBeMemberOf = false; 7685 } 7686 } 7687 7688 CombinedInfo.Types.push_back(Flags); 7689 } 7690 7691 // If we have encountered a member expression so far, keep track of the 7692 // mapped member. If the parent is "*this", then the value declaration 7693 // is nullptr. 
7694 if (EncounteredME) { 7695 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 7696 unsigned FieldIndex = FD->getFieldIndex(); 7697 7698 // Update info about the lowest and highest elements for this struct 7699 if (!PartialStruct.Base.isValid()) { 7700 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 7701 if (IsFinalArraySection) { 7702 Address HB = 7703 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 7704 .getAddress(CGF); 7705 PartialStruct.HighestElem = {FieldIndex, HB}; 7706 } else { 7707 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 7708 } 7709 PartialStruct.Base = BP; 7710 PartialStruct.LB = BP; 7711 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7712 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 7713 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7714 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 7715 } 7716 } 7717 7718 // Need to emit combined struct for array sections. 7719 if (IsFinalArraySection || IsNonContiguous) 7720 PartialStruct.IsArraySection = true; 7721 7722 // If we have a final array section, we are done with this expression. 7723 if (IsFinalArraySection) 7724 break; 7725 7726 // The pointer becomes the base for the next element. 7727 if (Next != CE) 7728 BP = IsMemberReference ? LowestElem : LB; 7729 7730 IsExpressionFirstInfo = false; 7731 IsCaptureFirstInfo = false; 7732 FirstPointerInComplexData = false; 7733 IsPrevMemberReference = IsMemberReference; 7734 } else if (FirstPointerInComplexData) { 7735 QualType Ty = Components.rbegin() 7736 ->getAssociatedDeclaration() 7737 ->getType() 7738 .getNonReferenceType(); 7739 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7740 FirstPointerInComplexData = false; 7741 } 7742 } 7743 // If ran into the whole component - allocate the space for the whole 7744 // record. 
7745 if (!EncounteredME) 7746 PartialStruct.HasCompleteRecord = true; 7747 7748 if (!IsNonContiguous) 7749 return; 7750 7751 const ASTContext &Context = CGF.getContext(); 7752 7753 // For supporting stride in array section, we need to initialize the first 7754 // dimension size as 1, first offset as 0, and first count as 1 7755 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 7756 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 7757 MapValuesArrayTy CurStrides; 7758 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 7759 uint64_t ElementTypeSize; 7760 7761 // Collect Size information for each dimension and get the element size as 7762 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 7763 // should be [10, 10] and the first stride is 4 btyes. 7764 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 7765 Components) { 7766 const Expr *AssocExpr = Component.getAssociatedExpression(); 7767 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7768 7769 if (!OASE) 7770 continue; 7771 7772 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 7773 auto *CAT = Context.getAsConstantArrayType(Ty); 7774 auto *VAT = Context.getAsVariableArrayType(Ty); 7775 7776 // We need all the dimension size except for the last dimension. 7777 assert((VAT || CAT || &Component == &*Components.begin()) && 7778 "Should be either ConstantArray or VariableArray if not the " 7779 "first Component"); 7780 7781 // Get element size if CurStrides is empty. 
7782 if (CurStrides.empty()) { 7783 const Type *ElementType = nullptr; 7784 if (CAT) 7785 ElementType = CAT->getElementType().getTypePtr(); 7786 else if (VAT) 7787 ElementType = VAT->getElementType().getTypePtr(); 7788 else 7789 assert(&Component == &*Components.begin() && 7790 "Only expect pointer (non CAT or VAT) when this is the " 7791 "first Component"); 7792 // If ElementType is null, then it means the base is a pointer 7793 // (neither CAT nor VAT) and we'll attempt to get ElementType again 7794 // for next iteration. 7795 if (ElementType) { 7796 // For the case that having pointer as base, we need to remove one 7797 // level of indirection. 7798 if (&Component != &*Components.begin()) 7799 ElementType = ElementType->getPointeeOrArrayElementType(); 7800 ElementTypeSize = 7801 Context.getTypeSizeInChars(ElementType).getQuantity(); 7802 CurStrides.push_back( 7803 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 7804 } 7805 } 7806 // Get dimension value except for the last dimension since we don't need 7807 // it. 7808 if (DimSizes.size() < Components.size() - 1) { 7809 if (CAT) 7810 DimSizes.push_back(llvm::ConstantInt::get( 7811 CGF.Int64Ty, CAT->getSize().getZExtValue())); 7812 else if (VAT) 7813 DimSizes.push_back(CGF.Builder.CreateIntCast( 7814 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 7815 /*IsSigned=*/false)); 7816 } 7817 } 7818 7819 // Skip the dummy dimension since we have already have its information. 7820 auto *DI = DimSizes.begin() + 1; 7821 // Product of dimension. 7822 llvm::Value *DimProd = 7823 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 7824 7825 // Collect info for non-contiguous. Notice that offset, count, and stride 7826 // are only meaningful for array-section, so we insert a null for anything 7827 // other than array-section. 7828 // Also, the size of offset, count, and stride are not the same as 7829 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 7830 // count, and stride are the same as the number of non-contiguous 7831 // declaration in target update to/from clause. 7832 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 7833 Components) { 7834 const Expr *AssocExpr = Component.getAssociatedExpression(); 7835 7836 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 7837 llvm::Value *Offset = CGF.Builder.CreateIntCast( 7838 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 7839 /*isSigned=*/false); 7840 CurOffsets.push_back(Offset); 7841 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 7842 CurStrides.push_back(CurStrides.back()); 7843 continue; 7844 } 7845 7846 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7847 7848 if (!OASE) 7849 continue; 7850 7851 // Offset 7852 const Expr *OffsetExpr = OASE->getLowerBound(); 7853 llvm::Value *Offset = nullptr; 7854 if (!OffsetExpr) { 7855 // If offset is absent, then we just set it to zero. 7856 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 7857 } else { 7858 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 7859 CGF.Int64Ty, 7860 /*isSigned=*/false); 7861 } 7862 CurOffsets.push_back(Offset); 7863 7864 // Count 7865 const Expr *CountExpr = OASE->getLength(); 7866 llvm::Value *Count = nullptr; 7867 if (!CountExpr) { 7868 // In Clang, once a high dimension is an array section, we construct all 7869 // the lower dimension as array section, however, for case like 7870 // arr[0:2][2], Clang construct the inner dimension as an array section 7871 // but it actually is not in an array section form according to spec. 7872 if (!OASE->getColonLocFirst().isValid() && 7873 !OASE->getColonLocSecond().isValid()) { 7874 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 7875 } else { 7876 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
7877 // When the length is absent it defaults to ⌈(size − 7878 // lower-bound)/stride⌉, where size is the size of the array 7879 // dimension. 7880 const Expr *StrideExpr = OASE->getStride(); 7881 llvm::Value *Stride = 7882 StrideExpr 7883 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 7884 CGF.Int64Ty, /*isSigned=*/false) 7885 : nullptr; 7886 if (Stride) 7887 Count = CGF.Builder.CreateUDiv( 7888 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 7889 else 7890 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 7891 } 7892 } else { 7893 Count = CGF.EmitScalarExpr(CountExpr); 7894 } 7895 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 7896 CurCounts.push_back(Count); 7897 7898 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 7899 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 7900 // Offset Count Stride 7901 // D0 0 1 4 (int) <- dummy dimension 7902 // D1 0 2 8 (2 * (1) * 4) 7903 // D2 1 2 20 (1 * (1 * 5) * 4) 7904 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 7905 const Expr *StrideExpr = OASE->getStride(); 7906 llvm::Value *Stride = 7907 StrideExpr 7908 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 7909 CGF.Int64Ty, /*isSigned=*/false) 7910 : nullptr; 7911 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 7912 if (Stride) 7913 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 7914 else 7915 CurStrides.push_back(DimProd); 7916 if (DI != DimSizes.end()) 7917 ++DI; 7918 } 7919 7920 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 7921 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 7922 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 7923 } 7924 7925 /// Return the adjusted map modifiers if the declaration a capture refers to 7926 /// appears in a first-private clause. This is expected to be used only with 7927 /// directives that start with 'target'. 
OpenMPOffloadMappingFlags
getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
  assert(Cap.capturesVariable() && "Expected capture by reference only!");

  // A first private variable captured by reference will use only the
  // 'private ptr' and 'map to' flag. Return the right flags if the captured
  // declaration is known as first-private in this handler.
  if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
    // Pointer-typed first-privates get TO | PTR_AND_OBJ; every other
    // first-private gets PRIVATE | TO.
    if (Cap.getCapturedVar()->getType()->isAnyPointerType())
      return OpenMPOffloadMappingFlags::OMP_MAP_TO |
             OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
    return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
           OpenMPOffloadMappingFlags::OMP_MAP_TO;
  }
  auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
  if (I != LambdasMap.end())
    // for map(to: lambda): using user specified map type.
    return getMapTypeBits(
        I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
        /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
        /*AddPtrFlag=*/false,
        /*AddIsTargetParamFlag=*/false,
        /*isNonContiguous=*/false);
  // Neither first-private nor a recorded lambda: fall back to TO | FROM.
  return OpenMPOffloadMappingFlags::OMP_MAP_TO |
         OpenMPOffloadMappingFlags::OMP_MAP_FROM;
}

/// Build the MEMBER_OF bits for the entry at index \p Position: the value
/// (Position + 1) placed at bit offset getFlagMemberOffset().
static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
  // Shift (not rotate) left by getFlagMemberOffset() bits. The widening to
  // uint64_t happens before the +1 and the shift.
  return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                << getFlagMemberOffset());
}

/// Replace the MEMBER_OF field of \p Flags with \p MemberOfFlag.
///
/// \p Flags is left untouched when it has PTR_AND_OBJ set and its MEMBER_OF
/// field is not the all-ones placeholder. In every other case the MEMBER_OF
/// bits are cleared and \p MemberOfFlag is OR-ed in.
static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                   OpenMPOffloadMappingFlags MemberOfFlag) {
  // If the entry is PTR_AND_OBJ but has not been marked with the special
  // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
  // marked as MEMBER_OF.
  if (static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
          Flags & OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ) &&
      static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
          (Flags & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF))
    return;

  // Reset the placeholder value to prepare the flag for the assignment of the
  // proper MEMBER_OF value.
  Flags &= ~OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
  Flags |= MemberOfFlag;
}

/// Append to \p Layout the fields of \p RD, recursing into its base classes,
/// in the order of their LLVM struct element indices.
///
/// \param RD     Record to flatten; asserted not to be a union.
/// \param Layout Output list; fields are appended, never removed.
/// \param AsBase When true the base-subobject LLVM type of \p RD is used to
///               size the intermediate table, otherwise the complete type.
void getPlainLayout(const CXXRecordDecl *RD,
                    llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                    bool AsBase) const {
  const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

  llvm::StructType *St =
      AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

  // One slot per LLVM struct element; each slot ends up null, a base class,
  // or a field.
  unsigned NumElements = St->getNumElements();
  llvm::SmallVector<
      llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
      RecordLayout(NumElements);

  // Fill bases.
  for (const auto &I : RD->bases()) {
    if (I.isVirtual())
      continue;
    const auto *Base = I.getType()->getAsCXXRecordDecl();
    // Ignore empty bases.
    if (Base->isEmpty() || CGF.getContext()
                               .getASTRecordLayout(Base)
                               .getNonVirtualSize()
                               .isZero())
      continue;

    unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
    RecordLayout[FieldIndex] = Base;
  }
  // Fill in virtual bases.
  for (const auto &I : RD->vbases()) {
    const auto *Base = I.getType()->getAsCXXRecordDecl();
    // Ignore empty bases.
    if (Base->isEmpty())
      continue;
    unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
    // Do not overwrite a slot that was already claimed above.
    if (RecordLayout[FieldIndex])
      continue;
    RecordLayout[FieldIndex] = Base;
  }
  // Fill in all the fields.
  assert(!RD->isUnion() && "Unexpected union.");
  for (const auto *Field : RD->fields()) {
    // Record only non-bitfield, non-zero-size fields; bitfields are never
    // added to the layout by this function.
    if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
      unsigned FieldIndex = RL.getLLVMFieldNo(Field);
      RecordLayout[FieldIndex] = Field;
    }
  }
  // Walk the slots in index order: recurse into bases (as base subobjects),
  // append plain fields, skip unused slots.
  for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
           &Data : RecordLayout) {
    if (Data.isNull())
      continue;
    if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
      getPlainLayout(Base, Layout, /*AsBase=*/true);
    else
      Layout.push_back(Data.get<const FieldDecl *>());
  }
}

/// Generate all the base pointers, section pointers, sizes, map types, and
/// mappers for the extracted mappable expressions (all included in \a
/// CombinedInfo). Also, for each item that relates with a device pointer, a
/// pair of the relevant declaration and index where it occurs is appended to
/// the device pointers info array.
///
/// \param Clauses      Clauses to scan; only map, to, from, use_device_ptr
///                     and use_device_addr clauses are looked at here.
/// \param CombinedInfo Output; the entries of each declaration are appended,
///                     followed by the use_device_* entries that had no map
///                     information.
/// \param SkipVarSet   Declarations for which no map information is recorded.
void generateAllInfoForClauses(
    ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
    const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
        llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
  // We have to process the component lists that relate with the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  // Each declaration owns one bucket per MapKind; the buckets are visited in
  // enumerator order below (Present, then Allocs, then Other). Total is only
  // the bucket count.
  enum MapKind { Present, Allocs, Other, Total };
  llvm::MapVector<CanonicalDeclPtr<const Decl>,
                  SmallVector<SmallVector<MapInfo, 8>, 4>>
      Info;

  // Helper function to fill the information map for the different supported
  // clauses.
  auto &&InfoGen =
      [&Info, &SkipVarSet](
          const ValueDecl *D, MapKind Kind,
          OMPClauseMappableExprCommon::MappableExprComponentListRef L,
          OpenMPMapClauseKind MapType,
          ArrayRef<OpenMPMapModifierKind> MapModifiers,
          ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
          bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
          const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
        if (SkipVarSet.contains(D))
          return;
        // Create the per-declaration buckets on first use.
        auto It = Info.find(D);
        if (It == Info.end())
          It = Info
                   .insert(std::make_pair(
                       D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                   .first;
        It->second[Kind].emplace_back(
            L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
            IsImplicit, Mapper, VarRef, ForDeviceAddr);
      };

  // Collect the component lists of all 'map' clauses.
  for (const auto *Cl : Clauses) {
    const auto *C = dyn_cast<OMPMapClause>(Cl);
    if (!C)
      continue;
    // The 'present' modifier takes precedence over the 'alloc' map type when
    // choosing the bucket.
    MapKind Kind = Other;
    if (llvm::is_contained(C->getMapTypeModifiers(),
                           OMPC_MAP_MODIFIER_present))
      Kind = Present;
    else if (C->getMapType() == OMPC_MAP_alloc)
      Kind = Allocs;
    // EI walks the clause's variable references in step with its component
    // lists.
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->component_lists()) {
      // The variable reference is only passed on when the clause has a valid
      // map location.
      const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
      InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
              C->getMapTypeModifiers(), std::nullopt,
              /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
              E);
      ++EI;
    }
  }
  // Collect the component lists of all 'to' clauses (recorded with map type
  // 'to').
  for (const auto *Cl : Clauses) {
    const auto *C = dyn_cast<OMPToClause>(Cl);
    if (!C)
      continue;
    MapKind Kind = Other;
    if (llvm::is_contained(C->getMotionModifiers(),
                           OMPC_MOTION_MODIFIER_present))
      Kind = Present;
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->component_lists()) {
      InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
              C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
              C->isImplicit(), std::get<2>(L), *EI);
      ++EI;
    }
  }
  // Collect the component lists of all 'from' clauses (recorded with map type
  // 'from').
  for (const auto *Cl : Clauses) {
    const auto *C = dyn_cast<OMPFromClause>(Cl);
    if (!C)
      continue;
    MapKind Kind = Other;
    if (llvm::is_contained(C->getMotionModifiers(),
                           OMPC_MOTION_MODIFIER_present))
      Kind = Present;
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->component_lists()) {
      InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
              std::nullopt, C->getMotionModifiers(),
              /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
              *EI);
      ++EI;
    }
  }

  // Look at the use_device_ptr and use_device_addr clauses information and
  // mark the existing map entries as such. If there is no map information for
  // an entry in the use_device_ptr and use_device_addr list, we create one
  // with map type 'alloc' and zero size section. It is the user's fault if
  // that was not mapped before. If there is no map information and the
  // pointer is a struct member, then we defer the emission of that entry
  // until the whole struct has been processed.
  llvm::MapVector<CanonicalDeclPtr<const Decl>,
                  SmallVector<DeferredDevicePtrEntryTy, 4>>
      DeferredInfo;
  MapCombinedInfoTy UseDeviceDataCombinedInfo;

  // Append one zero-sized RETURN_PARAM entry for VD, using Ptr as both base
  // pointer and pointer.
  auto &&UseDeviceDataCombinedInfoGen =
      [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                   CodeGenFunction &CGF) {
        UseDeviceDataCombinedInfo.Exprs.push_back(VD);
        UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr, VD);
        UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
        UseDeviceDataCombinedInfo.Sizes.push_back(
            llvm::Constant::getNullValue(CGF.Int64Ty));
        UseDeviceDataCombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
        UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
      };

  // Handle a use_device_ptr / use_device_addr list item that has no matching
  // map information.
  auto &&MapInfoGen =
      [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
       &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
                 OMPClauseMappableExprCommon::MappableExprComponentListRef
                     Components,
                 bool IsImplicit, bool IsDevAddr) {
        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer
        // this action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been
          // processed. Nonetheless, generateInfoForComponentList must be
          // called to take the pointer into account for the calculation of
          // the range of the partial struct.
          // Both the Info entry and the deferred entry are keyed by the null
          // declaration.
          InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
                  std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
                  nullptr, nullptr, IsDevAddr);
          DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
        } else {
          llvm::Value *Ptr;
          if (IsDevAddr) {
            // use_device_addr: take the address of a glvalue, otherwise the
            // scalar value of the expression.
            if (IE->isGLValue())
              Ptr = CGF.EmitLValue(IE).getPointer(CGF);
            else
              Ptr = CGF.EmitScalarExpr(IE);
          } else {
            // use_device_ptr: load the pointer value from the lvalue.
            Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          }
          UseDeviceDataCombinedInfoGen(VD, Ptr, CGF);
        }
      };

  // Return true (and flag the matching MapInfo with ReturnDevicePointer) when
  // map information usable for this list item already exists.
  auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
                                  const Expr *IE, bool IsDevAddr) -> bool {
    // We potentially have map information for this declaration already.
    // Look for the first set of components that refer to it. If found,
    // return true.
    // If the first component is a member expression, we have to look into
    // 'this', which maps to null in the map of map information. Otherwise
    // look directly for the information.
    auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
    if (It != Info.end()) {
      bool Found = false;
      for (auto &Data : It->second) {
        auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
          return MI.Components.back().getAssociatedDeclaration() == VD;
        });
        // If we found a map entry, signal that the pointer has to be
        // returned and move on to the next declaration. Exclude cases where
        // the base pointer is mapped as array subscript, array section or
        // array shaping. The base address is passed as a pointer to base in
        // this case and cannot be used as a base for use_device_ptr list
        // item.
        if (CI != Data.end()) {
          if (IsDevAddr) {
            CI->ReturnDevicePointer = true;
            Found = true;
            break;
          } else {
            // PrevCI is the component right after the base declaration, or
            // rend() if the list has a single component.
            auto PrevCI = std::next(CI->Components.rbegin());
            const auto *VarD = dyn_cast<VarDecl>(VD);
            if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                isa<MemberExpr>(IE) ||
                !VD->getType().getNonReferenceType()->isPointerType() ||
                PrevCI == CI->Components.rend() ||
                isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                VarD->hasLocalStorage()) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            }
          }
        }
      }
      return Found;
    }
    return false;
  };

  // Look at the use_device_ptr clause information and mark the existing map
  // entries as such. If there is no map information for an entry in the
  // use_device_ptr list, we create one with map type 'alloc' and zero size
  // section. It is the user's fault if that was not mapped before. If there
  // is no map information and the pointer is a struct member, then we defer
  // the emission of that entry until the whole struct has been processed.
  for (const auto *Cl : Clauses) {
    const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
    if (!C)
      continue;
    for (const auto L : C->component_lists()) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
          std::get<1>(L);
      assert(!Components.empty() &&
             "Not expecting empty list of components!");
      const ValueDecl *VD = Components.back().getAssociatedDeclaration();
      VD = cast<ValueDecl>(VD->getCanonicalDecl());
      const Expr *IE = Components.back().getAssociatedExpression();
      if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
        continue;
      MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                 /*IsDevAddr=*/false);
    }
  }

  // Same treatment for use_device_addr clauses; each declaration is handled
  // at most once across all of them.
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const auto *Cl : Clauses) {
    const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
    if (!C)
      continue;
    for (const auto L : C->component_lists()) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
          std::get<1>(L);
      assert(!std::get<1>(L).empty() &&
             "Not expecting empty list of components!");
      const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
      if (!Processed.insert(VD).second)
        continue;
      VD = cast<ValueDecl>(VD->getCanonicalDecl());
      const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
      if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
        continue;
      MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                 /*IsDevAddr=*/true);
    }
  }

  // Emit the entries declaration by declaration, in the insertion order of
  // Info.
  for (const auto &Data : Info) {
    StructRangeInfoTy PartialStruct;
    // Temporary generated information.
    MapCombinedInfoTy CurInfo;
    const Decl *D = Data.first;
    // VD is null for the entries keyed by the null declaration.
    const ValueDecl *VD = cast_or_null<ValueDecl>(D);
    for (const auto &M : Data.second) {
      for (const MapInfo &L : M) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
        CurInfo.NonContigInfo.IsNonContiguous =
            L.Components.back().isNonContiguous();
        generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
            CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
            L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          // The flag goes on the first entry generated for this component
          // list.
          CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
              RelevantVD);
          CurInfo.Types[CurrentBasePointersIdx] |=
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
        }
      }
    }

    // Append any pending zero-length pointers which are struct members and
    // used with use_device_ptr or use_device_addr.
    auto CI = DeferredInfo.find(Data.first);
    if (CI != DeferredInfo.end()) {
      for (const DeferredDevicePtrEntryTy &L : CI->second) {
        llvm::Value *BasePtr;
        llvm::Value *Ptr;
        if (L.ForDeviceAddr) {
          if (L.IE->isGLValue())
            Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          else
            Ptr = this->CGF.EmitScalarExpr(L.IE);
          BasePtr = Ptr;
          // Entry is RETURN_PARAM. Also, set the placeholder value
          // MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurInfo.Types.push_back(
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
              OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
        } else {
          // Base pointer is the address of the member; pointer is the value
          // loaded from it.
          BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                           L.IE->getExprLoc());
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
          // placeholder value MEMBER_OF=FFFF so that the entry is later
          // updated with the correct value of MEMBER_OF.
          CurInfo.Types.push_back(
              OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
              OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
        }
        CurInfo.Exprs.push_back(L.VD);
        CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
        CurInfo.Pointers.push_back(Ptr);
        CurInfo.Sizes.push_back(
            llvm::Constant::getNullValue(this->CGF.Int64Ty));
        CurInfo.Mappers.push_back(nullptr);
      }
    }
    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      CurInfo.NonContigInfo.Dims.push_back(0);
      emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
    }

    // We need to append the results of this capture to what we already
    // have.
    CombinedInfo.append(CurInfo);
  }
  // Append data for use_device_ptr clauses.
  CombinedInfo.append(UseDeviceDataCombinedInfo);
}

public:
/// Constructor for an executable directive: pre-collects the firstprivate,
/// uses_allocators, is_device_ptr, has_device_addr and lambda 'map(to:)'
/// information of \p Dir into the handler's lookup tables.
MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
    : CurDir(&Dir), CGF(CGF) {
  // Extract firstprivate clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
    for (const auto *D : C->varlists())
      FirstPrivateDecls.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
  // Extract implicit firstprivates from uses_allocators clauses.
  for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      // Prefer the traits variable when it is a plain declaration reference;
      // otherwise fall back to the allocator itself if it names a variable.
      if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
        FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                      /*Implicit=*/true);
      else if (const auto *VD = dyn_cast<VarDecl>(
                   cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                       ->getDecl()))
        FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
    }
  }
  // Extract device pointer clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
    for (auto L : C->component_lists())
      DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
  // Extract device addr clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
    for (auto L : C->component_lists())
      HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
  // Extract map information.
  // Only 'map(to:)' clauses are considered, and only declarations whose
  // (non-reference, canonical) type is a lambda are recorded.
  for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
    if (C->getMapType() != OMPC_MAP_to)
      continue;
    for (auto L : C->component_lists()) {
      const ValueDecl *VD = std::get<0>(L);
      const auto *RD = VD ? VD->getType()
                                .getCanonicalType()
                                .getNonReferenceType()
                                ->getAsCXXRecordDecl()
                          : nullptr;
      if (RD && RD->isLambda())
        LambdasMap.try_emplace(std::get<0>(L), C);
    }
  }
}

/// Constructor for the declare mapper directive.
8428 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8429 : CurDir(&Dir), CGF(CGF) {} 8430 8431 /// Generate code for the combined entry if we have a partially mapped struct 8432 /// and take care of the mapping flags of the arguments corresponding to 8433 /// individual struct members. 8434 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, 8435 MapFlagsArrayTy &CurTypes, 8436 const StructRangeInfoTy &PartialStruct, 8437 const ValueDecl *VD = nullptr, 8438 bool NotTargetParams = true) const { 8439 if (CurTypes.size() == 1 && 8440 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) != 8441 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) && 8442 !PartialStruct.IsArraySection) 8443 return; 8444 Address LBAddr = PartialStruct.LowestElem.second; 8445 Address HBAddr = PartialStruct.HighestElem.second; 8446 if (PartialStruct.HasCompleteRecord) { 8447 LBAddr = PartialStruct.LB; 8448 HBAddr = PartialStruct.LB; 8449 } 8450 CombinedInfo.Exprs.push_back(VD); 8451 // Base is the base of the struct 8452 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); 8453 // Pointer is the address of the lowest element 8454 llvm::Value *LB = LBAddr.getPointer(); 8455 const CXXMethodDecl *MD = 8456 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr; 8457 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr; 8458 bool HasBaseClass = RD ? RD->getNumBases() > 0 : false; 8459 // There should not be a mapper for a combined entry. 8460 if (HasBaseClass) { 8461 // OpenMP 5.2 148:21: 8462 // If the target construct is within a class non-static member function, 8463 // and a variable is an accessible data member of the object for which the 8464 // non-static data member function is invoked, the variable is treated as 8465 // if the this[:1] expression had appeared in a map clause with a map-type 8466 // of tofrom. 
8467 // Emit this[:1] 8468 CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer()); 8469 QualType Ty = MD->getThisType()->getPointeeType(); 8470 llvm::Value *Size = 8471 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty, 8472 /*isSigned=*/true); 8473 CombinedInfo.Sizes.push_back(Size); 8474 } else { 8475 CombinedInfo.Pointers.push_back(LB); 8476 // Size is (addr of {highest+1} element) - (addr of lowest element) 8477 llvm::Value *HB = HBAddr.getPointer(); 8478 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32( 8479 HBAddr.getElementType(), HB, /*Idx0=*/1); 8480 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8481 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8482 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr); 8483 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 8484 /*isSigned=*/false); 8485 CombinedInfo.Sizes.push_back(Size); 8486 } 8487 CombinedInfo.Mappers.push_back(nullptr); 8488 // Map type is always TARGET_PARAM, if generate info for captures. 8489 CombinedInfo.Types.push_back( 8490 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE 8491 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM); 8492 // If any element has the present modifier, then make sure the runtime 8493 // doesn't attempt to allocate the struct. 
8494 if (CurTypes.end() != 8495 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 8496 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 8497 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT); 8498 })) 8499 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT; 8500 // Remove TARGET_PARAM flag from the first element 8501 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; 8502 // If any element has the ompx_hold modifier, then make sure the runtime 8503 // uses the hold reference count for the struct as a whole so that it won't 8504 // be unmapped by an extra dynamic reference count decrement. Add it to all 8505 // elements as well so the runtime knows which reference count to check 8506 // when determining whether it's time for device-to-host transfers of 8507 // individual elements. 8508 if (CurTypes.end() != 8509 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 8510 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 8511 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD); 8512 })) { 8513 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; 8514 for (auto &M : CurTypes) 8515 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; 8516 } 8517 8518 // All other current entries will be MEMBER_OF the combined entry 8519 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8520 // 0xFFFF in the MEMBER_OF field). 8521 OpenMPOffloadMappingFlags MemberOfFlag = 8522 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); 8523 for (auto &M : CurTypes) 8524 setCorrectMemberOfFlag(M, MemberOfFlag); 8525 } 8526 8527 /// Generate all the base pointers, section pointers, sizes, map types, and 8528 /// mappers for the extracted mappable expressions (all included in \a 8529 /// CombinedInfo). 
Also, for each item that relates with a device pointer, a 8530 /// pair of the relevant declaration and index where it occurs is appended to 8531 /// the device pointers info array. 8532 void generateAllInfo( 8533 MapCombinedInfoTy &CombinedInfo, 8534 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8535 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8536 assert(CurDir.is<const OMPExecutableDirective *>() && 8537 "Expect a executable directive"); 8538 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8539 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); 8540 } 8541 8542 /// Generate all the base pointers, section pointers, sizes, map types, and 8543 /// mappers for the extracted map clauses of user-defined mapper (all included 8544 /// in \a CombinedInfo). 8545 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { 8546 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8547 "Expect a declare mapper directive"); 8548 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8549 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo); 8550 } 8551 8552 /// Emit capture info for lambdas for variables captured by reference. 
void generateInfoForLambdaCaptures(
    const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
  // Only declarations whose (canonical, non-reference) type is a lambda
  // record are handled; everything else is a no-op.
  QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
  const auto *RD = VDType->getAsCXXRecordDecl();
  if (!RD || !RD->isLambda())
    return;
  // Arg is used as the address of the lambda object.
  Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                 CGF.getContext().getDeclAlign(VD));
  LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
  llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
  FieldDecl *ThisCapture = nullptr;
  RD->getCaptureFields(Captures, ThisCapture);
  // A captured 'this' gets its own pointer-sized entry. LambdaPointers
  // remembers, for the field's address, the address of the owning lambda
  // object.
  if (ThisCapture) {
    LValue ThisLVal =
        CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
    LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
    LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                               VDLVal.getPointer(CGF));
    CombinedInfo.Exprs.push_back(VD);
    CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
    CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
    CombinedInfo.Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    // MEMBER_OF is only a placeholder here; entries with exactly this flag
    // combination are the ones adjustMemberOfForLambdaCaptures rewrites.
    CombinedInfo.Types.push_back(
        OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
        OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
        OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
        OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
  for (const LambdaCapture &LC : RD->captures()) {
    if (!LC.capturesVariable())
      continue;
    // Note: this local VD shadows the function parameter of the same name.
    const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
    // Captures that are not by-reference are only of interest when the
    // captured variable is a pointer.
    if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
      continue;
    auto It = Captures.find(VD);
    assert(It != Captures.end() && "Found lambda capture without field.");
    LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
    if (LC.getCaptureKind() == LCK_ByRef) {
      // By-reference capture: the size is that of the captured variable's
      // non-reference type.
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(
              VD->getType().getCanonicalType().getNonReferenceType()),
          CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Pointer that is not captured by reference: the pointer value is
      // loaded from the closure field and recorded with size zero.
      RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
      CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
    }
    CombinedInfo.Types.push_back(
        OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
        OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
        OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
        OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
}

/// Set correct indices for lambdas captures.
///
/// For every entry whose type is exactly PTR_AND_OBJ | LITERAL | MEMBER_OF |
/// IMPLICIT, the lambda object address recorded in \p LambdaPointers for its
/// base pointer is looked up, the closest preceding entry whose pointer
/// equals that address is located, and the entry's MEMBER_OF field is set to
/// refer to that entry's index.
void adjustMemberOfForLambdaCaptures(
    const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapFlagsArrayTy &Types) const {
  for (unsigned I = 0, E = Types.size(); I < E; ++I) {
    // Set correct member_of idx for all implicit lambda captures.
    if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                     OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                     OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
                     OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
      continue;
    llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
    assert(BasePtr && "Unable to find base lambda address.");
    // Scan backwards over entries I-1 .. 0 for the parent lambda entry.
    int TgtIdx = -1;
    for (unsigned J = I; J > 0; --J) {
      unsigned Idx = J - 1;
      if (Pointers[Idx] != BasePtr)
        continue;
      TgtIdx = Idx;
      break;
    }
    assert(TgtIdx != -1 && "Unable to find parent lambda.");
    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
    setCorrectMemberOfFlag(Types[I], MemberOfFlag);
  }
}

/// Generate the base pointers, section pointers, sizes, map types, and
/// mappers associated to a given capture (all included in \a CombinedInfo).
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                            llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                            StructRangeInfoTy &PartialStruct) const {
  assert(!Cap->capturesVariableArrayType() &&
         "Not expecting to generate map info for a variable array type!");

  // We need to know when we generating information for the first component
  const ValueDecl *VD = Cap->capturesThis()
                            ? nullptr
                            : Cap->getCapturedVar()->getCanonicalDecl();

  // for map(to: lambda): skip here, processing it in
  // generateDefaultMapInfo
  if (LambdasMap.count(VD))
    return;

  // If this declaration appears in a is_device_ptr clause we just have to
  // pass the pointer by value. If it is a reference to a declaration, we just
  // pass its value.
8675 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) { 8676 CombinedInfo.Exprs.push_back(VD); 8677 CombinedInfo.BasePointers.emplace_back(Arg, VD); 8678 CombinedInfo.Pointers.push_back(Arg); 8679 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8680 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, 8681 /*isSigned=*/true)); 8682 CombinedInfo.Types.push_back( 8683 (Cap->capturesVariable() 8684 ? OpenMPOffloadMappingFlags::OMP_MAP_TO 8685 : OpenMPOffloadMappingFlags::OMP_MAP_LITERAL) | 8686 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM); 8687 CombinedInfo.Mappers.push_back(nullptr); 8688 return; 8689 } 8690 8691 using MapData = 8692 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8693 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool, 8694 const ValueDecl *, const Expr *>; 8695 SmallVector<MapData, 4> DeclComponentLists; 8696 // For member fields list in is_device_ptr, store it in 8697 // DeclComponentLists for generating components info. 
8698 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown; 8699 auto It = DevPointersMap.find(VD); 8700 if (It != DevPointersMap.end()) 8701 for (const auto &MCL : It->second) 8702 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown, 8703 /*IsImpicit = */ true, nullptr, 8704 nullptr); 8705 auto I = HasDevAddrsMap.find(VD); 8706 if (I != HasDevAddrsMap.end()) 8707 for (const auto &MCL : I->second) 8708 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown, 8709 /*IsImpicit = */ true, nullptr, 8710 nullptr); 8711 assert(CurDir.is<const OMPExecutableDirective *>() && 8712 "Expect a executable directive"); 8713 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8714 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8715 const auto *EI = C->getVarRefs().begin(); 8716 for (const auto L : C->decl_component_lists(VD)) { 8717 const ValueDecl *VDecl, *Mapper; 8718 // The Expression is not correct if the mapping is implicit 8719 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; 8720 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8721 std::tie(VDecl, Components, Mapper) = L; 8722 assert(VDecl == VD && "We got information for the wrong declaration??"); 8723 assert(!Components.empty() && 8724 "Not expecting declaration with no component lists."); 8725 DeclComponentLists.emplace_back(Components, C->getMapType(), 8726 C->getMapTypeModifiers(), 8727 C->isImplicit(), Mapper, E); 8728 ++EI; 8729 } 8730 } 8731 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS, 8732 const MapData &RHS) { 8733 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS); 8734 OpenMPMapClauseKind MapType = std::get<1>(RHS); 8735 bool HasPresent = 8736 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present); 8737 bool HasAllocs = MapType == OMPC_MAP_alloc; 8738 MapModifiers = std::get<2>(RHS); 8739 MapType = std::get<1>(LHS); 8740 bool HasPresentR = 8741 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present); 8742 bool HasAllocsR = MapType == OMPC_MAP_alloc; 8743 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR); 8744 }); 8745 8746 // Find overlapping elements (including the offset from the base element). 
8747 llvm::SmallDenseMap< 8748 const MapData *, 8749 llvm::SmallVector< 8750 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8751 4> 8752 OverlappedData; 8753 size_t Count = 0; 8754 for (const MapData &L : DeclComponentLists) { 8755 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8756 OpenMPMapClauseKind MapType; 8757 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8758 bool IsImplicit; 8759 const ValueDecl *Mapper; 8760 const Expr *VarRef; 8761 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 8762 L; 8763 ++Count; 8764 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) { 8765 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8766 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper, 8767 VarRef) = L1; 8768 auto CI = Components.rbegin(); 8769 auto CE = Components.rend(); 8770 auto SI = Components1.rbegin(); 8771 auto SE = Components1.rend(); 8772 for (; CI != CE && SI != SE; ++CI, ++SI) { 8773 if (CI->getAssociatedExpression()->getStmtClass() != 8774 SI->getAssociatedExpression()->getStmtClass()) 8775 break; 8776 // Are we dealing with different variables/fields? 8777 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8778 break; 8779 } 8780 // Found overlapping if, at least for one component, reached the head 8781 // of the components list. 8782 if (CI == CE || SI == SE) { 8783 // Ignore it if it is the same component. 8784 if (CI == CE && SI == SE) 8785 continue; 8786 const auto It = (SI == SE) ? CI : SI; 8787 // If one component is a pointer and another one is a kind of 8788 // dereference of this pointer (array subscript, section, dereference, 8789 // etc.), it is not an overlapping. 8790 // Same, if one component is a base and another component is a 8791 // dereferenced pointer memberexpr with the same base. 
8792 if (!isa<MemberExpr>(It->getAssociatedExpression()) || 8793 (std::prev(It)->getAssociatedDeclaration() && 8794 std::prev(It) 8795 ->getAssociatedDeclaration() 8796 ->getType() 8797 ->isPointerType()) || 8798 (It->getAssociatedDeclaration() && 8799 It->getAssociatedDeclaration()->getType()->isPointerType() && 8800 std::next(It) != CE && std::next(It) != SE)) 8801 continue; 8802 const MapData &BaseData = CI == CE ? L : L1; 8803 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8804 SI == SE ? Components : Components1; 8805 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8806 OverlappedElements.getSecond().push_back(SubData); 8807 } 8808 } 8809 } 8810 // Sort the overlapped elements for each item. 8811 llvm::SmallVector<const FieldDecl *, 4> Layout; 8812 if (!OverlappedData.empty()) { 8813 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr(); 8814 const Type *OrigType = BaseType->getPointeeOrArrayElementType(); 8815 while (BaseType != OrigType) { 8816 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr(); 8817 OrigType = BaseType->getPointeeOrArrayElementType(); 8818 } 8819 8820 if (const auto *CRD = BaseType->getAsCXXRecordDecl()) 8821 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8822 else { 8823 const auto *RD = BaseType->getAsRecordDecl(); 8824 Layout.append(RD->field_begin(), RD->field_end()); 8825 } 8826 } 8827 for (auto &Pair : OverlappedData) { 8828 llvm::stable_sort( 8829 Pair.getSecond(), 8830 [&Layout]( 8831 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8832 OMPClauseMappableExprCommon::MappableExprComponentListRef 8833 Second) { 8834 auto CI = First.rbegin(); 8835 auto CE = First.rend(); 8836 auto SI = Second.rbegin(); 8837 auto SE = Second.rend(); 8838 for (; CI != CE && SI != SE; ++CI, ++SI) { 8839 if (CI->getAssociatedExpression()->getStmtClass() != 8840 SI->getAssociatedExpression()->getStmtClass()) 8841 break; 8842 // Are we dealing with different 
variables/fields? 8843 if (CI->getAssociatedDeclaration() != 8844 SI->getAssociatedDeclaration()) 8845 break; 8846 } 8847 8848 // Lists contain the same elements. 8849 if (CI == CE && SI == SE) 8850 return false; 8851 8852 // List with less elements is less than list with more elements. 8853 if (CI == CE || SI == SE) 8854 return CI == CE; 8855 8856 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8857 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8858 if (FD1->getParent() == FD2->getParent()) 8859 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8860 const auto *It = 8861 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8862 return FD == FD1 || FD == FD2; 8863 }); 8864 return *It == FD1; 8865 }); 8866 } 8867 8868 // Associated with a capture, because the mapping flags depend on it. 8869 // Go through all of the elements with the overlapped elements. 8870 bool IsFirstComponentList = true; 8871 for (const auto &Pair : OverlappedData) { 8872 const MapData &L = *Pair.getFirst(); 8873 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8874 OpenMPMapClauseKind MapType; 8875 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8876 bool IsImplicit; 8877 const ValueDecl *Mapper; 8878 const Expr *VarRef; 8879 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 8880 L; 8881 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8882 OverlappedComponents = Pair.getSecond(); 8883 generateInfoForComponentList( 8884 MapType, MapModifiers, std::nullopt, Components, CombinedInfo, 8885 PartialStruct, IsFirstComponentList, IsImplicit, Mapper, 8886 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents); 8887 IsFirstComponentList = false; 8888 } 8889 // Go through other elements without overlapped elements. 
8890 for (const MapData &L : DeclComponentLists) { 8891 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8892 OpenMPMapClauseKind MapType; 8893 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8894 bool IsImplicit; 8895 const ValueDecl *Mapper; 8896 const Expr *VarRef; 8897 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 8898 L; 8899 auto It = OverlappedData.find(&L); 8900 if (It == OverlappedData.end()) 8901 generateInfoForComponentList(MapType, MapModifiers, std::nullopt, 8902 Components, CombinedInfo, PartialStruct, 8903 IsFirstComponentList, IsImplicit, Mapper, 8904 /*ForDeviceAddr=*/false, VD, VarRef); 8905 IsFirstComponentList = false; 8906 } 8907 } 8908 8909 /// Generate the default map information for a given capture \a CI, 8910 /// record field declaration \a RI and captured value \a CV. 8911 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8912 const FieldDecl &RI, llvm::Value *CV, 8913 MapCombinedInfoTy &CombinedInfo) const { 8914 bool IsImplicit = true; 8915 // Do the default mapping. 8916 if (CI.capturesThis()) { 8917 CombinedInfo.Exprs.push_back(nullptr); 8918 CombinedInfo.BasePointers.push_back(CV); 8919 CombinedInfo.Pointers.push_back(CV); 8920 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8921 CombinedInfo.Sizes.push_back( 8922 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8923 CGF.Int64Ty, /*isSigned=*/true)); 8924 // Default map type. 8925 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO | 8926 OpenMPOffloadMappingFlags::OMP_MAP_FROM); 8927 } else if (CI.capturesVariableByCopy()) { 8928 const VarDecl *VD = CI.getCapturedVar(); 8929 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 8930 CombinedInfo.BasePointers.push_back(CV); 8931 CombinedInfo.Pointers.push_back(CV); 8932 if (!RI.getType()->isAnyPointerType()) { 8933 // We have to signal to the runtime captures passed by value that are 8934 // not pointers. 
8935 CombinedInfo.Types.push_back( 8936 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL); 8937 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8938 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8939 } else { 8940 // Pointers are implicitly mapped with a zero size and no flags 8941 // (other than first map that is added for all implicit maps). 8942 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE); 8943 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8944 } 8945 auto I = FirstPrivateDecls.find(VD); 8946 if (I != FirstPrivateDecls.end()) 8947 IsImplicit = I->getSecond(); 8948 } else { 8949 assert(CI.capturesVariable() && "Expected captured reference."); 8950 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8951 QualType ElementType = PtrTy->getPointeeType(); 8952 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8953 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8954 // The default map type for a scalar/complex type is 'to' because by 8955 // default the value doesn't have to be retrieved. For an aggregate 8956 // type, the default is 'tofrom'. 8957 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI)); 8958 const VarDecl *VD = CI.getCapturedVar(); 8959 auto I = FirstPrivateDecls.find(VD); 8960 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 8961 CombinedInfo.BasePointers.push_back(CV); 8962 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8963 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8964 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8965 AlignmentSource::Decl)); 8966 CombinedInfo.Pointers.push_back(PtrAddr.getPointer()); 8967 } else { 8968 CombinedInfo.Pointers.push_back(CV); 8969 } 8970 if (I != FirstPrivateDecls.end()) 8971 IsImplicit = I->getSecond(); 8972 } 8973 // Every default map produces a single argument which is a target parameter. 
8974 CombinedInfo.Types.back() |= 8975 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; 8976 8977 // Add flag stating this is an implicit map. 8978 if (IsImplicit) 8979 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; 8980 8981 // No user-defined mapper for default mapping. 8982 CombinedInfo.Mappers.push_back(nullptr); 8983 } 8984 }; 8985 } // anonymous namespace 8986 8987 static void emitNonContiguousDescriptor( 8988 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 8989 CGOpenMPRuntime::TargetDataInfo &Info) { 8990 CodeGenModule &CGM = CGF.CGM; 8991 MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo 8992 &NonContigInfo = CombinedInfo.NonContigInfo; 8993 8994 // Build an array of struct descriptor_dim and then assign it to 8995 // offload_args. 8996 // 8997 // struct descriptor_dim { 8998 // uint64_t offset; 8999 // uint64_t count; 9000 // uint64_t stride 9001 // }; 9002 ASTContext &C = CGF.getContext(); 9003 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 9004 RecordDecl *RD; 9005 RD = C.buildImplicitRecord("descriptor_dim"); 9006 RD->startDefinition(); 9007 addFieldToRecordDecl(C, RD, Int64Ty); 9008 addFieldToRecordDecl(C, RD, Int64Ty); 9009 addFieldToRecordDecl(C, RD, Int64Ty); 9010 RD->completeDefinition(); 9011 QualType DimTy = C.getRecordType(RD); 9012 9013 enum { OffsetFD = 0, CountFD, StrideFD }; 9014 // We need two index variable here since the size of "Dims" is the same as the 9015 // size of Components, however, the size of offset, count, and stride is equal 9016 // to the size of base declaration that is non-contiguous. 9017 for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) { 9018 // Skip emitting ir if dimension size is 1 since it cannot be 9019 // non-contiguous. 
9020 if (NonContigInfo.Dims[I] == 1) 9021 continue; 9022 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]); 9023 QualType ArrayTy = 9024 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); 9025 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 9026 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { 9027 unsigned RevIdx = EE - II - 1; 9028 LValue DimsLVal = CGF.MakeAddrLValue( 9029 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); 9030 // Offset 9031 LValue OffsetLVal = CGF.EmitLValueForField( 9032 DimsLVal, *std::next(RD->field_begin(), OffsetFD)); 9033 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal); 9034 // Count 9035 LValue CountLVal = CGF.EmitLValueForField( 9036 DimsLVal, *std::next(RD->field_begin(), CountFD)); 9037 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal); 9038 // Stride 9039 LValue StrideLVal = CGF.EmitLValueForField( 9040 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 9041 CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); 9042 } 9043 // args[I] = &dims 9044 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9045 DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty); 9046 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9047 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9048 Info.RTArgs.PointersArray, 0, I); 9049 Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign()); 9050 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); 9051 ++L; 9052 } 9053 } 9054 9055 // Try to extract the base declaration from a `this->x` expression if possible. 
9056 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9057 if (!E) 9058 return nullptr; 9059 9060 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9061 if (const MemberExpr *ME = 9062 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9063 return ME->getMemberDecl(); 9064 return nullptr; 9065 } 9066 9067 /// Emit a string constant containing the names of the values mapped to the 9068 /// offloading runtime library. 9069 llvm::Constant * 9070 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9071 MappableExprsHandler::MappingExprInfo &MapExprs) { 9072 9073 uint32_t SrcLocStrSize; 9074 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9075 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 9076 9077 SourceLocation Loc; 9078 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9079 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9080 Loc = VD->getLocation(); 9081 else 9082 Loc = MapExprs.getMapExpr()->getExprLoc(); 9083 } else { 9084 Loc = MapExprs.getMapDecl()->getLocation(); 9085 } 9086 9087 std::string ExprName; 9088 if (MapExprs.getMapExpr()) { 9089 PrintingPolicy P(CGF.getContext().getLangOpts()); 9090 llvm::raw_string_ostream OS(ExprName); 9091 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9092 OS.flush(); 9093 } else { 9094 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9095 } 9096 9097 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9098 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, 9099 PLoc.getLine(), PLoc.getColumn(), 9100 SrcLocStrSize); 9101 } 9102 9103 /// Emit the arrays used to pass the captures and map information to the 9104 /// offloading runtime library. If there is no map or capture information, 9105 /// return nullptr by reference. 
9106 static void emitOffloadingArrays( 9107 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9108 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 9109 bool IsNonContiguous = false) { 9110 CodeGenModule &CGM = CGF.CGM; 9111 ASTContext &Ctx = CGF.getContext(); 9112 9113 // Reset the array information. 9114 Info.clearArrayInfo(); 9115 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 9116 9117 if (Info.NumberOfPtrs) { 9118 // Detect if we have any capture size requiring runtime evaluation of the 9119 // size so that a constant array could be eventually used. 9120 9121 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9122 QualType PointerArrayType = Ctx.getConstantArrayType( 9123 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9124 /*IndexTypeQuals=*/0); 9125 9126 Info.RTArgs.BasePointersArray = 9127 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9128 Info.RTArgs.PointersArray = 9129 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9130 Address MappersArray = 9131 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 9132 Info.RTArgs.MappersArray = MappersArray.getPointer(); 9133 9134 // If we don't have any VLA types or other types that require runtime 9135 // evaluation, we can use a constant array for the map sizes, otherwise we 9136 // need to fill up the arrays as we do for the pointers. 
9137 QualType Int64Ty = 9138 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9139 SmallVector<llvm::Constant *> ConstSizes( 9140 CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0)); 9141 llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size()); 9142 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { 9143 if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) { 9144 if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) { 9145 if (IsNonContiguous && 9146 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9147 CombinedInfo.Types[I] & 9148 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG)) 9149 ConstSizes[I] = llvm::ConstantInt::get( 9150 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]); 9151 else 9152 ConstSizes[I] = CI; 9153 continue; 9154 } 9155 } 9156 RuntimeSizes.set(I); 9157 } 9158 9159 if (RuntimeSizes.all()) { 9160 QualType SizeArrayType = Ctx.getConstantArrayType( 9161 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9162 /*IndexTypeQuals=*/0); 9163 Info.RTArgs.SizesArray = 9164 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9165 } else { 9166 auto *SizesArrayInit = llvm::ConstantArray::get( 9167 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9168 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9169 auto *SizesArrayGbl = new llvm::GlobalVariable( 9170 CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true, 9171 llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name); 9172 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9173 if (RuntimeSizes.any()) { 9174 QualType SizeArrayType = Ctx.getConstantArrayType( 9175 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9176 /*IndexTypeQuals=*/0); 9177 Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes"); 9178 llvm::Value *GblConstPtr = 9179 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9180 SizesArrayGbl, CGM.Int64Ty->getPointerTo()); 
9181 CGF.Builder.CreateMemCpy( 9182 Buffer, 9183 Address(GblConstPtr, CGM.Int64Ty, 9184 CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth( 9185 /*DestWidth=*/64, /*Signed=*/false))), 9186 CGF.getTypeSize(SizeArrayType)); 9187 Info.RTArgs.SizesArray = Buffer.getPointer(); 9188 } else { 9189 Info.RTArgs.SizesArray = SizesArrayGbl; 9190 } 9191 } 9192 9193 // The map types are always constant so we don't need to generate code to 9194 // fill arrays. Instead, we create an array constant. 9195 SmallVector<uint64_t, 4> Mapping; 9196 for (auto mapFlag : CombinedInfo.Types) 9197 Mapping.push_back( 9198 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9199 mapFlag)); 9200 std::string MaptypesName = 9201 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9202 auto *MapTypesArrayGbl = 9203 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9204 Info.RTArgs.MapTypesArray = MapTypesArrayGbl; 9205 9206 // The information types are only built if there is debug information 9207 // requested. 9208 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { 9209 Info.RTArgs.MapNamesArray = llvm::Constant::getNullValue( 9210 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); 9211 } else { 9212 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 9213 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 9214 }; 9215 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); 9216 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); 9217 std::string MapnamesName = 9218 CGM.getOpenMPRuntime().getName({"offload_mapnames"}); 9219 auto *MapNamesArrayGbl = 9220 OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName); 9221 Info.RTArgs.MapNamesArray = MapNamesArrayGbl; 9222 } 9223 9224 // If there's a present map type modifier, it must not be applied to the end 9225 // of a region, so generate a separate map type array in that case. 
9226 if (Info.separateBeginEndCalls()) { 9227 bool EndMapTypesDiffer = false; 9228 for (uint64_t &Type : Mapping) { 9229 if (Type & 9230 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9231 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) { 9232 Type &= 9233 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9234 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT); 9235 EndMapTypesDiffer = true; 9236 } 9237 } 9238 if (EndMapTypesDiffer) { 9239 MapTypesArrayGbl = 9240 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9241 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl; 9242 } 9243 } 9244 9245 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 9246 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 9247 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 9248 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9249 Info.RTArgs.BasePointersArray, 0, I); 9250 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9251 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9252 Address BPAddr(BP, BPVal->getType(), 9253 Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9254 CGF.Builder.CreateStore(BPVal, BPAddr); 9255 9256 if (Info.requiresDevicePointerInfo()) 9257 if (const ValueDecl *DevVD = 9258 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 9259 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 9260 9261 llvm::Value *PVal = CombinedInfo.Pointers[I]; 9262 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9263 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9264 Info.RTArgs.PointersArray, 0, I); 9265 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9266 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9267 Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9268 CGF.Builder.CreateStore(PVal, PAddr); 9269 9270 if (RuntimeSizes.test(I)) { 9271 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 9272 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9273 Info.RTArgs.SizesArray, 9274 
/*Idx0=*/0, 9275 /*Idx1=*/I); 9276 Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty)); 9277 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 9278 CGM.Int64Ty, 9279 /*isSigned=*/true), 9280 SAddr); 9281 } 9282 9283 // Fill up the mapper array. 9284 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9285 if (CombinedInfo.Mappers[I]) { 9286 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 9287 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 9288 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 9289 Info.HasMapper = true; 9290 } 9291 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 9292 CGF.Builder.CreateStore(MFunc, MAddr); 9293 } 9294 } 9295 9296 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || 9297 Info.NumberOfPtrs == 0) 9298 return; 9299 9300 emitNonContiguousDescriptor(CGF, CombinedInfo, Info); 9301 } 9302 9303 /// Check for inner distribute directive. 9304 static const OMPExecutableDirective * 9305 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9306 const auto *CS = D.getInnermostCapturedStmt(); 9307 const auto *Body = 9308 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9309 const Stmt *ChildStmt = 9310 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9311 9312 if (const auto *NestedDir = 9313 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9314 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9315 switch (D.getDirectiveKind()) { 9316 case OMPD_target: 9317 if (isOpenMPDistributeDirective(DKind)) 9318 return NestedDir; 9319 if (DKind == OMPD_teams) { 9320 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9321 /*IgnoreCaptured=*/true); 9322 if (!Body) 9323 return nullptr; 9324 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9325 if (const auto *NND = 9326 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9327 DKind = 
NND->getDirectiveKind(); 9328 if (isOpenMPDistributeDirective(DKind)) 9329 return NND; 9330 } 9331 } 9332 return nullptr; 9333 case OMPD_target_teams: 9334 if (isOpenMPDistributeDirective(DKind)) 9335 return NestedDir; 9336 return nullptr; 9337 case OMPD_target_parallel: 9338 case OMPD_target_simd: 9339 case OMPD_target_parallel_for: 9340 case OMPD_target_parallel_for_simd: 9341 return nullptr; 9342 case OMPD_target_teams_distribute: 9343 case OMPD_target_teams_distribute_simd: 9344 case OMPD_target_teams_distribute_parallel_for: 9345 case OMPD_target_teams_distribute_parallel_for_simd: 9346 case OMPD_parallel: 9347 case OMPD_for: 9348 case OMPD_parallel_for: 9349 case OMPD_parallel_master: 9350 case OMPD_parallel_sections: 9351 case OMPD_for_simd: 9352 case OMPD_parallel_for_simd: 9353 case OMPD_cancel: 9354 case OMPD_cancellation_point: 9355 case OMPD_ordered: 9356 case OMPD_threadprivate: 9357 case OMPD_allocate: 9358 case OMPD_task: 9359 case OMPD_simd: 9360 case OMPD_tile: 9361 case OMPD_unroll: 9362 case OMPD_sections: 9363 case OMPD_section: 9364 case OMPD_single: 9365 case OMPD_master: 9366 case OMPD_critical: 9367 case OMPD_taskyield: 9368 case OMPD_barrier: 9369 case OMPD_taskwait: 9370 case OMPD_taskgroup: 9371 case OMPD_atomic: 9372 case OMPD_flush: 9373 case OMPD_depobj: 9374 case OMPD_scan: 9375 case OMPD_teams: 9376 case OMPD_target_data: 9377 case OMPD_target_exit_data: 9378 case OMPD_target_enter_data: 9379 case OMPD_distribute: 9380 case OMPD_distribute_simd: 9381 case OMPD_distribute_parallel_for: 9382 case OMPD_distribute_parallel_for_simd: 9383 case OMPD_teams_distribute: 9384 case OMPD_teams_distribute_simd: 9385 case OMPD_teams_distribute_parallel_for: 9386 case OMPD_teams_distribute_parallel_for_simd: 9387 case OMPD_target_update: 9388 case OMPD_declare_simd: 9389 case OMPD_declare_variant: 9390 case OMPD_begin_declare_variant: 9391 case OMPD_end_declare_variant: 9392 case OMPD_declare_target: 9393 case OMPD_end_declare_target: 9394 case 
OMPD_declare_reduction: 9395 case OMPD_declare_mapper: 9396 case OMPD_taskloop: 9397 case OMPD_taskloop_simd: 9398 case OMPD_master_taskloop: 9399 case OMPD_master_taskloop_simd: 9400 case OMPD_parallel_master_taskloop: 9401 case OMPD_parallel_master_taskloop_simd: 9402 case OMPD_requires: 9403 case OMPD_metadirective: 9404 case OMPD_unknown: 9405 default: 9406 llvm_unreachable("Unexpected directive."); 9407 } 9408 } 9409 9410 return nullptr; 9411 } 9412 9413 /// Emit the user-defined mapper function. The code generation follows the 9414 /// pattern in the example below. 9415 /// \code 9416 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9417 /// void *base, void *begin, 9418 /// int64_t size, int64_t type, 9419 /// void *name = nullptr) { 9420 /// // Allocate space for an array section first or add a base/begin for 9421 /// // pointer dereference. 9422 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) && 9423 /// !maptype.IsDelete) 9424 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9425 /// size*sizeof(Ty), clearToFromMember(type)); 9426 /// // Map members. 9427 /// for (unsigned i = 0; i < size; i++) { 9428 /// // For each component specified by this mapper: 9429 /// for (auto c : begin[i]->all_components) { 9430 /// if (c.hasMapper()) 9431 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 9432 /// c.arg_type, c.arg_name); 9433 /// else 9434 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 9435 /// c.arg_begin, c.arg_size, c.arg_type, 9436 /// c.arg_name); 9437 /// } 9438 /// } 9439 /// // Delete the array section. 
9440 /// if (size > 1 && maptype.IsDelete) 9441 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9442 /// size*sizeof(Ty), clearToFromMember(type)); 9443 /// } 9444 /// \endcode 9445 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 9446 CodeGenFunction *CGF) { 9447 if (UDMMap.count(D) > 0) 9448 return; 9449 ASTContext &C = CGM.getContext(); 9450 QualType Ty = D->getType(); 9451 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 9452 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 9453 auto *MapperVarDecl = 9454 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 9455 SourceLocation Loc = D->getLocation(); 9456 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 9457 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty); 9458 9459 // Prepare mapper function arguments and attributes. 9460 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9461 C.VoidPtrTy, ImplicitParamDecl::Other); 9462 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9463 ImplicitParamDecl::Other); 9464 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9465 C.VoidPtrTy, ImplicitParamDecl::Other); 9466 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9467 ImplicitParamDecl::Other); 9468 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9469 ImplicitParamDecl::Other); 9470 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9471 ImplicitParamDecl::Other); 9472 FunctionArgList Args; 9473 Args.push_back(&HandleArg); 9474 Args.push_back(&BaseArg); 9475 Args.push_back(&BeginArg); 9476 Args.push_back(&SizeArg); 9477 Args.push_back(&TypeArg); 9478 Args.push_back(&NameArg); 9479 const CGFunctionInfo &FnInfo = 9480 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 9481 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 9482 SmallString<64> TyStr; 
9483 llvm::raw_svector_ostream Out(TyStr); 9484 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 9485 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 9486 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 9487 Name, &CGM.getModule()); 9488 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 9489 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 9490 // Start the mapper function code generation. 9491 CodeGenFunction MapperCGF(CGM); 9492 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 9493 // Compute the starting and end addresses of array elements. 9494 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 9495 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 9496 C.getPointerType(Int64Ty), Loc); 9497 // Prepare common arguments for array initiation and deletion. 9498 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 9499 MapperCGF.GetAddrOfLocalVar(&HandleArg), 9500 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9501 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 9502 MapperCGF.GetAddrOfLocalVar(&BaseArg), 9503 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9504 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 9505 MapperCGF.GetAddrOfLocalVar(&BeginArg), 9506 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9507 // Convert the size in bytes into the number of array elements. 
9508 Size = MapperCGF.Builder.CreateExactUDiv( 9509 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9510 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 9511 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy)); 9512 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size); 9513 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 9514 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 9515 C.getPointerType(Int64Ty), Loc); 9516 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar( 9517 MapperCGF.GetAddrOfLocalVar(&NameArg), 9518 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9519 9520 // Emit array initiation if this is an array section and \p MapType indicates 9521 // that memory allocation is required. 9522 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 9523 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9524 MapName, ElementSize, HeadBB, /*IsInit=*/true); 9525 9526 // Emit a for loop to iterate through SizeArg of elements and map all of them. 9527 9528 // Emit the loop header block. 9529 MapperCGF.EmitBlock(HeadBB); 9530 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 9531 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 9532 // Evaluate whether the initial condition is satisfied. 9533 llvm::Value *IsEmpty = 9534 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 9535 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 9536 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 9537 9538 // Emit the loop body block. 
9539 MapperCGF.EmitBlock(BodyBB); 9540 llvm::BasicBlock *LastBB = BodyBB; 9541 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 9542 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 9543 PtrPHI->addIncoming(PtrBegin, EntryBB); 9544 Address PtrCurrent(PtrPHI, ElemTy, 9545 MapperCGF.GetAddrOfLocalVar(&BeginArg) 9546 .getAlignment() 9547 .alignmentOfArrayElement(ElementSize)); 9548 // Privatize the declared variable of mapper to be the current array element. 9549 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 9550 Scope.addPrivate(MapperVarDecl, PtrCurrent); 9551 (void)Scope.Privatize(); 9552 9553 // Get map clause information. Fill up the arrays with all mapped variables. 9554 MappableExprsHandler::MapCombinedInfoTy Info; 9555 MappableExprsHandler MEHandler(*D, MapperCGF); 9556 MEHandler.generateAllInfoForMapper(Info); 9557 9558 // Call the runtime API __tgt_mapper_num_components to get the number of 9559 // pre-existing components. 9560 llvm::Value *OffloadingArgs[] = {Handle}; 9561 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 9562 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9563 OMPRTL___tgt_mapper_num_components), 9564 OffloadingArgs); 9565 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 9566 PreviousSize, 9567 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 9568 9569 // Fill up the runtime mapper handle for all components. 9570 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) { 9571 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 9572 *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9573 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 9574 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9575 llvm::Value *CurSizeArg = Info.Sizes[I]; 9576 llvm::Value *CurNameArg = 9577 (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9578 ? 
llvm::ConstantPointerNull::get(CGM.VoidPtrTy) 9579 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]); 9580 9581 // Extract the MEMBER_OF field from the map type. 9582 llvm::Value *OriMapType = MapperCGF.Builder.getInt64( 9583 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9584 Info.Types[I])); 9585 llvm::Value *MemberMapType = 9586 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 9587 9588 // Combine the map type inherited from user-defined mapper with that 9589 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 9590 // bits of the \a MapType, which is the input argument of the mapper 9591 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9592 // bits of MemberMapType. 9593 // [OpenMP 5.0], 1.2.6. map-type decay. 9594 // | alloc | to | from | tofrom | release | delete 9595 // ---------------------------------------------------------- 9596 // alloc | alloc | alloc | alloc | alloc | release | delete 9597 // to | alloc | to | alloc | to | release | delete 9598 // from | alloc | alloc | from | from | release | delete 9599 // tofrom | alloc | to | from | tofrom | release | delete 9600 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9601 MapType, 9602 MapperCGF.Builder.getInt64( 9603 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9604 OpenMPOffloadMappingFlags::OMP_MAP_TO | 9605 OpenMPOffloadMappingFlags::OMP_MAP_FROM))); 9606 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9607 llvm::BasicBlock *AllocElseBB = 9608 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9609 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9610 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9611 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9612 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9613 llvm::Value *IsAlloc = 
MapperCGF.Builder.CreateIsNull(LeftToFrom); 9614 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9615 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9616 MapperCGF.EmitBlock(AllocBB); 9617 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9618 MemberMapType, 9619 MapperCGF.Builder.getInt64( 9620 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9621 OpenMPOffloadMappingFlags::OMP_MAP_TO | 9622 OpenMPOffloadMappingFlags::OMP_MAP_FROM))); 9623 MapperCGF.Builder.CreateBr(EndBB); 9624 MapperCGF.EmitBlock(AllocElseBB); 9625 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9626 LeftToFrom, 9627 MapperCGF.Builder.getInt64( 9628 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9629 OpenMPOffloadMappingFlags::OMP_MAP_TO))); 9630 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9631 // In case of to, clear OMP_MAP_FROM. 9632 MapperCGF.EmitBlock(ToBB); 9633 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9634 MemberMapType, 9635 MapperCGF.Builder.getInt64( 9636 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9637 OpenMPOffloadMappingFlags::OMP_MAP_FROM))); 9638 MapperCGF.Builder.CreateBr(EndBB); 9639 MapperCGF.EmitBlock(ToElseBB); 9640 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9641 LeftToFrom, 9642 MapperCGF.Builder.getInt64( 9643 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9644 OpenMPOffloadMappingFlags::OMP_MAP_FROM))); 9645 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9646 // In case of from, clear OMP_MAP_TO. 9647 MapperCGF.EmitBlock(FromBB); 9648 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9649 MemberMapType, 9650 MapperCGF.Builder.getInt64( 9651 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9652 OpenMPOffloadMappingFlags::OMP_MAP_TO))); 9653 // In case of tofrom, do nothing. 
9654 MapperCGF.EmitBlock(EndBB); 9655 LastBB = EndBB; 9656 llvm::PHINode *CurMapType = 9657 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9658 CurMapType->addIncoming(AllocMapType, AllocBB); 9659 CurMapType->addIncoming(ToMapType, ToBB); 9660 CurMapType->addIncoming(FromMapType, FromBB); 9661 CurMapType->addIncoming(MemberMapType, ToElseBB); 9662 9663 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9664 CurSizeArg, CurMapType, CurNameArg}; 9665 if (Info.Mappers[I]) { 9666 // Call the corresponding mapper function. 9667 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc( 9668 cast<OMPDeclareMapperDecl>(Info.Mappers[I])); 9669 assert(MapperFunc && "Expect a valid mapper function is available."); 9670 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs); 9671 } else { 9672 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9673 // data structure. 9674 MapperCGF.EmitRuntimeCall( 9675 OMPBuilder.getOrCreateRuntimeFunction( 9676 CGM.getModule(), OMPRTL___tgt_push_mapper_component), 9677 OffloadingArgs); 9678 } 9679 } 9680 9681 // Update the pointer to point to the next element that needs to be mapped, 9682 // and check whether we have mapped all elements. 9683 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9684 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9685 PtrPHI->addIncoming(PtrNext, LastBB); 9686 llvm::Value *IsDone = 9687 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9688 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9689 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9690 9691 MapperCGF.EmitBlock(ExitBB); 9692 // Emit array deletion if this is an array section and \p MapType indicates 9693 // that deletion is required. 9694 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9695 MapName, ElementSize, DoneBB, /*IsInit=*/false); 9696 9697 // Emit the function exit block. 
9698 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9699 MapperCGF.FinishFunction(); 9700 UDMMap.try_emplace(D, Fn); 9701 if (CGF) { 9702 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9703 Decls.second.push_back(D); 9704 } 9705 } 9706 9707 /// Emit the array initialization or deletion portion for user-defined mapper 9708 /// code generation. First, it evaluates whether an array section is mapped and 9709 /// whether the \a MapType instructs to delete this section. If \a IsInit is 9710 /// true, and \a MapType indicates to not delete this array, array 9711 /// initialization code is generated. If \a IsInit is false, and \a MapType 9712 /// indicates to not this array, array deletion code is generated. 9713 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9714 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9715 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9716 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB, 9717 bool IsInit) { 9718 StringRef Prefix = IsInit ? ".init" : ".del"; 9719 9720 // Evaluate if this is an array section. 9721 llvm::BasicBlock *BodyBB = 9722 MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); 9723 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT( 9724 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9725 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9726 MapType, 9727 MapperCGF.Builder.getInt64( 9728 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9729 OpenMPOffloadMappingFlags::OMP_MAP_DELETE))); 9730 llvm::Value *DeleteCond; 9731 llvm::Value *Cond; 9732 if (IsInit) { 9733 // base != begin? 9734 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin); 9735 // IsPtrAndObj? 
9736 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd( 9737 MapType, 9738 MapperCGF.Builder.getInt64( 9739 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9740 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ))); 9741 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit); 9742 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit); 9743 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin); 9744 DeleteCond = MapperCGF.Builder.CreateIsNull( 9745 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9746 } else { 9747 Cond = IsArray; 9748 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9749 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9750 } 9751 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond); 9752 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB); 9753 9754 MapperCGF.EmitBlock(BodyBB); 9755 // Get the array size by multiplying element size and element number (i.e., \p 9756 // Size). 9757 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9758 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9759 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9760 // memory allocation/deletion purpose only. 9761 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9762 MapType, 9763 MapperCGF.Builder.getInt64( 9764 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9765 OpenMPOffloadMappingFlags::OMP_MAP_TO | 9766 OpenMPOffloadMappingFlags::OMP_MAP_FROM))); 9767 MapTypeArg = MapperCGF.Builder.CreateOr( 9768 MapTypeArg, 9769 MapperCGF.Builder.getInt64( 9770 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9771 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))); 9772 9773 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9774 // data structure. 
9775 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, 9776 ArraySize, MapTypeArg, MapName}; 9777 MapperCGF.EmitRuntimeCall( 9778 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9779 OMPRTL___tgt_push_mapper_component), 9780 OffloadingArgs); 9781 } 9782 9783 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( 9784 const OMPDeclareMapperDecl *D) { 9785 auto I = UDMMap.find(D); 9786 if (I != UDMMap.end()) 9787 return I->second; 9788 emitUserDefinedMapper(D); 9789 return UDMMap.lookup(D); 9790 } 9791 9792 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall( 9793 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9794 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9795 const OMPLoopDirective &D)> 9796 SizeEmitter) { 9797 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9798 const OMPExecutableDirective *TD = &D; 9799 // Get nested teams distribute kind directive, if any. 9800 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9801 TD = getNestedDistributeDirective(CGM.getContext(), D); 9802 if (!TD) 9803 return llvm::ConstantInt::get(CGF.Int64Ty, 0); 9804 9805 const auto *LD = cast<OMPLoopDirective>(TD); 9806 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) 9807 return NumIterations; 9808 return llvm::ConstantInt::get(CGF.Int64Ty, 0); 9809 } 9810 9811 void CGOpenMPRuntime::emitTargetCall( 9812 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9813 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 9814 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 9815 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9816 const OMPLoopDirective &D)> 9817 SizeEmitter) { 9818 if (!CGF.HaveInsertPoint()) 9819 return; 9820 9821 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice && 9822 CGM.getLangOpts().OpenMPOffloadMandatory; 9823 9824 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!"); 9825 9826 const bool 
RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 9827 D.hasClausesOfKind<OMPNowaitClause>() || 9828 D.hasClausesOfKind<OMPInReductionClause>(); 9829 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9830 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9831 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9832 PrePostActionTy &) { 9833 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9834 }; 9835 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 9836 9837 CodeGenFunction::OMPTargetDataInfo InputInfo; 9838 llvm::Value *MapTypesArray = nullptr; 9839 llvm::Value *MapNamesArray = nullptr; 9840 // Generate code for the host fallback function. 9841 auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, 9842 &CS, OffloadingMandatory](CodeGenFunction &CGF) { 9843 if (OffloadingMandatory) { 9844 CGF.Builder.CreateUnreachable(); 9845 } else { 9846 if (RequiresOuterTask) { 9847 CapturedVars.clear(); 9848 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9849 } 9850 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9851 } 9852 }; 9853 // Fill up the pointer arrays and transfer execution to the device. 9854 auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray, 9855 &MapNamesArray, SizeEmitter, 9856 FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) { 9857 if (Device.getInt() == OMPC_DEVICE_ancestor) { 9858 // Reverse offloading is not supported, so just execute on the host. 9859 FallbackGen(CGF); 9860 return; 9861 } 9862 9863 // On top of the arrays that were filled up, the target offloading call 9864 // takes as arguments the device id as well as the host pointer. The host 9865 // pointer is used by the runtime library to identify the current target 9866 // region, so it only has to be unique and not necessarily point to 9867 // anything. 
It could be the pointer to the outlined function that 9868 // implements the target region, but we aren't using that so that the 9869 // compiler doesn't need to keep that, and could therefore inline the host 9870 // function if proven worthwhile during optimization. 9871 9872 // From this point on, we need to have an ID of the target region defined. 9873 assert(OutlinedFnID && "Invalid outlined function ID!"); 9874 (void)OutlinedFnID; 9875 9876 // Emit device ID if any. 9877 llvm::Value *DeviceID; 9878 if (Device.getPointer()) { 9879 assert((Device.getInt() == OMPC_DEVICE_unknown || 9880 Device.getInt() == OMPC_DEVICE_device_num) && 9881 "Expected device_num modifier."); 9882 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 9883 DeviceID = 9884 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 9885 } else { 9886 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9887 } 9888 9889 // Emit the number of elements in the offloading arrays. 9890 llvm::Value *PointerNum = 9891 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9892 9893 // Return value of the runtime offloading call. 9894 llvm::Value *Return; 9895 9896 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 9897 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 9898 9899 // Source location for the ident struct 9900 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 9901 9902 // Get tripcount for the target loop-based directive. 
9903 llvm::Value *NumIterations = 9904 emitTargetNumIterationsCall(CGF, D, SizeEmitter); 9905 9906 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0); 9907 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) { 9908 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF); 9909 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr( 9910 DynMemClause->getSize(), /*IgnoreResultAssign=*/true); 9911 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty, 9912 /*isSigned=*/false); 9913 } 9914 9915 llvm::Value *ZeroArray = 9916 llvm::Constant::getNullValue(llvm::ArrayType::get(CGF.CGM.Int32Ty, 3)); 9917 9918 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>(); 9919 llvm::Value *Flags = CGF.Builder.getInt64(HasNoWait); 9920 9921 llvm::Value *NumTeams3D = 9922 CGF.Builder.CreateInsertValue(ZeroArray, NumTeams, {0}); 9923 llvm::Value *NumThreads3D = 9924 CGF.Builder.CreateInsertValue(ZeroArray, NumThreads, {0}); 9925 9926 // Arguments for the target kernel. 9927 SmallVector<llvm::Value *> KernelArgs{ 9928 CGF.Builder.getInt32(/* Version */ 2), 9929 PointerNum, 9930 InputInfo.BasePointersArray.getPointer(), 9931 InputInfo.PointersArray.getPointer(), 9932 InputInfo.SizesArray.getPointer(), 9933 MapTypesArray, 9934 MapNamesArray, 9935 InputInfo.MappersArray.getPointer(), 9936 NumIterations, 9937 Flags, 9938 NumTeams3D, 9939 NumThreads3D, 9940 DynCGroupMem, 9941 }; 9942 9943 // The target region is an outlined function launched by the runtime 9944 // via calls to __tgt_target_kernel(). 9945 // 9946 // Note that on the host and CPU targets, the runtime implementation of 9947 // these calls simply call the outlined function without forking threads. 9948 // The outlined functions themselves have runtime calls to 9949 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 9950 // the compiler in emitTeamsCall() and emitParallelCall(). 
9951 // 9952 // In contrast, on the NVPTX target, the implementation of 9953 // __tgt_target_teams() launches a GPU kernel with the requested number 9954 // of teams and threads so no additional calls to the runtime are required. 9955 // Check the error code and execute the host version if required. 9956 CGF.Builder.restoreIP(OMPBuilder.emitTargetKernel( 9957 CGF.Builder, Return, RTLoc, DeviceID, NumTeams, NumThreads, 9958 OutlinedFnID, KernelArgs)); 9959 9960 llvm::BasicBlock *OffloadFailedBlock = 9961 CGF.createBasicBlock("omp_offload.failed"); 9962 llvm::BasicBlock *OffloadContBlock = 9963 CGF.createBasicBlock("omp_offload.cont"); 9964 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 9965 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 9966 9967 CGF.EmitBlock(OffloadFailedBlock); 9968 FallbackGen(CGF); 9969 9970 CGF.EmitBranch(OffloadContBlock); 9971 9972 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 9973 }; 9974 9975 // Notify that the host version must be executed. 9976 auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) { 9977 FallbackGen(CGF); 9978 }; 9979 9980 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 9981 &MapNamesArray, &CapturedVars, RequiresOuterTask, 9982 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 9983 // Fill up the arrays with all the captured variables. 9984 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 9985 9986 // Get mappable expression information. 
9987 MappableExprsHandler MEHandler(D, CGF); 9988 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 9989 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 9990 9991 auto RI = CS.getCapturedRecordDecl()->field_begin(); 9992 auto *CV = CapturedVars.begin(); 9993 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 9994 CE = CS.capture_end(); 9995 CI != CE; ++CI, ++RI, ++CV) { 9996 MappableExprsHandler::MapCombinedInfoTy CurInfo; 9997 MappableExprsHandler::StructRangeInfoTy PartialStruct; 9998 9999 // VLA sizes are passed to the outlined region by copy and do not have map 10000 // information associated. 10001 if (CI->capturesVariableArrayType()) { 10002 CurInfo.Exprs.push_back(nullptr); 10003 CurInfo.BasePointers.push_back(*CV); 10004 CurInfo.Pointers.push_back(*CV); 10005 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 10006 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 10007 // Copy to the device as an argument. No need to retrieve it. 10008 CurInfo.Types.push_back( 10009 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | 10010 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM | 10011 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); 10012 CurInfo.Mappers.push_back(nullptr); 10013 } else { 10014 // If we have any information in the map clause, we use it, otherwise we 10015 // just do a default mapping. 10016 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); 10017 if (!CI->capturesThis()) 10018 MappedVarSet.insert(CI->getCapturedVar()); 10019 else 10020 MappedVarSet.insert(nullptr); 10021 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) 10022 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); 10023 // Generate correct mapping for variables captured by reference in 10024 // lambdas. 
10025 if (CI->capturesVariable()) 10026 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, 10027 CurInfo, LambdaPointers); 10028 } 10029 // We expect to have at least an element of information for this capture. 10030 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && 10031 "Non-existing map pointer for capture!"); 10032 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 10033 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 10034 CurInfo.BasePointers.size() == CurInfo.Types.size() && 10035 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 10036 "Inconsistent map information sizes!"); 10037 10038 // If there is an entry in PartialStruct it means we have a struct with 10039 // individual members mapped. Emit an extra combined entry. 10040 if (PartialStruct.Base.isValid()) { 10041 CombinedInfo.append(PartialStruct.PreliminaryMapData); 10042 MEHandler.emitCombinedEntry( 10043 CombinedInfo, CurInfo.Types, PartialStruct, nullptr, 10044 !PartialStruct.PreliminaryMapData.BasePointers.empty()); 10045 } 10046 10047 // We need to append the results of this capture to what we already have. 10048 CombinedInfo.append(CurInfo); 10049 } 10050 // Adjust MEMBER_OF flags for the lambdas captures. 10051 MEHandler.adjustMemberOfForLambdaCaptures( 10052 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, 10053 CombinedInfo.Types); 10054 // Map any list items in a map clause that were not captures because they 10055 // weren't referenced within the construct. 10056 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); 10057 10058 CGOpenMPRuntime::TargetDataInfo Info; 10059 // Fill up the arrays and create the arguments. 
10060 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); 10061 bool EmitDebug = 10062 CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo; 10063 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, 10064 EmitDebug, 10065 /*ForEndCall=*/false); 10066 10067 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10068 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, 10069 CGF.VoidPtrTy, CGM.getPointerAlign()); 10070 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, 10071 CGM.getPointerAlign()); 10072 InputInfo.SizesArray = 10073 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign()); 10074 InputInfo.MappersArray = 10075 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); 10076 MapTypesArray = Info.RTArgs.MapTypesArray; 10077 MapNamesArray = Info.RTArgs.MapNamesArray; 10078 if (RequiresOuterTask) 10079 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10080 else 10081 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10082 }; 10083 10084 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 10085 CodeGenFunction &CGF, PrePostActionTy &) { 10086 if (RequiresOuterTask) { 10087 CodeGenFunction::OMPTargetDataInfo InputInfo; 10088 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 10089 } else { 10090 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 10091 } 10092 }; 10093 10094 // If we have a target function ID it means that we need to support 10095 // offloading, otherwise, just execute on the host. We need to execute on host 10096 // regardless of the conditional in the if clause if, e.g., the user do not 10097 // specify target triples. 
10098 if (OutlinedFnID) { 10099 if (IfCond) { 10100 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 10101 } else { 10102 RegionCodeGenTy ThenRCG(TargetThenGen); 10103 ThenRCG(CGF); 10104 } 10105 } else { 10106 RegionCodeGenTy ElseRCG(TargetElseGen); 10107 ElseRCG(CGF); 10108 } 10109 } 10110 10111 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 10112 StringRef ParentName) { 10113 if (!S) 10114 return; 10115 10116 // Codegen OMP target directives that offload compute to the device. 10117 bool RequiresDeviceCodegen = 10118 isa<OMPExecutableDirective>(S) && 10119 isOpenMPTargetExecutionDirective( 10120 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 10121 10122 if (RequiresDeviceCodegen) { 10123 const auto &E = *cast<OMPExecutableDirective>(S); 10124 auto EntryInfo = 10125 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), ParentName); 10126 10127 // Is this a target region that should not be emitted as an entry point? If 10128 // so just signal we are done with this target region. 
10129 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(EntryInfo)) 10130 return; 10131 10132 switch (E.getDirectiveKind()) { 10133 case OMPD_target: 10134 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 10135 cast<OMPTargetDirective>(E)); 10136 break; 10137 case OMPD_target_parallel: 10138 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 10139 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 10140 break; 10141 case OMPD_target_teams: 10142 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 10143 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 10144 break; 10145 case OMPD_target_teams_distribute: 10146 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 10147 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 10148 break; 10149 case OMPD_target_teams_distribute_simd: 10150 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 10151 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 10152 break; 10153 case OMPD_target_parallel_for: 10154 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 10155 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 10156 break; 10157 case OMPD_target_parallel_for_simd: 10158 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 10159 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 10160 break; 10161 case OMPD_target_simd: 10162 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 10163 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 10164 break; 10165 case OMPD_target_teams_distribute_parallel_for: 10166 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 10167 CGM, ParentName, 10168 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 10169 break; 10170 case OMPD_target_teams_distribute_parallel_for_simd: 10171 CodeGenFunction:: 10172 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 10173 CGM, ParentName, 10174 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 10175 break; 10176 
case OMPD_parallel: 10177 case OMPD_for: 10178 case OMPD_parallel_for: 10179 case OMPD_parallel_master: 10180 case OMPD_parallel_sections: 10181 case OMPD_for_simd: 10182 case OMPD_parallel_for_simd: 10183 case OMPD_cancel: 10184 case OMPD_cancellation_point: 10185 case OMPD_ordered: 10186 case OMPD_threadprivate: 10187 case OMPD_allocate: 10188 case OMPD_task: 10189 case OMPD_simd: 10190 case OMPD_tile: 10191 case OMPD_unroll: 10192 case OMPD_sections: 10193 case OMPD_section: 10194 case OMPD_single: 10195 case OMPD_master: 10196 case OMPD_critical: 10197 case OMPD_taskyield: 10198 case OMPD_barrier: 10199 case OMPD_taskwait: 10200 case OMPD_taskgroup: 10201 case OMPD_atomic: 10202 case OMPD_flush: 10203 case OMPD_depobj: 10204 case OMPD_scan: 10205 case OMPD_teams: 10206 case OMPD_target_data: 10207 case OMPD_target_exit_data: 10208 case OMPD_target_enter_data: 10209 case OMPD_distribute: 10210 case OMPD_distribute_simd: 10211 case OMPD_distribute_parallel_for: 10212 case OMPD_distribute_parallel_for_simd: 10213 case OMPD_teams_distribute: 10214 case OMPD_teams_distribute_simd: 10215 case OMPD_teams_distribute_parallel_for: 10216 case OMPD_teams_distribute_parallel_for_simd: 10217 case OMPD_target_update: 10218 case OMPD_declare_simd: 10219 case OMPD_declare_variant: 10220 case OMPD_begin_declare_variant: 10221 case OMPD_end_declare_variant: 10222 case OMPD_declare_target: 10223 case OMPD_end_declare_target: 10224 case OMPD_declare_reduction: 10225 case OMPD_declare_mapper: 10226 case OMPD_taskloop: 10227 case OMPD_taskloop_simd: 10228 case OMPD_master_taskloop: 10229 case OMPD_master_taskloop_simd: 10230 case OMPD_parallel_master_taskloop: 10231 case OMPD_parallel_master_taskloop_simd: 10232 case OMPD_requires: 10233 case OMPD_metadirective: 10234 case OMPD_unknown: 10235 default: 10236 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 10237 } 10238 return; 10239 } 10240 10241 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 
// A directive without an associated statement has nothing to scan.
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

/// Returns true when the declare-target device type of \p VD excludes it from
/// the side being compiled: device_type(nohost) while compiling for the host
/// (\p IsDevice == false) or device_type(host) while compiling for the device.
/// Declarations for which no device type is reported are never excluded.
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

/// Decides whether regular emission of the function-like declaration \p GD
/// must be skipped. On the device side the function body is additionally
/// scanned for target regions.
/// \return true if the declaration should not be emitted, false otherwise.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target and was
  // not already recorded in AlreadyEmittedTargetDecls.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

/// Decides whether regular emission of the global variable \p GD must be
/// skipped. On the device side the constructors and destructor of the
/// variable's record type are scanned for target regions, and variables that
/// are not emitted right away are remembered in DeferredGlobalVariables.
/// \return true if the variable should not be emitted now, false otherwise.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  // Host compilation: nothing else to do, emit the variable normally.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target. 'link'
  // variables, and 'to'/'enter' variables under unified shared memory, are
  // deferred as well.
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

/// Registers the global variable \p VD, whose emitted address is \p Addr, with
/// the offload entries manager. Nothing is registered when there are no
/// offload target triples and we are not compiling for the device, or when the
/// variable has a device type other than DT_Any. Variables that are not
/// declare target are only recorded in EmittedNonTargetVariables (device side).
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && *DevTy != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  int64_t VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
       *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
      !HasRequiresUnifiedSharedMemory) {
    // 'to'/'enter' without unified shared memory: the entry describes the
    // variable itself.
    Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize =
          CGM.getContext().getTypeSizeInChars(VD->getType()).getQuantity();
      assert(VarSize != 0 && "Expected non-zero size of the variable");
    } else {
      // Declaration only: the size is recorded as zero.
      VarSize = 0;
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        // Constant internal global initialized with the variable's address and
        // added to the compiler-used list.
        llvm::Constant *AddrRef =
            OMPBuilder.getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // 'link', or 'to'/'enter' with unified shared memory: the entry is
    // pointer-sized and named after the declare target variable address.
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
              *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
    else
      Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device only the name is registered; no address is passed.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize().getQuantity();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

/// Dispatches \p GD to emitTargetFunctions() for functions and declare
/// reduction declarations, and to emitTargetGlobalVariable() for everything
/// else. Returns the result of the selected routine.
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

/// Processes every variable recorded in DeferredGlobalVariables: 'to'/'enter'
/// variables without unified shared memory are emitted through
/// CGM.EmitGlobal(); for the remaining declare target variables the declare
/// target address is requested instead. Variables that are not declare target
/// are skipped.
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      // Only the side effect of the call is wanted; the address is discarded.
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

/// This implementation only checks (in asserts-enabled builds) that \p D is a
/// target execution directive; it emits nothing.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}

/// Records the effect of the clauses of the 'requires' declaration \p D:
/// unified_shared_memory sets HasRequiresUnifiedSharedMemory (and the matching
/// OMPBuilder configuration flag); atomic_default_mem_order selects
/// RequiresAtomicOrdering. Other clauses are ignored here.
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
      OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        // Leave the current ordering untouched.
        break;
      }
    }
  }
}

/// Returns the atomic ordering stored in RequiresAtomicOrdering (updated by
/// processRequiresDirective()).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

/// Checks whether the global variable \p VD has an OpenMP allocate attribute.
/// \param AS [out] set to LangAS::Default for every predefined allocator.
/// \return false if \p VD is null or has no OMPAllocateDeclAttr, true for any
/// predefined allocator. A user-defined allocator is treated as unreachable.
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch(A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
  // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

/// Returns whether a 'requires unified_shared_memory' clause has been seen.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

/// When compiling for the device, saves the runtime's ShouldMarkAsGlobal flag
/// and clears it; the destructor restores the saved value. On the host the
/// object does nothing.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

/// Restores the ShouldMarkAsGlobal value saved by the constructor (device
/// compilation only).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

/// Marks the function \p GD as a global target declaration on the device.
/// Always returns true on the host or while ShouldMarkAsGlobal is cleared.
/// For functions that are not declare target, the declaration is inserted into
/// AlreadyEmittedTargetDecls and the result is true only if it was already
/// present.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // A body exists but the decl was not recorded: report "already handled"
      // only if the module holds a definition (not a mere declaration).
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

/// Creates the function that passes the 'requires' flags to the offloading
/// runtime through __tgt_register_requires.
/// \return the created function, or nullptr when there are no offload target
/// triples, in simd-only or device compilation, or when no offload entry,
/// target region or declare target region has been emitted.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

/// Emits the call to __kmpc_fork_teams that runs \p OutlinedFn with
/// \p CapturedVars as trailing arguments. Does nothing if \p CGF has no
/// insertion point.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

/// Emits the call to __kmpc_push_num_teams for the num_teams / thread_limit
/// expressions. Either expression may be null, in which case the constant 0 is
/// passed; otherwise the value is cast to a signed 32-bit integer. Does nothing
/// if \p CGF has no insertion point.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

/// Emits a 'target data' region: a __tgt_target_data_begin_mapper call, the
/// region body produced by \p CodeGen, and a __tgt_target_data_end_mapper
/// call. With an \p IfCond the runtime calls are emitted under the condition.
/// \param Device optional device expression; OMP_DEVICEID_UNDEF is used when
/// it is null.
/// \param Info receives the offloading arrays built for the opening call; the
/// closing call reuses them.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs;
    bool EmitDebug =
        CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info,
                                            EmitDebug);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     RTArgs.BasePointersArray,
                                     RTArgs.PointersArray,
                                     RTArgs.SizesArray,
                                     RTArgs.MapTypesArray,
                                     RTArgs.MapNamesArray,
                                     RTArgs.MappersArray};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs;
    bool EmitDebug =
        CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/true);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     RTArgs.BasePointersArray,
                                     RTArgs.PointersArray,
                                     RTArgs.SizesArray,
                                     RTArgs.MapTypesArray,
                                     RTArgs.MapNamesArray,
                                     RTArgs.MappersArray};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

/// Emits the runtime call for a standalone data directive ('target enter
/// data', 'target exit data' or 'target update'). The *_nowait_mapper entry
/// point is selected when the directive has a nowait clause. With depend or
/// nowait clauses the call is emitted through
/// EmitOMPTargetTaskBasedDirective(), otherwise as an inlined directive. With
/// an \p IfCond nothing is emitted on the false branch.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Filled in by TargetThenGen below and read by ThenGen when it runs.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // Every other directive kind is rejected below.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the offloading arrays, publishes them through InputInfo /
  // MapTypesArray / MapNamesArray, then runs ThenGen either inside a target
  // task or inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    bool EmitDebug =
        CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  /// Classification of the parameter; defaults to Vector.
  ParamKindTy Kind = Vector;
  /// Stride value or argument reference, depending on HasVarStride.
  llvm::APSInt StrideOrArg;
  /// Alignment of the parameter; a zero value means "not specified" to the
  /// name mangler.
  llvm::APSInt Alignment;
  /// Whether the stride is variable rather than a constant.
  bool HasVarStride = false;
};
} // namespace

/// Returns the size in bits (as reported by ASTContext::getTypeSize) of the
/// characteristic data type (CDT) of \p FD, or 0 if the return type is null.
/// \p ParamAttrs is indexed with an offset of one for C++ methods; the first
/// slot is then checked before the declared parameters (presumably it
/// describes the implicit 'this' - confirm against the caller).
static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
  // of that clause. The VLEN value must be power of 2.
  // In other case the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //   type which is pass-by-value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN  = sizeof(vector_register) / sizeof(CDT),
  // where vector register size specified in section 3.2.1 Registers and the
  // Stack Frame of original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    // Rule a): non-void return type.
    CDT = RetType;
  } else {
    // Rule b): first parameter classified as Vector.
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  // Rule d): fall back to int.
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  // Rule c): record and union types are replaced by int.
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 4.5 of the AAVFABI(2021Q1).
11047 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11048 SmallString<256> Buffer; 11049 llvm::raw_svector_ostream Out(Buffer); 11050 for (const auto &ParamAttr : ParamAttrs) { 11051 switch (ParamAttr.Kind) { 11052 case Linear: 11053 Out << 'l'; 11054 break; 11055 case LinearRef: 11056 Out << 'R'; 11057 break; 11058 case LinearUVal: 11059 Out << 'U'; 11060 break; 11061 case LinearVal: 11062 Out << 'L'; 11063 break; 11064 case Uniform: 11065 Out << 'u'; 11066 break; 11067 case Vector: 11068 Out << 'v'; 11069 break; 11070 } 11071 if (ParamAttr.HasVarStride) 11072 Out << "s" << ParamAttr.StrideOrArg; 11073 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef || 11074 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) { 11075 // Don't print the step value if it is not present or if it is 11076 // equal to 1. 11077 if (ParamAttr.StrideOrArg < 0) 11078 Out << 'n' << -ParamAttr.StrideOrArg; 11079 else if (ParamAttr.StrideOrArg != 1) 11080 Out << ParamAttr.StrideOrArg; 11081 } 11082 11083 if (!!ParamAttr.Alignment) 11084 Out << 'a' << ParamAttr.Alignment; 11085 } 11086 11087 return std::string(Out.str()); 11088 } 11089 11090 static void 11091 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 11092 const llvm::APSInt &VLENVal, 11093 ArrayRef<ParamAttrTy> ParamAttrs, 11094 OMPDeclareSimdDeclAttr::BranchStateTy State) { 11095 struct ISADataTy { 11096 char ISA; 11097 unsigned VecRegSize; 11098 }; 11099 ISADataTy ISAData[] = { 11100 { 11101 'b', 128 11102 }, // SSE 11103 { 11104 'c', 256 11105 }, // AVX 11106 { 11107 'd', 256 11108 }, // AVX2 11109 { 11110 'e', 512 11111 }, // AVX512 11112 }; 11113 llvm::SmallVector<char, 2> Masked; 11114 switch (State) { 11115 case OMPDeclareSimdDeclAttr::BS_Undefined: 11116 Masked.push_back('N'); 11117 Masked.push_back('M'); 11118 break; 11119 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11120 Masked.push_back('N'); 11121 break; 11122 case 
OMPDeclareSimdDeclAttr::BS_Inbranch: 11123 Masked.push_back('M'); 11124 break; 11125 } 11126 for (char Mask : Masked) { 11127 for (const ISADataTy &Data : ISAData) { 11128 SmallString<256> Buffer; 11129 llvm::raw_svector_ostream Out(Buffer); 11130 Out << "_ZGV" << Data.ISA << Mask; 11131 if (!VLENVal) { 11132 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 11133 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 11134 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 11135 } else { 11136 Out << VLENVal; 11137 } 11138 Out << mangleVectorParameters(ParamAttrs); 11139 Out << '_' << Fn->getName(); 11140 Fn->addFnAttr(Out.str()); 11141 } 11142 } 11143 } 11144 11145 // This are the Functions that are needed to mangle the name of the 11146 // vector functions generated by the compiler, according to the rules 11147 // defined in the "Vector Function ABI specifications for AArch64", 11148 // available at 11149 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 11150 11151 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1). 11152 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11153 QT = QT.getCanonicalType(); 11154 11155 if (QT->isVoidType()) 11156 return false; 11157 11158 if (Kind == ParamKindTy::Uniform) 11159 return false; 11160 11161 if (Kind == ParamKindTy::LinearUVal || ParamKindTy::LinearRef) 11162 return false; 11163 11164 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) && 11165 !QT->isReferenceType()) 11166 return false; 11167 11168 return true; 11169 } 11170 11171 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11172 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11173 QT = QT.getCanonicalType(); 11174 unsigned Size = C.getTypeSize(QT); 11175 11176 // Only scalars and complex within 16 bytes wide set PVB to true. 
11177 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11178 return false; 11179 11180 if (QT->isFloatingType()) 11181 return true; 11182 11183 if (QT->isIntegerType()) 11184 return true; 11185 11186 if (QT->isPointerType()) 11187 return true; 11188 11189 // TODO: Add support for complex types (section 3.1.2, item 2). 11190 11191 return false; 11192 } 11193 11194 /// Computes the lane size (LS) of a return type or of an input parameter, 11195 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11196 /// TODO: Add support for references, section 3.2.1, item 1. 11197 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11198 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11199 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11200 if (getAArch64PBV(PTy, C)) 11201 return C.getTypeSize(PTy); 11202 } 11203 if (getAArch64PBV(QT, C)) 11204 return C.getTypeSize(QT); 11205 11206 return C.getTypeSize(C.getUIntPtrType()); 11207 } 11208 11209 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11210 // signature of the scalar function, as defined in 3.2.2 of the 11211 // AAVFABI. 
11212 static std::tuple<unsigned, unsigned, bool> 11213 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11214 QualType RetType = FD->getReturnType().getCanonicalType(); 11215 11216 ASTContext &C = FD->getASTContext(); 11217 11218 bool OutputBecomesInput = false; 11219 11220 llvm::SmallVector<unsigned, 8> Sizes; 11221 if (!RetType->isVoidType()) { 11222 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11223 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11224 OutputBecomesInput = true; 11225 } 11226 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11227 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11228 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11229 } 11230 11231 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11232 // The LS of a function parameter / return value can only be a power 11233 // of 2, starting from 8 bits, up to 128. 11234 assert(llvm::all_of(Sizes, 11235 [](unsigned Size) { 11236 return Size == 8 || Size == 16 || Size == 32 || 11237 Size == 64 || Size == 128; 11238 }) && 11239 "Invalid size"); 11240 11241 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11242 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11243 OutputBecomesInput); 11244 } 11245 11246 // Function used to add the attribute. The parameter `VLEN` is 11247 // templated to allow the use of "x" when targeting scalable functions 11248 // for SVE. 
11249 template <typename T> 11250 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11251 char ISA, StringRef ParSeq, 11252 StringRef MangledName, bool OutputBecomesInput, 11253 llvm::Function *Fn) { 11254 SmallString<256> Buffer; 11255 llvm::raw_svector_ostream Out(Buffer); 11256 Out << Prefix << ISA << LMask << VLEN; 11257 if (OutputBecomesInput) 11258 Out << "v"; 11259 Out << ParSeq << "_" << MangledName; 11260 Fn->addFnAttr(Out.str()); 11261 } 11262 11263 // Helper function to generate the Advanced SIMD names depending on 11264 // the value of the NDS when simdlen is not present. 11265 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11266 StringRef Prefix, char ISA, 11267 StringRef ParSeq, StringRef MangledName, 11268 bool OutputBecomesInput, 11269 llvm::Function *Fn) { 11270 switch (NDS) { 11271 case 8: 11272 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11273 OutputBecomesInput, Fn); 11274 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11275 OutputBecomesInput, Fn); 11276 break; 11277 case 16: 11278 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11279 OutputBecomesInput, Fn); 11280 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11281 OutputBecomesInput, Fn); 11282 break; 11283 case 32: 11284 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11285 OutputBecomesInput, Fn); 11286 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11287 OutputBecomesInput, Fn); 11288 break; 11289 case 64: 11290 case 128: 11291 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11292 OutputBecomesInput, Fn); 11293 break; 11294 default: 11295 llvm_unreachable("Scalar type is too wide."); 11296 } 11297 } 11298 11299 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
///
/// Nothing is attached, and a warning is reported at \p SLoc, when the
/// user-provided simdlen is 1, is not a power of two for Advanced SIMD, or
/// does not fit the SVE limits checked below.
///
/// \param CGM          Module, used to report diagnostics.
/// \param FD           Scalar declaration; provides NDS, WDS and the
///                     "output becomes input" flag through getNDSWDS().
/// \param UserVLEN     Value of the simdlen clause; 0 is treated as
///                     "no simdlen given".
/// \param ParamAttrs   Per-parameter attributes of the declare simd.
/// \param State        Branch state ([not]inbranch) of the declare simd.
/// \param MangledName  Name appended after the vector prefix.
/// \param ISA          'n' for Advanced SIMD or 's' for SVE.
/// \param VecRegSize   Not read by this function.
/// \param Fn           Function that receives the attributes.
/// \param SLoc         Location used for the warnings.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    // The total width (lanes times widest data size) must be a multiple of
    // 128 bits and must not exceed 2048 bits.
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

/// Attaches vector-variant name attributes to \p Fn for every
/// `declare simd` attribute found on \p FD or on one of its previous
/// declarations.
///
/// The walk starts at the most recent declaration and follows
/// getPreviousDecl(). For each attribute the uniform, aligned and linear
/// clauses are translated into one ParamAttrTy per parameter position, and
/// the result is handed to the x86 emitter or, on aarch64 with the "sve" or
/// "neon" feature, to the AArch64 emitter. Nothing is emitted for any other
/// target.
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    // For a C++ method, position 0 is keyed by the method declaration itself
    // and is what the `this` expressions below resolve to.
    if (isa<CXXMethodDecl>(FD))
      ParamPositions.try_emplace(FD, 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      // NI advances in lockstep with the aligned expressions; a null entry
      // selects the default simd alignment of the parameter type.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      // SI (steps) and MI (modifiers) advance in lockstep with the linear
      // expressions.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        // An explicit `ref` or `uval` modifier decides the kind; otherwise a
        // reference parameter is LinearVal and anything else is Linear.
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // The step is not an integer constant: when it names a function
            // parameter, record that parameter's position as the stride.
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      // VLENVal keeps its default-constructed value when there is no simdlen
      // clause.
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        // SVE takes precedence over Advanced SIMD when both are available.
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        else if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}

namespace {
/// Cleanup action for doacross support.
///
/// Stores a runtime callee together with exactly DoacrossFinArgs arguments
/// and emits the call when the cleanup runs.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  /// Number of arguments passed to the stored runtime function.
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  /// \p CallArgs must contain exactly DoacrossFinArgs values (asserted).
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  /// Emits the stored call, unless there is no insertion point.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// Emits the initialization of a doacross loop nest.
///
/// A zero-initialized temporary array of `kmp_dim` records is created, one
/// per entry of \p NumIterations; the upper bound of each record is set to
/// the iteration count converted to a signed 64-bit integer and its stride to
/// 1. The array is passed to `__kmpc_doacross_init`, and a cleanup that calls
/// `__kmpc_doacross_fini` is pushed. Does nothing without an insertion point.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // The record type is built on first use and cached in KmpDimTy.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  // Field indices inside kmp_dim; the lower bound keeps its zero
  // initialization and is never stored to here.
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // The matching finalization call is emitted by a cleanup pushed for both
  // normal and exceptional exits.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::ArrayRef(FiniArgs));
}

/// Emits the runtime call for a `depend(source)` or `depend(sink)` clause of
/// an ordered construct.
///
/// The loop counter values of \p C are converted to signed 64-bit integers
/// and stored into a temporary array, which is passed to
/// `__kmpc_doacross_post` for `source` and to `__kmpc_doacross_wait` for
/// `sink`.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// Emits a call to \p Callee with \p Args under an artificial debug location
/// derived from \p Loc, which must be valid (asserted).
///
/// When the callee is an llvm::Function that does not throw, the call is
/// emitted as a nounwind runtime call; otherwise as a regular runtime call.
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

/// Emits a call to the outlined function \p OutlinedFn; this implementation
/// forwards unchanged to emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

/// Sets HasEmittedDeclareTargetRegion when \p D is a function declaration
/// that is a declare target declaration. \p CGF is not used here.
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

/// Returns the address of the local variable \p NativeParam; \p TargetParam
/// is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
///
/// \param Allocator  Allocator expression; may be null.
/// \returns a value of the IR type that corresponds to `void *`.
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

/// Return the alignment from an allocate directive if present.
11744 static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) { 11745 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD); 11746 11747 if (!AllocateAlignment) 11748 return nullptr; 11749 11750 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity()); 11751 } 11752 11753 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11754 const VarDecl *VD) { 11755 if (!VD) 11756 return Address::invalid(); 11757 Address UntiedAddr = Address::invalid(); 11758 Address UntiedRealAddr = Address::invalid(); 11759 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 11760 if (It != FunctionToUntiedTaskStackMap.end()) { 11761 const UntiedLocalVarsAddressesMap &UntiedData = 11762 UntiedLocalVarsStack[It->second]; 11763 auto I = UntiedData.find(VD); 11764 if (I != UntiedData.end()) { 11765 UntiedAddr = I->second.first; 11766 UntiedRealAddr = I->second.second; 11767 } 11768 } 11769 const VarDecl *CVD = VD->getCanonicalDecl(); 11770 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 11771 // Use the default allocation. 
11772 if (!isAllocatableDecl(VD)) 11773 return UntiedAddr; 11774 llvm::Value *Size; 11775 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11776 if (CVD->getType()->isVariablyModifiedType()) { 11777 Size = CGF.getTypeSize(CVD->getType()); 11778 // Align the size: ((size + align - 1) / align) * align 11779 Size = CGF.Builder.CreateNUWAdd( 11780 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11781 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11782 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11783 } else { 11784 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11785 Size = CGM.getSize(Sz.alignTo(Align)); 11786 } 11787 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11788 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11789 const Expr *Allocator = AA->getAllocator(); 11790 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator); 11791 llvm::Value *Alignment = getAlignmentValue(CGM, CVD); 11792 SmallVector<llvm::Value *, 4> Args; 11793 Args.push_back(ThreadID); 11794 if (Alignment) 11795 Args.push_back(Alignment); 11796 Args.push_back(Size); 11797 Args.push_back(AllocVal); 11798 llvm::omp::RuntimeFunction FnID = 11799 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc; 11800 llvm::Value *Addr = CGF.EmitRuntimeCall( 11801 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args, 11802 getName({CVD->getName(), ".void.addr"})); 11803 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11804 CGM.getModule(), OMPRTL___kmpc_free); 11805 QualType Ty = CGM.getContext().getPointerType(CVD->getType()); 11806 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11807 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); 11808 if (UntiedAddr.isValid()) 11809 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); 11810 11811 // Cleanup action for allocate support. 
11812 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 11813 llvm::FunctionCallee RTLFn; 11814 SourceLocation::UIntTy LocEncoding; 11815 Address Addr; 11816 const Expr *AllocExpr; 11817 11818 public: 11819 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 11820 SourceLocation::UIntTy LocEncoding, Address Addr, 11821 const Expr *AllocExpr) 11822 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), 11823 AllocExpr(AllocExpr) {} 11824 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11825 if (!CGF.HaveInsertPoint()) 11826 return; 11827 llvm::Value *Args[3]; 11828 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID( 11829 CGF, SourceLocation::getFromRawEncoding(LocEncoding)); 11830 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11831 Addr.getPointer(), CGF.VoidPtrTy); 11832 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr); 11833 Args[2] = AllocVal; 11834 CGF.EmitRuntimeCall(RTLFn, Args); 11835 } 11836 }; 11837 Address VDAddr = 11838 UntiedRealAddr.isValid() 11839 ? 
UntiedRealAddr 11840 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align); 11841 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( 11842 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), 11843 VDAddr, Allocator); 11844 if (UntiedRealAddr.isValid()) 11845 if (auto *Region = 11846 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 11847 Region->emitUntiedSwitch(CGF); 11848 return VDAddr; 11849 } 11850 return UntiedAddr; 11851 } 11852 11853 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, 11854 const VarDecl *VD) const { 11855 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 11856 if (It == FunctionToUntiedTaskStackMap.end()) 11857 return false; 11858 return UntiedLocalVarsStack[It->second].count(VD) > 0; 11859 } 11860 11861 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11862 CodeGenModule &CGM, const OMPLoopDirective &S) 11863 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11864 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11865 if (!NeedToPush) 11866 return; 11867 NontemporalDeclsSet &DS = 11868 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11869 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11870 for (const Stmt *Ref : C->private_refs()) { 11871 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11872 const ValueDecl *VD; 11873 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11874 VD = DRE->getDecl(); 11875 } else { 11876 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11877 assert((ME->isImplicitCXXThis() || 11878 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11879 "Expected member of current class."); 11880 VD = ME->getMemberDecl(); 11881 } 11882 DS.insert(VD); 11883 } 11884 } 11885 } 11886 11887 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11888 if (!NeedToPush) 11889 return; 11890 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11891 } 11892 
/// Pushes \p LocalVars onto UntiedLocalVarsStack when it is not empty, and
/// associates the current function with the index of the pushed entry
/// (try_emplace leaves an existing association untouched).
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

/// Pops the entry pushed by the constructor, if one was pushed. The
/// function-to-index association is not removed here.
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

/// Returns true when any set currently on NontemporalDeclsStack contains
/// \p VD.
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}

/// Collects the declarations for which the lastprivate conditional analysis
/// of enclosing regions has to be disabled inside directive \p S.
///
/// Candidates are the variables captured by the first capture region of a
/// target-execution or tasking directive, plus the scalar variables named
/// directly in private, firstprivate, lastprivate, reduction and linear
/// clauses. A candidate is added to \p NeedToAddForLPCsAsDisabled when the
/// innermost LastprivateConditionalStack entry that knows the declaration is
/// not already marked as disabled.
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  // Each of the five loops below applies the same filter: only scalar-typed
  // list items that are direct declaration references are considered.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    // Walk the stack from the innermost entry outwards and stop at the first
    // entry that knows the declaration.
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

/// Pushes a LastprivateConditionalStack entry for directive \p S when OpenMP
/// is at least version 50 and \p S has a `lastprivate(conditional: ...)`
/// clause; otherwise does nothing.
///
/// The entry maps every variable of the conditional lastprivate clauses to a
/// generated unique name and records \p IVLVal and the current function.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

/// Pushes a "disabled" LastprivateConditionalStack entry for directive \p S
/// when OpenMP is at least version 50 and tryToDisableInnerAnalysis() finds
/// declarations to disable; otherwise does nothing.
///
/// The pushed entry maps each such declaration to an empty name, records the
/// current function and is marked as disabled.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

/// Creates the "disabling" RAII object for directive \p S by invoking the
/// two-argument constructor.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

/// Pops the stack entry pushed by either constructor, if any. The asserts
/// check that the top entry has the Disabled state that matches the recorded
/// action.
CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField =
addFieldToRecordDecl(C, RD, C.CharTy); 12087 RD->completeDefinition(); 12088 NewType = C.getRecordType(RD); 12089 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12090 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12091 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12092 } else { 12093 NewType = std::get<0>(VI->getSecond()); 12094 VDField = std::get<1>(VI->getSecond()); 12095 FiredField = std::get<2>(VI->getSecond()); 12096 BaseLVal = std::get<3>(VI->getSecond()); 12097 } 12098 LValue FiredLVal = 12099 CGF.EmitLValueForField(BaseLVal, FiredField); 12100 CGF.EmitStoreOfScalar( 12101 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12102 FiredLVal); 12103 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12104 } 12105 12106 namespace { 12107 /// Checks if the lastprivate conditional variable is referenced in LHS. 12108 class LastprivateConditionalRefChecker final 12109 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12110 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12111 const Expr *FoundE = nullptr; 12112 const Decl *FoundD = nullptr; 12113 StringRef UniqueDeclName; 12114 LValue IVLVal; 12115 llvm::Function *FoundFn = nullptr; 12116 SourceLocation Loc; 12117 12118 public: 12119 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12120 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12121 llvm::reverse(LPM)) { 12122 auto It = D.DeclToUniqueName.find(E->getDecl()); 12123 if (It == D.DeclToUniqueName.end()) 12124 continue; 12125 if (D.Disabled) 12126 return false; 12127 FoundE = E; 12128 FoundD = E->getDecl()->getCanonicalDecl(); 12129 UniqueDeclName = It->second; 12130 IVLVal = D.IVLVal; 12131 FoundFn = D.Fn; 12132 break; 12133 } 12134 return FoundE == E; 12135 } 12136 bool VisitMemberExpr(const MemberExpr *E) { 12137 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12138 return false; 12139 for (const 
CGOpenMPRuntime::LastprivateConditionalData &D : 12140 llvm::reverse(LPM)) { 12141 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12142 if (It == D.DeclToUniqueName.end()) 12143 continue; 12144 if (D.Disabled) 12145 return false; 12146 FoundE = E; 12147 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12148 UniqueDeclName = It->second; 12149 IVLVal = D.IVLVal; 12150 FoundFn = D.Fn; 12151 break; 12152 } 12153 return FoundE == E; 12154 } 12155 bool VisitStmt(const Stmt *S) { 12156 for (const Stmt *Child : S->children()) { 12157 if (!Child) 12158 continue; 12159 if (const auto *E = dyn_cast<Expr>(Child)) 12160 if (!E->isGLValue()) 12161 continue; 12162 if (Visit(Child)) 12163 return true; 12164 } 12165 return false; 12166 } 12167 explicit LastprivateConditionalRefChecker( 12168 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12169 : LPM(LPM) {} 12170 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12171 getFoundData() const { 12172 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12173 } 12174 }; 12175 } // namespace 12176 12177 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12178 LValue IVLVal, 12179 StringRef UniqueDeclName, 12180 LValue LVal, 12181 SourceLocation Loc) { 12182 // Last updated loop counter for the lastprivate conditional var. 12183 // int<xx> last_iv = 0; 12184 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12185 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable( 12186 LLIVTy, getName({UniqueDeclName, "iv"})); 12187 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12188 IVLVal.getAlignment().getAsAlign()); 12189 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12190 12191 // Last value of the lastprivate conditional. 
12192 // decltype(priv_a) last_a; 12193 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable( 12194 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12195 Last->setAlignment(LVal.getAlignment().getAsAlign()); 12196 LValue LastLVal = CGF.MakeAddrLValue( 12197 Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType()); 12198 12199 // Global loop counter. Required to handle inner parallel-for regions. 12200 // iv 12201 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12202 12203 // #pragma omp critical(a) 12204 // if (last_iv <= iv) { 12205 // last_iv = iv; 12206 // last_a = priv_a; 12207 // } 12208 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12209 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12210 Action.Enter(CGF); 12211 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12212 // (last_iv <= iv) ? Check if the variable is updated and store new 12213 // value in global var. 12214 llvm::Value *CmpRes; 12215 if (IVLVal.getType()->isSignedIntegerType()) { 12216 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12217 } else { 12218 assert(IVLVal.getType()->isUnsignedIntegerType() && 12219 "Loop iteration variable must be integer."); 12220 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12221 } 12222 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12223 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12224 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12225 // { 12226 CGF.EmitBlock(ThenBB); 12227 12228 // last_iv = iv; 12229 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12230 12231 // last_a = priv_a; 12232 switch (CGF.getEvaluationKind(LVal.getType())) { 12233 case TEK_Scalar: { 12234 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12235 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12236 break; 12237 } 12238 case TEK_Complex: { 12239 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12240 
CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 12241 break; 12242 } 12243 case TEK_Aggregate: 12244 llvm_unreachable( 12245 "Aggregates are not supported in lastprivate conditional."); 12246 } 12247 // } 12248 CGF.EmitBranch(ExitBB); 12249 // There is no need to emit line number for unconditional branch. 12250 (void)ApplyDebugLocation::CreateEmpty(CGF); 12251 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12252 }; 12253 12254 if (CGM.getLangOpts().OpenMPSimd) { 12255 // Do not emit as a critical region as no parallel region could be emitted. 12256 RegionCodeGenTy ThenRCG(CodeGen); 12257 ThenRCG(CGF); 12258 } else { 12259 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12260 } 12261 } 12262 12263 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12264 const Expr *LHS) { 12265 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12266 return; 12267 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12268 if (!Checker.Visit(LHS)) 12269 return; 12270 const Expr *FoundE; 12271 const Decl *FoundD; 12272 StringRef UniqueDeclName; 12273 LValue IVLVal; 12274 llvm::Function *FoundFn; 12275 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12276 Checker.getFoundData(); 12277 if (FoundFn != CGF.CurFn) { 12278 // Special codegen for inner parallel regions. 
12279 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12280 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12281 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12282 "Lastprivate conditional is not found in outer region."); 12283 QualType StructTy = std::get<0>(It->getSecond()); 12284 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12285 LValue PrivLVal = CGF.EmitLValue(FoundE); 12286 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12287 PrivLVal.getAddress(CGF), 12288 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)), 12289 CGF.ConvertTypeForMem(StructTy)); 12290 LValue BaseLVal = 12291 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12292 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12293 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12294 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12295 FiredLVal, llvm::AtomicOrdering::Unordered, 12296 /*IsVolatile=*/true, /*isInit=*/false); 12297 return; 12298 } 12299 12300 // Private address of the lastprivate conditional in the current context. 
12301 // priv_a 12302 LValue LVal = CGF.EmitLValue(FoundE); 12303 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12304 FoundE->getExprLoc()); 12305 } 12306 12307 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12308 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12309 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12310 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12311 return; 12312 auto Range = llvm::reverse(LastprivateConditionalStack); 12313 auto It = llvm::find_if( 12314 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12315 if (It == Range.end() || It->Fn != CGF.CurFn) 12316 return; 12317 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12318 assert(LPCI != LastprivateConditionalToTypes.end() && 12319 "Lastprivates must be registered already."); 12320 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12321 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12322 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12323 for (const auto &Pair : It->DeclToUniqueName) { 12324 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12325 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD)) 12326 continue; 12327 auto I = LPCI->getSecond().find(Pair.first); 12328 assert(I != LPCI->getSecond().end() && 12329 "Lastprivate must be rehistered already."); 12330 // bool Cmp = priv_a.Fired != 0; 12331 LValue BaseLVal = std::get<3>(I->getSecond()); 12332 LValue FiredLVal = 12333 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12334 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12335 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12336 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12337 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12338 // if (Cmp) { 12339 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12340 CGF.EmitBlock(ThenBB); 
12341 Address Addr = CGF.GetAddrOfLocalVar(VD); 12342 LValue LVal; 12343 if (VD->getType()->isReferenceType()) 12344 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12345 AlignmentSource::Decl); 12346 else 12347 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12348 AlignmentSource::Decl); 12349 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12350 D.getBeginLoc()); 12351 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12352 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12353 // } 12354 } 12355 } 12356 12357 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12358 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12359 SourceLocation Loc) { 12360 if (CGF.getLangOpts().OpenMP < 50) 12361 return; 12362 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12363 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12364 "Unknown lastprivate conditional variable."); 12365 StringRef UniqueName = It->second; 12366 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12367 // The variable was not updated in the region - exit. 
12368 if (!GV) 12369 return; 12370 LValue LPLVal = CGF.MakeAddrLValue( 12371 Address(GV, GV->getValueType(), PrivLVal.getAlignment()), 12372 PrivLVal.getType().getNonReferenceType()); 12373 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12374 CGF.EmitStoreOfScalar(Res, PrivLVal); 12375 } 12376 12377 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12378 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12379 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12380 llvm_unreachable("Not supported in SIMD-only mode"); 12381 } 12382 12383 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12384 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12385 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12386 llvm_unreachable("Not supported in SIMD-only mode"); 12387 } 12388 12389 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12390 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12391 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12392 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12393 bool Tied, unsigned &NumberOfParts) { 12394 llvm_unreachable("Not supported in SIMD-only mode"); 12395 } 12396 12397 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12398 SourceLocation Loc, 12399 llvm::Function *OutlinedFn, 12400 ArrayRef<llvm::Value *> CapturedVars, 12401 const Expr *IfCond, 12402 llvm::Value *NumThreads) { 12403 llvm_unreachable("Not supported in SIMD-only mode"); 12404 } 12405 12406 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12407 CodeGenFunction &CGF, StringRef CriticalName, 12408 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12409 const Expr *Hint) { 12410 llvm_unreachable("Not supported in SIMD-only mode"); 12411 } 12412 12413 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12414 const RegionCodeGenTy &MasterOpGen, 12415 SourceLocation Loc) { 12416 
llvm_unreachable("Not supported in SIMD-only mode"); 12417 } 12418 12419 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF, 12420 const RegionCodeGenTy &MasterOpGen, 12421 SourceLocation Loc, 12422 const Expr *Filter) { 12423 llvm_unreachable("Not supported in SIMD-only mode"); 12424 } 12425 12426 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12427 SourceLocation Loc) { 12428 llvm_unreachable("Not supported in SIMD-only mode"); 12429 } 12430 12431 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12432 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12433 SourceLocation Loc) { 12434 llvm_unreachable("Not supported in SIMD-only mode"); 12435 } 12436 12437 void CGOpenMPSIMDRuntime::emitSingleRegion( 12438 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12439 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12440 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12441 ArrayRef<const Expr *> AssignmentOps) { 12442 llvm_unreachable("Not supported in SIMD-only mode"); 12443 } 12444 12445 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12446 const RegionCodeGenTy &OrderedOpGen, 12447 SourceLocation Loc, 12448 bool IsThreads) { 12449 llvm_unreachable("Not supported in SIMD-only mode"); 12450 } 12451 12452 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12453 SourceLocation Loc, 12454 OpenMPDirectiveKind Kind, 12455 bool EmitChecks, 12456 bool ForceSimpleCall) { 12457 llvm_unreachable("Not supported in SIMD-only mode"); 12458 } 12459 12460 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12461 CodeGenFunction &CGF, SourceLocation Loc, 12462 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12463 bool Ordered, const DispatchRTInput &DispatchValues) { 12464 llvm_unreachable("Not supported in SIMD-only mode"); 12465 } 12466 12467 void CGOpenMPSIMDRuntime::emitForStaticInit( 12468 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind 
DKind, 12469 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12470 llvm_unreachable("Not supported in SIMD-only mode"); 12471 } 12472 12473 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12474 CodeGenFunction &CGF, SourceLocation Loc, 12475 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 12476 llvm_unreachable("Not supported in SIMD-only mode"); 12477 } 12478 12479 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12480 SourceLocation Loc, 12481 unsigned IVSize, 12482 bool IVSigned) { 12483 llvm_unreachable("Not supported in SIMD-only mode"); 12484 } 12485 12486 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12487 SourceLocation Loc, 12488 OpenMPDirectiveKind DKind) { 12489 llvm_unreachable("Not supported in SIMD-only mode"); 12490 } 12491 12492 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12493 SourceLocation Loc, 12494 unsigned IVSize, bool IVSigned, 12495 Address IL, Address LB, 12496 Address UB, Address ST) { 12497 llvm_unreachable("Not supported in SIMD-only mode"); 12498 } 12499 12500 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12501 llvm::Value *NumThreads, 12502 SourceLocation Loc) { 12503 llvm_unreachable("Not supported in SIMD-only mode"); 12504 } 12505 12506 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12507 ProcBindKind ProcBind, 12508 SourceLocation Loc) { 12509 llvm_unreachable("Not supported in SIMD-only mode"); 12510 } 12511 12512 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12513 const VarDecl *VD, 12514 Address VDAddr, 12515 SourceLocation Loc) { 12516 llvm_unreachable("Not supported in SIMD-only mode"); 12517 } 12518 12519 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12520 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12521 CodeGenFunction *CGF) { 12522 llvm_unreachable("Not supported in SIMD-only mode"); 
12523 } 12524 12525 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12526 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12527 llvm_unreachable("Not supported in SIMD-only mode"); 12528 } 12529 12530 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12531 ArrayRef<const Expr *> Vars, 12532 SourceLocation Loc, 12533 llvm::AtomicOrdering AO) { 12534 llvm_unreachable("Not supported in SIMD-only mode"); 12535 } 12536 12537 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12538 const OMPExecutableDirective &D, 12539 llvm::Function *TaskFunction, 12540 QualType SharedsTy, Address Shareds, 12541 const Expr *IfCond, 12542 const OMPTaskDataTy &Data) { 12543 llvm_unreachable("Not supported in SIMD-only mode"); 12544 } 12545 12546 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12547 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12548 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12549 const Expr *IfCond, const OMPTaskDataTy &Data) { 12550 llvm_unreachable("Not supported in SIMD-only mode"); 12551 } 12552 12553 void CGOpenMPSIMDRuntime::emitReduction( 12554 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12555 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12556 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12557 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12558 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12559 ReductionOps, Options); 12560 } 12561 12562 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12563 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12564 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12565 llvm_unreachable("Not supported in SIMD-only mode"); 12566 } 12567 12568 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12569 SourceLocation Loc, 12570 bool 
IsWorksharingReduction) { 12571 llvm_unreachable("Not supported in SIMD-only mode"); 12572 } 12573 12574 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12575 SourceLocation Loc, 12576 ReductionCodeGen &RCG, 12577 unsigned N) { 12578 llvm_unreachable("Not supported in SIMD-only mode"); 12579 } 12580 12581 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12582 SourceLocation Loc, 12583 llvm::Value *ReductionsPtr, 12584 LValue SharedLVal) { 12585 llvm_unreachable("Not supported in SIMD-only mode"); 12586 } 12587 12588 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12589 SourceLocation Loc, 12590 const OMPTaskDataTy &Data) { 12591 llvm_unreachable("Not supported in SIMD-only mode"); 12592 } 12593 12594 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12595 CodeGenFunction &CGF, SourceLocation Loc, 12596 OpenMPDirectiveKind CancelRegion) { 12597 llvm_unreachable("Not supported in SIMD-only mode"); 12598 } 12599 12600 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12601 SourceLocation Loc, const Expr *IfCond, 12602 OpenMPDirectiveKind CancelRegion) { 12603 llvm_unreachable("Not supported in SIMD-only mode"); 12604 } 12605 12606 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12607 const OMPExecutableDirective &D, StringRef ParentName, 12608 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12609 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12610 llvm_unreachable("Not supported in SIMD-only mode"); 12611 } 12612 12613 void CGOpenMPSIMDRuntime::emitTargetCall( 12614 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12615 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12616 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12617 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12618 const OMPLoopDirective &D)> 12619 SizeEmitter) { 12620 llvm_unreachable("Not supported in SIMD-only mode"); 12621 } 12622 
12623 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12624 llvm_unreachable("Not supported in SIMD-only mode"); 12625 } 12626 12627 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12628 llvm_unreachable("Not supported in SIMD-only mode"); 12629 } 12630 12631 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 12632 return false; 12633 } 12634 12635 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12636 const OMPExecutableDirective &D, 12637 SourceLocation Loc, 12638 llvm::Function *OutlinedFn, 12639 ArrayRef<llvm::Value *> CapturedVars) { 12640 llvm_unreachable("Not supported in SIMD-only mode"); 12641 } 12642 12643 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12644 const Expr *NumTeams, 12645 const Expr *ThreadLimit, 12646 SourceLocation Loc) { 12647 llvm_unreachable("Not supported in SIMD-only mode"); 12648 } 12649 12650 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12651 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12652 const Expr *Device, const RegionCodeGenTy &CodeGen, 12653 CGOpenMPRuntime::TargetDataInfo &Info) { 12654 llvm_unreachable("Not supported in SIMD-only mode"); 12655 } 12656 12657 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12658 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12659 const Expr *Device) { 12660 llvm_unreachable("Not supported in SIMD-only mode"); 12661 } 12662 12663 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12664 const OMPLoopDirective &D, 12665 ArrayRef<Expr *> NumIterations) { 12666 llvm_unreachable("Not supported in SIMD-only mode"); 12667 } 12668 12669 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12670 const OMPDependClause *C) { 12671 llvm_unreachable("Not supported in SIMD-only mode"); 12672 } 12673 12674 const VarDecl * 12675 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12676 const VarDecl *NativeParam) const { 12677 
llvm_unreachable("Not supported in SIMD-only mode"); 12678 } 12679 12680 Address 12681 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12682 const VarDecl *NativeParam, 12683 const VarDecl *TargetParam) const { 12684 llvm_unreachable("Not supported in SIMD-only mode"); 12685 } 12686