//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
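        // A descriptive note (inferred from the code below): the generated
        // switch dispatches on the task part id, so a re-scheduled untied
        // task resumes at the matching ".untied.jmp." block; case 0 is the
        // initial entry, and emitUntiedSwitch adds one case per task
        // scheduling point.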
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in the list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
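    // Note (describing this ctor/dtor pair): the new inlined region info is
    // chained onto the previous CapturedStmtInfo, and with NoInheritance the
    // lambda/block capture state is stashed here and restored by the
    // destructor so the inlined region does not inherit it.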
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined; the runtime should get it
  /// from environment variables as described in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre- and post-actions in an advanced codegen sequence for
/// an OpenMP region.
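/// Note (describing the usage below): RegionCodeGenTy::operator() pushes this
/// cleanup with NormalAndEHCleanup so that the action's Exit hook also runs on
/// the exceptional path out of the region.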
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
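    // Note (describing the computation below): for an array section the
    // element count is the pointer difference between the section's upper and
    // lower bounds plus one, and the size in chars is that count times
    // sizeof(element).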
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
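  // Illustrative sketch (not emitted verbatim): for
  //   #pragma omp declare reduction(myadd : int : omp_out += omp_in)
  // the combiner built here is roughly equivalent to
  //   void .omp_combiner.(int *restrict omp_out, int *restrict omp_in) {
  //     *omp_out += *omp_in;
  //   }
  // with the body generated from the combiner expression via the mappings
  // above.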
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize, we split the block at
    // IP, use the new block (=BB) as the destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)), which is then fed to
    // EmitBranchThroughCleanup. Furthermore, there will no longer be a need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed, but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ?
          OMPD_taskloop : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    std::string FunctionName = "";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr =
        OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
                                     Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used, we need to use it for all thread id calls
  // as the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
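      // A clarifying note (inferred from the condition below): loading the
      // thread id from that argument is only done when no landing pad can
      // intervene, or when the address is computed in the entry block or the
      // current block; otherwise we fall through to the
      // __kmpc_global_thread_num call emitted at the end of this function.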
1487       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1488       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1489       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1490           !CGF.getLangOpts().CXXExceptions ||
1491           CGF.Builder.GetInsertBlock() == TopBlock ||
1492           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1493           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1494               TopBlock ||
1495           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1496               CGF.Builder.GetInsertBlock()) {
1497         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1498         // If the value was loaded in the entry block, cache it and use it
1499         // everywhere in the function.
1500         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1501           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1502           Elem.second.ThreadID = ThreadID;
1503         }
1504         return ThreadID;
1505       }
1506     }
1507   }
1508
1509   // This is not an outlined function region - need to call kmp_int32
1510   // __kmpc_global_thread_num(ident_t *loc).
1511   // Generate thread id value and cache this value for use across the
1512   // function.
1513   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1514   if (!Elem.second.ServiceInsertPt)
1515     setLocThreadIdInsertPt(CGF);
1516   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1517   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1518   llvm::CallInst *Call = CGF.Builder.CreateCall(
1519       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1520                                             OMPRTL___kmpc_global_thread_num),
1521       emitUpdateLocation(CGF, Loc));
1522   Call->setCallingConv(CGF.getRuntimeCC());
1523   Elem.second.ThreadID = Call;
1524   return Call;
1525 }
1526
1527 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1528   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1529   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1530     clearLocThreadIdInsertPt(CGF);
1531     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1532   }
1533   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1534     for (const auto *D : FunctionUDRMap[CGF.CurFn])
1535       UDRMap.erase(D);
1536     FunctionUDRMap.erase(CGF.CurFn);
1537   }
1538   auto I = FunctionUDMMap.find(CGF.CurFn);
1539   if (I != FunctionUDMMap.end()) {
1540     for (const auto *D : I->second)
1541       UDMMap.erase(D);
1542     FunctionUDMMap.erase(I);
1543   }
1544   LastprivateConditionalToTypes.erase(CGF.CurFn);
1545   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1546 }
1547
1548 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1549   return OMPBuilder.IdentPtr;
1550 }
1551
1552 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1553   if (!Kmpc_MicroTy) {
1554     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1555     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1556                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1557     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1558   }
1559   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1560 }
1561
1562 llvm::FunctionCallee
1563 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1564                                              bool IsGPUDistribute) {
1565   assert((IVSize == 32 || IVSize == 64) &&
1566          "IV size is not compatible with the omp runtime");
1567   StringRef Name;
1568   if (IsGPUDistribute)
1569     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1570                                     : "__kmpc_distribute_static_init_4u")
1571                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1572                                     : "__kmpc_distribute_static_init_8u");
1573   else
1574     Name = IVSize == 32 ? (IVSigned ?
"__kmpc_for_static_init_4" 1575 : "__kmpc_for_static_init_4u") 1576 : (IVSigned ? "__kmpc_for_static_init_8" 1577 : "__kmpc_for_static_init_8u"); 1578 1579 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1580 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1581 llvm::Type *TypeParams[] = { 1582 getIdentTyPointerTy(), // loc 1583 CGM.Int32Ty, // tid 1584 CGM.Int32Ty, // schedtype 1585 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1586 PtrTy, // p_lower 1587 PtrTy, // p_upper 1588 PtrTy, // p_stride 1589 ITy, // incr 1590 ITy // chunk 1591 }; 1592 auto *FnTy = 1593 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1594 return CGM.CreateRuntimeFunction(FnTy, Name); 1595 } 1596 1597 llvm::FunctionCallee 1598 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1599 assert((IVSize == 32 || IVSize == 64) && 1600 "IV size is not compatible with the omp runtime"); 1601 StringRef Name = 1602 IVSize == 32 1603 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1604 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1605 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1606 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1607 CGM.Int32Ty, // tid 1608 CGM.Int32Ty, // schedtype 1609 ITy, // lower 1610 ITy, // upper 1611 ITy, // stride 1612 ITy // chunk 1613 }; 1614 auto *FnTy = 1615 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1616 return CGM.CreateRuntimeFunction(FnTy, Name); 1617 } 1618 1619 llvm::FunctionCallee 1620 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1621 assert((IVSize == 32 || IVSize == 64) && 1622 "IV size is not compatible with the omp runtime"); 1623 StringRef Name = 1624 IVSize == 32 1625 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1626 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1627 llvm::Type *TypeParams[] = { 1628 getIdentTyPointerTy(), // loc 1629 CGM.Int32Ty, // tid 1630 }; 1631 auto *FnTy = 1632 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1633 return CGM.CreateRuntimeFunction(FnTy, Name); 1634 } 1635 1636 llvm::FunctionCallee 1637 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1638 assert((IVSize == 32 || IVSize == 64) && 1639 "IV size is not compatible with the omp runtime"); 1640 StringRef Name = 1641 IVSize == 32 1642 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1643 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1644 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1645 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1646 llvm::Type *TypeParams[] = { 1647 getIdentTyPointerTy(), // loc 1648 CGM.Int32Ty, // tid 1649 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1650 PtrTy, // p_lower 1651 PtrTy, // p_upper 1652 PtrTy // p_stride 1653 }; 1654 auto *FnTy = 1655 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1656 return CGM.CreateRuntimeFunction(FnTy, Name); 1657 } 1658 1659 /// Obtain information that uniquely identifies a target entry. This 1660 /// consists of the file and device IDs as well as line number associated with 1661 /// the relevant entry source location. 
1662 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1663                                      unsigned &DeviceID, unsigned &FileID,
1664                                      unsigned &LineNum) {
1665   SourceManager &SM = C.getSourceManager();
1666
1667   // The loc should always be valid and have a file ID (the user cannot use
1668   // #pragma directives in macros).
1669
1670   assert(Loc.isValid() && "Source location is expected to be always valid.");
1671
1672   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1673   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1674
1675   llvm::sys::fs::UniqueID ID;
1676   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1677     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1678     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1679     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1680       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1681           << PLoc.getFilename() << EC.message();
1682   }
1683
1684   DeviceID = ID.getDevice();
1685   FileID = ID.getFile();
1686   LineNum = PLoc.getLine();
1687 }
1688
1689 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1690   if (CGM.getLangOpts().OpenMPSimd)
1691     return Address::invalid();
1692   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1693       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1694   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1695               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1696                HasRequiresUnifiedSharedMemory))) {
1697     SmallString<64> PtrName;
1698     {
1699       llvm::raw_svector_ostream OS(PtrName);
1700       OS << CGM.getMangledName(GlobalDecl(VD));
1701       if (!VD->isExternallyVisible()) {
1702         unsigned DeviceID, FileID, Line;
1703         getTargetEntryUniqueInfo(CGM.getContext(),
1704                                  VD->getCanonicalDecl()->getBeginLoc(),
1705                                  DeviceID, FileID, Line);
1706         OS << llvm::format("_%x", FileID);
1707       }
1708       OS << "_decl_tgt_ref_ptr";
1709     }
1710     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1711     if (!Ptr) {
1712       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1713       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1714                                         PtrName);
1715
1716       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1717       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1718
1719       if (!CGM.getLangOpts().OpenMPIsDevice)
1720         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1721       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1722     }
1723     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1724   }
1725   return Address::invalid();
1726 }
1727
1728 llvm::Constant *
1729 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1730   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1731          !CGM.getContext().getTargetInfo().isTLSSupported());
1732   // Lookup the entry, lazily creating it if necessary.
1733
1733 std::string Suffix = getName({"cache", ""}); 1734 return getOrCreateInternalVariable( 1735 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1736 } 1737 1738 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1739 const VarDecl *VD, 1740 Address VDAddr, 1741 SourceLocation Loc) { 1742 if (CGM.getLangOpts().OpenMPUseTLS && 1743 CGM.getContext().getTargetInfo().isTLSSupported()) 1744 return VDAddr; 1745 1746 llvm::Type *VarTy = VDAddr.getElementType(); 1747 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1748 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1749 CGM.Int8PtrTy), 1750 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1751 getOrCreateThreadPrivateCache(VD)}; 1752 return Address(CGF.EmitRuntimeCall( 1753 OMPBuilder.getOrCreateRuntimeFunction( 1754 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1755 Args), 1756 VDAddr.getAlignment()); 1757 } 1758 1759 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1760 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1761 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1762 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1763 // library. 1764 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1765 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1766 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1767 OMPLoc); 1768 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1769 // to register constructor/destructor for variable. 1770 llvm::Value *Args[] = { 1771 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1772 Ctor, CopyCtor, Dtor}; 1773 CGF.EmitRuntimeCall( 1774 OMPBuilder.getOrCreateRuntimeFunction( 1775 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1776 Args); 1777 } 1778 1779 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1780 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1781 bool PerformInit, CodeGenFunction *CGF) { 1782 if (CGM.getLangOpts().OpenMPUseTLS && 1783 CGM.getContext().getTargetInfo().isTLSSupported()) 1784 return nullptr; 1785 1786 VD = VD->getDefinition(CGM.getContext()); 1787 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1788 QualType ASTTy = VD->getType(); 1789 1790 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1791 const Expr *Init = VD->getAnyInitializer(); 1792 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1793 // Generate function that re-emits the declaration's initializer into the 1794 // threadprivate copy of the variable VD 1795 CodeGenFunction CtorCGF(CGM); 1796 FunctionArgList Args; 1797 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1798 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1799 ImplicitParamDecl::Other); 1800 Args.push_back(&Dst); 1801 1802 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1803 CGM.getContext().VoidPtrTy, Args); 1804 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1805 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1806 llvm::Function *Fn = 1807 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1808 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1809 Args, Loc, Loc); 1810 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1811 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1812 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1813 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1814 Arg = 
CtorCGF.Builder.CreateElementBitCast(
1815           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
1816       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1817                                /*IsInitializer=*/true);
1818       ArgVal = CtorCGF.EmitLoadOfScalar(
1819           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1820           CGM.getContext().VoidPtrTy, Dst.getLocation());
1821       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1822       CtorCGF.FinishFunction();
1823       Ctor = Fn;
1824     }
1825     if (VD->getType().isDestructedType() != QualType::DK_none) {
1826       // Generate a function that emits the destructor call for the
1827       // threadprivate copy of the variable VD.
1828       CodeGenFunction DtorCGF(CGM);
1829       FunctionArgList Args;
1830       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1831                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1832                             ImplicitParamDecl::Other);
1833       Args.push_back(&Dst);
1834
1835       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1836           CGM.getContext().VoidTy, Args);
1837       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1838       std::string Name = getName({"__kmpc_global_dtor_", ""});
1839       llvm::Function *Fn =
1840           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1841       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1842       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1843                             Loc, Loc);
1844       // Create a scope with an artificial location for the body of this function.
1845       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1846       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1847           DtorCGF.GetAddrOfLocalVar(&Dst),
1848           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1849       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1850                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1851                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1852       DtorCGF.FinishFunction();
1853       Dtor = Fn;
1854     }
1855     // Do not emit init function if it is not required.
1856     if (!Ctor && !Dtor)
1857       return nullptr;
1858
1859     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1860     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1861                                                /*isVarArg=*/false)
1862                            ->getPointerTo();
1863     // Copying constructor for the threadprivate variable.
1864     // Must be NULL: the parameter is reserved by the runtime, which currently
1865     // requires it to always be NULL and fires an assertion otherwise.
1866 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1867 if (Ctor == nullptr) { 1868 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1869 /*isVarArg=*/false) 1870 ->getPointerTo(); 1871 Ctor = llvm::Constant::getNullValue(CtorTy); 1872 } 1873 if (Dtor == nullptr) { 1874 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1875 /*isVarArg=*/false) 1876 ->getPointerTo(); 1877 Dtor = llvm::Constant::getNullValue(DtorTy); 1878 } 1879 if (!CGF) { 1880 auto *InitFunctionTy = 1881 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1882 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1883 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1884 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1885 CodeGenFunction InitCGF(CGM); 1886 FunctionArgList ArgList; 1887 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1888 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1889 Loc, Loc); 1890 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1891 InitCGF.FinishFunction(); 1892 return InitFunction; 1893 } 1894 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1895 } 1896 return nullptr; 1897 } 1898 1899 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1900 llvm::GlobalVariable *Addr, 1901 bool PerformInit) { 1902 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1903 !CGM.getLangOpts().OpenMPIsDevice) 1904 return false; 1905 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1906 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1907 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1908 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1909 HasRequiresUnifiedSharedMemory)) 1910 return CGM.getLangOpts().OpenMPIsDevice; 1911 VD = VD->getDefinition(CGM.getContext()); 1912 assert(VD && "Unknown VarDecl"); 1913 1914 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1915 return CGM.getLangOpts().OpenMPIsDevice; 1916 1917 QualType ASTTy = VD->getType(); 1918 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1919 1920 // Produce the unique prefix to identify the new target regions. We use 1921 // the source location of the variable declaration which we know to not 1922 // conflict with any target region. 
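  // The emitted prefix has the following shape, with the device and file IDs
  // printed in hex (values illustrative):
  //   __omp_offloading__801_2af3_myvar_l42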
1923 unsigned DeviceID; 1924 unsigned FileID; 1925 unsigned Line; 1926 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1927 SmallString<128> Buffer, Out; 1928 { 1929 llvm::raw_svector_ostream OS(Buffer); 1930 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1931 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1932 } 1933 1934 const Expr *Init = VD->getAnyInitializer(); 1935 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1936 llvm::Constant *Ctor; 1937 llvm::Constant *ID; 1938 if (CGM.getLangOpts().OpenMPIsDevice) { 1939 // Generate function that re-emits the declaration's initializer into 1940 // the threadprivate copy of the variable VD 1941 CodeGenFunction CtorCGF(CGM); 1942 1943 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1944 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1945 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1946 FTy, Twine(Buffer, "_ctor"), FI, Loc); 1947 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1948 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1949 FunctionArgList(), Loc, Loc); 1950 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 1951 CtorCGF.EmitAnyExprToMem(Init, 1952 Address(Addr, CGM.getContext().getDeclAlign(VD)), 1953 Init->getType().getQualifiers(), 1954 /*IsInitializer=*/true); 1955 CtorCGF.FinishFunction(); 1956 Ctor = Fn; 1957 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1958 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 1959 } else { 1960 Ctor = new llvm::GlobalVariable( 1961 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1962 llvm::GlobalValue::PrivateLinkage, 1963 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1964 ID = Ctor; 1965 } 1966 1967 // Register the information for the entry associated with the constructor. 1968 Out.clear(); 1969 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1970 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1971 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1972 } 1973 if (VD->getType().isDestructedType() != QualType::DK_none) { 1974 llvm::Constant *Dtor; 1975 llvm::Constant *ID; 1976 if (CGM.getLangOpts().OpenMPIsDevice) { 1977 // Generate function that emits destructor call for the threadprivate 1978 // copy of the variable VD 1979 CodeGenFunction DtorCGF(CGM); 1980 1981 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1982 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1983 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1984 FTy, Twine(Buffer, "_dtor"), FI, Loc); 1985 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1986 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1987 FunctionArgList(), Loc, Loc); 1988 // Create a scope with an artificial location for the body of this 1989 // function. 
1990 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1991 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 1992 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1993 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1994 DtorCGF.FinishFunction(); 1995 Dtor = Fn; 1996 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1997 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 1998 } else { 1999 Dtor = new llvm::GlobalVariable( 2000 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2001 llvm::GlobalValue::PrivateLinkage, 2002 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2003 ID = Dtor; 2004 } 2005 // Register the information for the entry associated with the destructor. 2006 Out.clear(); 2007 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2008 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2009 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2010 } 2011 return CGM.getLangOpts().OpenMPIsDevice; 2012 } 2013 2014 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2015 QualType VarType, 2016 StringRef Name) { 2017 std::string Suffix = getName({"artificial", ""}); 2018 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2019 llvm::Value *GAddr = 2020 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2021 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 2022 CGM.getTarget().isTLSSupported()) { 2023 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); 2024 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); 2025 } 2026 std::string CacheSuffix = getName({"cache", ""}); 2027 llvm::Value *Args[] = { 2028 emitUpdateLocation(CGF, SourceLocation()), 2029 getThreadID(CGF, SourceLocation()), 2030 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2031 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2032 /*isSigned=*/false), 2033 getOrCreateInternalVariable( 2034 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2035 return Address( 2036 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2037 CGF.EmitRuntimeCall( 2038 OMPBuilder.getOrCreateRuntimeFunction( 2039 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2040 Args), 2041 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2042 CGM.getContext().getTypeAlignInChars(VarType)); 2043 } 2044 2045 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2046 const RegionCodeGenTy &ThenGen, 2047 const RegionCodeGenTy &ElseGen) { 2048 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2049 2050 // If the condition constant folds and can be elided, try to avoid emitting 2051 // the condition and the dead arm of the if/else. 2052 bool CondConstant; 2053 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2054 if (CondConstant) 2055 ThenGen(CGF); 2056 else 2057 ElseGen(CGF); 2058 return; 2059 } 2060 2061 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2062 // emit the conditional branch. 2063 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2064 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2065 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2066 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2067 2068 // Emit the 'then' code. 2069 CGF.EmitBlock(ThenBlock); 2070 ThenGen(CGF); 2071 CGF.EmitBranch(ContBlock); 2072 // Emit the 'else' code if present. 
2073   // There is no need to emit a line number for an unconditional branch.
2074   (void)ApplyDebugLocation::CreateEmpty(CGF);
2075   CGF.EmitBlock(ElseBlock);
2076   ElseGen(CGF);
2077   // There is no need to emit a line number for an unconditional branch.
2078   (void)ApplyDebugLocation::CreateEmpty(CGF);
2079   CGF.EmitBranch(ContBlock);
2080   // Emit the continuation block for code after the if.
2081   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2082 }
2083
2084 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2085                                        llvm::Function *OutlinedFn,
2086                                        ArrayRef<llvm::Value *> CapturedVars,
2087                                        const Expr *IfCond) {
2088   if (!CGF.HaveInsertPoint())
2089     return;
2090   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2091   auto &M = CGM.getModule();
2092   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2093                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2094     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2095     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2096     llvm::Value *Args[] = {
2097         RTLoc,
2098         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2099         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2100     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2101     RealArgs.append(std::begin(Args), std::end(Args));
2102     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2103
2104     llvm::FunctionCallee RTLFn =
2105         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2106     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2107   };
2108   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2109                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2110     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2111     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2112     // Build calls:
2113     // __kmpc_serialized_parallel(&Loc, GTid);
2114     llvm::Value *Args[] = {RTLoc, ThreadID};
2115     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2116                             M, OMPRTL___kmpc_serialized_parallel),
2117                         Args);
2118
2119     // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
2120     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2121     Address ZeroAddrBound =
2122         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2123                                          /*Name=*/".bound.zero.addr");
2124     CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2125     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2126     // ThreadId for serialized parallels is 0.
2127     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2128     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2129     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2130
2131     // Ensure we do not inline the function. This is trivially true for the ones
2132     // passed to __kmpc_fork_call but the ones called in serialized regions
2133     // could be inlined. This is not perfect but it is closer to the invariant
2134     // we want, namely, every data environment starts with a new function.
2135     // TODO: We should pass the if condition to the runtime function and do the
2136     // handling there. Much cleaner code.
2137     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2138     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2139     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2140
2141     // __kmpc_end_serialized_parallel(&Loc, GTid);
2142     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2143     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2144                             M, OMPRTL___kmpc_end_serialized_parallel),
2145                         EndArgs);
2146   };
2147   if (IfCond) {
2148     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2149   } else {
2150     RegionCodeGenTy ThenRCG(ThenGen);
2151     ThenRCG(CGF);
2152   }
2153 }
2154
2155 // If we're inside an (outlined) parallel region, use the region info's
2156 // thread-ID variable (it is passed as the first argument of the outlined
2157 // function, as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
2158 // region but in a regular serial code region, get the thread ID by calling
2159 // kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
2160 // temporary and return the address of that temporary.
2161 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2162                                              SourceLocation Loc) {
2163   if (auto *OMPRegionInfo =
2164           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2165     if (OMPRegionInfo->getThreadIDVariable())
2166       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2167
2168   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2169   QualType Int32Ty =
2170       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2171   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2172   CGF.EmitStoreOfScalar(ThreadID,
2173                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2174
2175   return ThreadIDTemp;
2176 }
2177
2178 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2179     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2180   SmallString<256> Buffer;
2181   llvm::raw_svector_ostream Out(Buffer);
2182   Out << Name;
2183   StringRef RuntimeName = Out.str();
2184   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2185   if (Elem.second) {
2186     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2187            "OMP internal variable has different type than requested");
2188     return &*Elem.second;
2189   }
2190
2191   return Elem.second = new llvm::GlobalVariable(
2192       CGM.getModule(), Ty, /*IsConstant*/ false,
2193       llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2194       Elem.first(), /*InsertBefore=*/nullptr,
2195       llvm::GlobalValue::NotThreadLocal, AddressSpace);
2196 }
2197
2198 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2199   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2200   std::string Name = getName({Prefix, "var"});
2201   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2202 }
2203
2204 namespace {
2205 /// Common pre(post)-action for different OpenMP constructs.
2206 class CommonActionTy final : public PrePostActionTy { 2207 llvm::FunctionCallee EnterCallee; 2208 ArrayRef<llvm::Value *> EnterArgs; 2209 llvm::FunctionCallee ExitCallee; 2210 ArrayRef<llvm::Value *> ExitArgs; 2211 bool Conditional; 2212 llvm::BasicBlock *ContBlock = nullptr; 2213 2214 public: 2215 CommonActionTy(llvm::FunctionCallee EnterCallee, 2216 ArrayRef<llvm::Value *> EnterArgs, 2217 llvm::FunctionCallee ExitCallee, 2218 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2219 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2220 ExitArgs(ExitArgs), Conditional(Conditional) {} 2221 void Enter(CodeGenFunction &CGF) override { 2222 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2223 if (Conditional) { 2224 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2225 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2226 ContBlock = CGF.createBasicBlock("omp_if.end"); 2227 // Generate the branch (If-stmt) 2228 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2229 CGF.EmitBlock(ThenBlock); 2230 } 2231 } 2232 void Done(CodeGenFunction &CGF) { 2233 // Emit the rest of blocks/branches 2234 CGF.EmitBranch(ContBlock); 2235 CGF.EmitBlock(ContBlock, true); 2236 } 2237 void Exit(CodeGenFunction &CGF) override { 2238 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2239 } 2240 }; 2241 } // anonymous namespace 2242 2243 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2244 StringRef CriticalName, 2245 const RegionCodeGenTy &CriticalOpGen, 2246 SourceLocation Loc, const Expr *Hint) { 2247 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2248 // CriticalOpGen(); 2249 // __kmpc_end_critical(ident_t *, gtid, Lock); 2250 // Prepare arguments and build a call to __kmpc_critical 2251 if (!CGF.HaveInsertPoint()) 2252 return; 2253 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2254 getCriticalRegionLock(CriticalName)}; 2255 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2256 std::end(Args)); 2257 if (Hint) { 2258 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2259 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2260 } 2261 CommonActionTy Action( 2262 OMPBuilder.getOrCreateRuntimeFunction( 2263 CGM.getModule(), 2264 Hint ? 
OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2265       EnterArgs,
2266       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2267                                             OMPRTL___kmpc_end_critical),
2268       Args);
2269   CriticalOpGen.setAction(Action);
2270   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2271 }
2272
2273 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2274                                        const RegionCodeGenTy &MasterOpGen,
2275                                        SourceLocation Loc) {
2276   if (!CGF.HaveInsertPoint())
2277     return;
2278   // if(__kmpc_master(ident_t *, gtid)) {
2279   //   MasterOpGen();
2280   //   __kmpc_end_master(ident_t *, gtid);
2281   // }
2282   // Prepare arguments and build a call to __kmpc_master
2283   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2284   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2285                             CGM.getModule(), OMPRTL___kmpc_master),
2286                         Args,
2287                         OMPBuilder.getOrCreateRuntimeFunction(
2288                             CGM.getModule(), OMPRTL___kmpc_end_master),
2289                         Args,
2290                         /*Conditional=*/true);
2291   MasterOpGen.setAction(Action);
2292   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2293   Action.Done(CGF);
2294 }
2295
2296 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2297                                        const RegionCodeGenTy &MaskedOpGen,
2298                                        SourceLocation Loc, const Expr *Filter) {
2299   if (!CGF.HaveInsertPoint())
2300     return;
2301   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2302   //   MaskedOpGen();
2303   //   __kmpc_end_masked(ident_t *, gtid);
2304   // }
2305   // Prepare arguments and build a call to __kmpc_masked
2306   llvm::Value *FilterVal = Filter
2307                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2308                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2309   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2310                          FilterVal};
2311   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2312                             getThreadID(CGF, Loc)};
2313   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2314                             CGM.getModule(), OMPRTL___kmpc_masked),
2315                         Args,
2316                         OMPBuilder.getOrCreateRuntimeFunction(
2317                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2318                         ArgsEnd,
2319                         /*Conditional=*/true);
2320   MaskedOpGen.setAction(Action);
2321   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2322   Action.Done(CGF);
2323 }
2324
2325 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2326                                         SourceLocation Loc) {
2327   if (!CGF.HaveInsertPoint())
2328     return;
2329   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2330     OMPBuilder.createTaskyield(CGF.Builder);
2331   } else {
2332     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2333     llvm::Value *Args[] = {
2334         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2335         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2336     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2337                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2338                         Args);
2339   }
2340
2341   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2342     Region->emitUntiedSwitch(CGF);
2343 }
2344
2345 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2346                                           const RegionCodeGenTy &TaskgroupOpGen,
2347                                           SourceLocation Loc) {
2348   if (!CGF.HaveInsertPoint())
2349     return;
2350   // __kmpc_taskgroup(ident_t *, gtid);
2351   // TaskgroupOpGen();
2352   // __kmpc_end_taskgroup(ident_t *, gtid);
2353   // Prepare arguments and build a call to __kmpc_taskgroup
2354   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2355   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2356                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2357                         Args,
2358
OMPBuilder.getOrCreateRuntimeFunction( 2359 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2360 Args); 2361 TaskgroupOpGen.setAction(Action); 2362 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2363 } 2364 2365 /// Given an array of pointers to variables, project the address of a 2366 /// given variable. 2367 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2368 unsigned Index, const VarDecl *Var) { 2369 // Pull out the pointer to the variable. 2370 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2371 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2372 2373 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2374 Addr = CGF.Builder.CreateElementBitCast( 2375 Addr, CGF.ConvertTypeForMem(Var->getType())); 2376 return Addr; 2377 } 2378 2379 static llvm::Value *emitCopyprivateCopyFunction( 2380 CodeGenModule &CGM, llvm::Type *ArgsType, 2381 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2382 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2383 SourceLocation Loc) { 2384 ASTContext &C = CGM.getContext(); 2385 // void copy_func(void *LHSArg, void *RHSArg); 2386 FunctionArgList Args; 2387 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2388 ImplicitParamDecl::Other); 2389 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2390 ImplicitParamDecl::Other); 2391 Args.push_back(&LHSArg); 2392 Args.push_back(&RHSArg); 2393 const auto &CGFI = 2394 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2395 std::string Name = 2396 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2397 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2398 llvm::GlobalValue::InternalLinkage, Name, 2399 &CGM.getModule()); 2400 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2401 Fn->setDoesNotRecurse(); 2402 CodeGenFunction CGF(CGM); 2403 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2404 // Dest = (void*[n])(LHSArg); 2405 // Src = (void*[n])(RHSArg); 2406 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2407 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2408 ArgsType), CGF.getPointerAlign()); 2409 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2410 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2411 ArgsType), CGF.getPointerAlign()); 2412 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2413 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2414 // ... 
2415 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2416 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2417 const auto *DestVar = 2418 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2419 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2420 2421 const auto *SrcVar = 2422 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2423 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2424 2425 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2426 QualType Type = VD->getType(); 2427 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2428 } 2429 CGF.FinishFunction(); 2430 return Fn; 2431 } 2432 2433 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2434 const RegionCodeGenTy &SingleOpGen, 2435 SourceLocation Loc, 2436 ArrayRef<const Expr *> CopyprivateVars, 2437 ArrayRef<const Expr *> SrcExprs, 2438 ArrayRef<const Expr *> DstExprs, 2439 ArrayRef<const Expr *> AssignmentOps) { 2440 if (!CGF.HaveInsertPoint()) 2441 return; 2442 assert(CopyprivateVars.size() == SrcExprs.size() && 2443 CopyprivateVars.size() == DstExprs.size() && 2444 CopyprivateVars.size() == AssignmentOps.size()); 2445 ASTContext &C = CGM.getContext(); 2446 // int32 did_it = 0; 2447 // if(__kmpc_single(ident_t *, gtid)) { 2448 // SingleOpGen(); 2449 // __kmpc_end_single(ident_t *, gtid); 2450 // did_it = 1; 2451 // } 2452 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2453 // <copy_func>, did_it); 2454 2455 Address DidIt = Address::invalid(); 2456 if (!CopyprivateVars.empty()) { 2457 // int32 did_it = 0; 2458 QualType KmpInt32Ty = 2459 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2460 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2461 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2462 } 2463 // Prepare arguments and build a call to __kmpc_single 2464 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2465 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2466 CGM.getModule(), OMPRTL___kmpc_single), 2467 Args, 2468 OMPBuilder.getOrCreateRuntimeFunction( 2469 CGM.getModule(), OMPRTL___kmpc_end_single), 2470 Args, 2471 /*Conditional=*/true); 2472 SingleOpGen.setAction(Action); 2473 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2474 if (DidIt.isValid()) { 2475 // did_it = 1; 2476 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2477 } 2478 Action.Done(CGF); 2479 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2480 // <copy_func>, did_it); 2481 if (DidIt.isValid()) { 2482 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2483 QualType CopyprivateArrayTy = C.getConstantArrayType( 2484 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2485 /*IndexTypeQuals=*/0); 2486 // Create a list of all private variables for copyprivate. 2487 Address CopyprivateList = 2488 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2489 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2490 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2491 CGF.Builder.CreateStore( 2492 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2493 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2494 CGF.VoidPtrTy), 2495 Elem); 2496 } 2497 // Build function that copies private values from single region to all other 2498 // threads in the corresponding parallel region. 
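    // For example, for two copyprivate variables 'a' (int) and 'b' (double),
    // the generated helper conceptually performs (illustrative sketch, not
    // the exact emitted IR):
    //   void copy_func(void *lhs, void *rhs) {
    //     *(int *)((void **)lhs)[0] = *(int *)((void **)rhs)[0];
    //     *(double *)((void **)lhs)[1] = *(double *)((void **)rhs)[1];
    //   }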
2499 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2500 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2501 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 2502 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2503 Address CL = 2504 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2505 CGF.VoidPtrTy); 2506 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2507 llvm::Value *Args[] = { 2508 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2509 getThreadID(CGF, Loc), // i32 <gtid> 2510 BufSize, // size_t <buf_size> 2511 CL.getPointer(), // void *<copyprivate list> 2512 CpyFn, // void (*) (void *, void *) <copy_func> 2513 DidItVal // i32 did_it 2514 }; 2515 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2516 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2517 Args); 2518 } 2519 } 2520 2521 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2522 const RegionCodeGenTy &OrderedOpGen, 2523 SourceLocation Loc, bool IsThreads) { 2524 if (!CGF.HaveInsertPoint()) 2525 return; 2526 // __kmpc_ordered(ident_t *, gtid); 2527 // OrderedOpGen(); 2528 // __kmpc_end_ordered(ident_t *, gtid); 2529 // Prepare arguments and build a call to __kmpc_ordered 2530 if (IsThreads) { 2531 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2532 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2533 CGM.getModule(), OMPRTL___kmpc_ordered), 2534 Args, 2535 OMPBuilder.getOrCreateRuntimeFunction( 2536 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2537 Args); 2538 OrderedOpGen.setAction(Action); 2539 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2540 return; 2541 } 2542 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2543 } 2544 2545 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2546 unsigned Flags; 2547 if (Kind == OMPD_for) 2548 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2549 else if (Kind == OMPD_sections) 2550 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2551 else if (Kind == OMPD_single) 2552 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2553 else if (Kind == OMPD_barrier) 2554 Flags = OMP_IDENT_BARRIER_EXPL; 2555 else 2556 Flags = OMP_IDENT_BARRIER_IMPL; 2557 return Flags; 2558 } 2559 2560 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2561 CodeGenFunction &CGF, const OMPLoopDirective &S, 2562 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2563 // Check if the loop directive is actually a doacross loop directive. In this 2564 // case choose static, 1 schedule. 2565 if (llvm::any_of( 2566 S.getClausesOfKind<OMPOrderedClause>(), 2567 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2568 ScheduleKind = OMPC_SCHEDULE_static; 2569 // Chunk size is 1 in this case. 
2570 llvm::APInt ChunkSize(32, 1); 2571 ChunkExpr = IntegerLiteral::Create( 2572 CGF.getContext(), ChunkSize, 2573 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2574 SourceLocation()); 2575 } 2576 } 2577 2578 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2579 OpenMPDirectiveKind Kind, bool EmitChecks, 2580 bool ForceSimpleCall) { 2581 // Check if we should use the OMPBuilder 2582 auto *OMPRegionInfo = 2583 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2584 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2585 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2586 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2587 return; 2588 } 2589 2590 if (!CGF.HaveInsertPoint()) 2591 return; 2592 // Build call __kmpc_cancel_barrier(loc, thread_id); 2593 // Build call __kmpc_barrier(loc, thread_id); 2594 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2595 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2596 // thread_id); 2597 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2598 getThreadID(CGF, Loc)}; 2599 if (OMPRegionInfo) { 2600 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2601 llvm::Value *Result = CGF.EmitRuntimeCall( 2602 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2603 OMPRTL___kmpc_cancel_barrier), 2604 Args); 2605 if (EmitChecks) { 2606 // if (__kmpc_cancel_barrier()) { 2607 // exit from construct; 2608 // } 2609 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2610 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2611 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2612 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2613 CGF.EmitBlock(ExitBB); 2614 // exit from construct; 2615 CodeGenFunction::JumpDest CancelDestination = 2616 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2617 CGF.EmitBranchThroughCleanup(CancelDestination); 2618 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2619 } 2620 return; 2621 } 2622 } 2623 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2624 CGM.getModule(), OMPRTL___kmpc_barrier), 2625 Args); 2626 } 2627 2628 /// Map the OpenMP loop schedule to the runtime enumeration. 2629 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2630 bool Chunked, bool Ordered) { 2631 switch (ScheduleKind) { 2632 case OMPC_SCHEDULE_static: 2633 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2634 : (Ordered ? OMP_ord_static : OMP_sch_static); 2635 case OMPC_SCHEDULE_dynamic: 2636 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2637 case OMPC_SCHEDULE_guided: 2638 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2639 case OMPC_SCHEDULE_runtime: 2640 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2641 case OMPC_SCHEDULE_auto: 2642 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2643 case OMPC_SCHEDULE_unknown: 2644 assert(!Chunked && "chunk was specified but schedule kind not known"); 2645 return Ordered ? OMP_ord_static : OMP_sch_static; 2646 } 2647 llvm_unreachable("Unexpected runtime schedule"); 2648 } 2649 2650 /// Map the OpenMP distribute schedule to the runtime enumeration. 2651 static OpenMPSchedType 2652 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2653 // only static is allowed for dist_schedule 2654 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2655 }
2656
2657 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2658                                          bool Chunked) const {
2659   OpenMPSchedType Schedule =
2660       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2661   return Schedule == OMP_sch_static;
2662 }
2663
2664 bool CGOpenMPRuntime::isStaticNonchunked(
2665     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2666   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2667   return Schedule == OMP_dist_sch_static;
2668 }
2669
2670 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2671                                       bool Chunked) const {
2672   OpenMPSchedType Schedule =
2673       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2674   return Schedule == OMP_sch_static_chunked;
2675 }
2676
2677 bool CGOpenMPRuntime::isStaticChunked(
2678     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2679   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2680   return Schedule == OMP_dist_sch_static_chunked;
2681 }
2682
2683 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2684   OpenMPSchedType Schedule =
2685       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2686   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2687   return Schedule != OMP_sch_static;
2688 }
2689
2690 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2691                                   OpenMPScheduleClauseModifier M1,
2692                                   OpenMPScheduleClauseModifier M2) {
2693   int Modifier = 0;
2694   switch (M1) {
2695   case OMPC_SCHEDULE_MODIFIER_monotonic:
2696     Modifier = OMP_sch_modifier_monotonic;
2697     break;
2698   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2699     Modifier = OMP_sch_modifier_nonmonotonic;
2700     break;
2701   case OMPC_SCHEDULE_MODIFIER_simd:
2702     if (Schedule == OMP_sch_static_chunked)
2703       Schedule = OMP_sch_static_balanced_chunked;
2704     break;
2705   case OMPC_SCHEDULE_MODIFIER_last:
2706   case OMPC_SCHEDULE_MODIFIER_unknown:
2707     break;
2708   }
2709   switch (M2) {
2710   case OMPC_SCHEDULE_MODIFIER_monotonic:
2711     Modifier = OMP_sch_modifier_monotonic;
2712     break;
2713   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2714     Modifier = OMP_sch_modifier_nonmonotonic;
2715     break;
2716   case OMPC_SCHEDULE_MODIFIER_simd:
2717     if (Schedule == OMP_sch_static_chunked)
2718       Schedule = OMP_sch_static_balanced_chunked;
2719     break;
2720   case OMPC_SCHEDULE_MODIFIER_last:
2721   case OMPC_SCHEDULE_MODIFIER_unknown:
2722     break;
2723   }
2724   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2725   // If the static schedule kind is specified or if the ordered clause is
2726   // specified, and if the nonmonotonic modifier is not specified, the effect is
2727   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2728   // modifier is specified, the effect is as if the nonmonotonic modifier is
2729   // specified.
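  // For example (illustrative): under OpenMP 5.0, a plain schedule(dynamic)
  // gets the nonmonotonic modifier bit added here, while schedule(static)
  // keeps Modifier == 0 and is treated as monotonic.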
2730 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2731 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2732 Schedule == OMP_sch_static_balanced_chunked || 2733 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2734 Schedule == OMP_dist_sch_static_chunked || 2735 Schedule == OMP_dist_sch_static)) 2736 Modifier = OMP_sch_modifier_nonmonotonic; 2737 } 2738 return Schedule | Modifier; 2739 } 2740 2741 void CGOpenMPRuntime::emitForDispatchInit( 2742 CodeGenFunction &CGF, SourceLocation Loc, 2743 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2744 bool Ordered, const DispatchRTInput &DispatchValues) { 2745 if (!CGF.HaveInsertPoint()) 2746 return; 2747 OpenMPSchedType Schedule = getRuntimeSchedule( 2748 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2749 assert(Ordered || 2750 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2751 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2752 Schedule != OMP_sch_static_balanced_chunked)); 2753 // Call __kmpc_dispatch_init( 2754 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2755 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2756 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2757 2758 // If the Chunk was not specified in the clause - use default value 1. 2759 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2760 : CGF.Builder.getIntN(IVSize, 1); 2761 llvm::Value *Args[] = { 2762 emitUpdateLocation(CGF, Loc), 2763 getThreadID(CGF, Loc), 2764 CGF.Builder.getInt32(addMonoNonMonoModifier( 2765 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2766 DispatchValues.LB, // Lower 2767 DispatchValues.UB, // Upper 2768 CGF.Builder.getIntN(IVSize, 1), // Stride 2769 Chunk // Chunk 2770 }; 2771 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2772 } 2773 2774 static void emitForStaticInitCall( 2775 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2776 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2777 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2778 const CGOpenMPRuntime::StaticRTInput &Values) { 2779 if (!CGF.HaveInsertPoint()) 2780 return; 2781 2782 assert(!Values.Ordered); 2783 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2784 Schedule == OMP_sch_static_balanced_chunked || 2785 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2786 Schedule == OMP_dist_sch_static || 2787 Schedule == OMP_dist_sch_static_chunked); 2788 2789 // Call __kmpc_for_static_init( 2790 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2791 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2792 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2793 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2794 llvm::Value *Chunk = Values.Chunk; 2795 if (Chunk == nullptr) { 2796 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2797 Schedule == OMP_dist_sch_static) && 2798 "expected static non-chunked schedule"); 2799 // If the Chunk was not specified in the clause - use default value 1. 
2800 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2801 } else { 2802 assert((Schedule == OMP_sch_static_chunked || 2803 Schedule == OMP_sch_static_balanced_chunked || 2804 Schedule == OMP_ord_static_chunked || 2805 Schedule == OMP_dist_sch_static_chunked) && 2806 "expected static chunked schedule"); 2807 } 2808 llvm::Value *Args[] = { 2809 UpdateLocation, 2810 ThreadId, 2811 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2812 M2)), // Schedule type 2813 Values.IL.getPointer(), // &isLastIter 2814 Values.LB.getPointer(), // &LB 2815 Values.UB.getPointer(), // &UB 2816 Values.ST.getPointer(), // &Stride 2817 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2818 Chunk // Chunk 2819 }; 2820 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2821 } 2822 2823 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2824 SourceLocation Loc, 2825 OpenMPDirectiveKind DKind, 2826 const OpenMPScheduleTy &ScheduleKind, 2827 const StaticRTInput &Values) { 2828 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2829 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2830 assert(isOpenMPWorksharingDirective(DKind) && 2831 "Expected loop-based or sections-based directive."); 2832 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2833 isOpenMPLoopDirective(DKind) 2834 ? OMP_IDENT_WORK_LOOP 2835 : OMP_IDENT_WORK_SECTIONS); 2836 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2837 llvm::FunctionCallee StaticInitFunction = 2838 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); 2839 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2840 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2841 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2842 } 2843 2844 void CGOpenMPRuntime::emitDistributeStaticInit( 2845 CodeGenFunction &CGF, SourceLocation Loc, 2846 OpenMPDistScheduleClauseKind SchedKind, 2847 const CGOpenMPRuntime::StaticRTInput &Values) { 2848 OpenMPSchedType ScheduleNum = 2849 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2850 llvm::Value *UpdatedLocation = 2851 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2852 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2853 llvm::FunctionCallee StaticInitFunction; 2854 bool isGPUDistribute = 2855 CGM.getLangOpts().OpenMPIsDevice && 2856 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); 2857 StaticInitFunction = createForStaticInitFunction( 2858 Values.IVSize, Values.IVSigned, isGPUDistribute); 2859 2860 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2861 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2862 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2863 } 2864 2865 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2866 SourceLocation Loc, 2867 OpenMPDirectiveKind DKind) { 2868 if (!CGF.HaveInsertPoint()) 2869 return; 2870 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2871 llvm::Value *Args[] = { 2872 emitUpdateLocation(CGF, Loc, 2873 isOpenMPDistributeDirective(DKind) 2874 ? OMP_IDENT_WORK_DISTRIBUTE 2875 : isOpenMPLoopDirective(DKind) 2876 ? 
OMP_IDENT_WORK_LOOP
2877 : OMP_IDENT_WORK_SECTIONS),
2878 getThreadID(CGF, Loc)};
2879 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2880 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2881 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2882 CGF.EmitRuntimeCall(
2883 OMPBuilder.getOrCreateRuntimeFunction(
2884 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2885 Args);
2886 else
2887 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2888 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2889 Args);
2890 }
2891
2892 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2893 SourceLocation Loc,
2894 unsigned IVSize,
2895 bool IVSigned) {
2896 if (!CGF.HaveInsertPoint())
2897 return;
2898 // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2899 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2900 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2901 }
2902
2903 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2904 SourceLocation Loc, unsigned IVSize,
2905 bool IVSigned, Address IL,
2906 Address LB, Address UB,
2907 Address ST) {
2908 // Call __kmpc_dispatch_next(
2909 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2910 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2911 // kmp_int[32|64] *p_stride);
2912 llvm::Value *Args[] = {
2913 emitUpdateLocation(CGF, Loc),
2914 getThreadID(CGF, Loc),
2915 IL.getPointer(), // &isLastIter
2916 LB.getPointer(), // &Lower
2917 UB.getPointer(), // &Upper
2918 ST.getPointer() // &Stride
2919 };
2920 llvm::Value *Call =
2921 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2922 return CGF.EmitScalarConversion(
2923 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2924 CGF.getContext().BoolTy, Loc);
2925 }
2926
2927 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2928 llvm::Value *NumThreads,
2929 SourceLocation Loc) {
2930 if (!CGF.HaveInsertPoint())
2931 return;
2932 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2933 llvm::Value *Args[] = {
2934 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2935 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2936 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2937 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2938 Args);
2939 }
2940
2941 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2942 ProcBindKind ProcBind,
2943 SourceLocation Loc) {
2944 if (!CGF.HaveInsertPoint())
2945 return;
2946 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2947 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2948 llvm::Value *Args[] = {
2949 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2950 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2951 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2952 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2953 Args);
2954 }
2955
2956 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2957 SourceLocation Loc, llvm::AtomicOrdering AO) {
2958 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2959 OMPBuilder.createFlush(CGF.Builder);
2960 } else {
2961 if (!CGF.HaveInsertPoint())
2962 return;
2963 // Build call void __kmpc_flush(ident_t *loc)
2964 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2965 CGM.getModule(), OMPRTL___kmpc_flush),
2966
emitUpdateLocation(CGF, Loc));
2967 }
2968 }
2969
2970 namespace {
2971 /// Indexes of fields for type kmp_task_t.
2972 enum KmpTaskTFields {
2973 /// List of shared variables.
2974 KmpTaskTShareds,
2975 /// Task routine.
2976 KmpTaskTRoutine,
2977 /// Partition id for the untied tasks.
2978 KmpTaskTPartId,
2979 /// Function with call of destructors for private variables.
2980 Data1,
2981 /// Task priority.
2982 Data2,
2983 /// (Taskloops only) Lower bound.
2984 KmpTaskTLowerBound,
2985 /// (Taskloops only) Upper bound.
2986 KmpTaskTUpperBound,
2987 /// (Taskloops only) Stride.
2988 KmpTaskTStride,
2989 /// (Taskloops only) Is last iteration flag.
2990 KmpTaskTLastIter,
2991 /// (Taskloops only) Reduction data.
2992 KmpTaskTReductions,
2993 };
2994 } // anonymous namespace
2995
2996 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2997 return OffloadEntriesTargetRegion.empty() &&
2998 OffloadEntriesDeviceGlobalVar.empty();
2999 }
3000
3001 /// Initialize target region entry.
3002 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3003 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3004 StringRef ParentName, unsigned LineNum,
3005 unsigned Order) {
3006 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3007 "only required for the device "
3008 "code generation.");
3009 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3010 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3011 OMPTargetRegionEntryTargetRegion);
3012 ++OffloadingEntriesNum;
3013 }
3014
3015 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3016 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3017 StringRef ParentName, unsigned LineNum,
3018 llvm::Constant *Addr, llvm::Constant *ID,
3019 OMPTargetRegionEntryKind Flags) {
3020 // If we are emitting code for a target, the entry is already initialized;
3021 // it only has to be registered.
3022 if (CGM.getLangOpts().OpenMPIsDevice) {
3023 // This could happen if the device compilation is invoked standalone.
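// In that case no host IR metadata was loaded, so the entry was never
// preinitialized; simply drop the registration instead of asserting.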
3024 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 3025 return; 3026 auto &Entry = 3027 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3028 Entry.setAddress(Addr); 3029 Entry.setID(ID); 3030 Entry.setFlags(Flags); 3031 } else { 3032 if (Flags == 3033 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 3034 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 3035 /*IgnoreAddressId*/ true)) 3036 return; 3037 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3038 "Target region entry already registered!"); 3039 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3040 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3041 ++OffloadingEntriesNum; 3042 } 3043 } 3044 3045 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3046 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3047 bool IgnoreAddressId) const { 3048 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3049 if (PerDevice == OffloadEntriesTargetRegion.end()) 3050 return false; 3051 auto PerFile = PerDevice->second.find(FileID); 3052 if (PerFile == PerDevice->second.end()) 3053 return false; 3054 auto PerParentName = PerFile->second.find(ParentName); 3055 if (PerParentName == PerFile->second.end()) 3056 return false; 3057 auto PerLine = PerParentName->second.find(LineNum); 3058 if (PerLine == PerParentName->second.end()) 3059 return false; 3060 // Fail if this entry is already registered. 3061 if (!IgnoreAddressId && 3062 (PerLine->second.getAddress() || PerLine->second.getID())) 3063 return false; 3064 return true; 3065 } 3066 3067 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3068 const OffloadTargetRegionEntryInfoActTy &Action) { 3069 // Scan all target region entries and perform the provided action. 3070 for (const auto &D : OffloadEntriesTargetRegion) 3071 for (const auto &F : D.second) 3072 for (const auto &P : F.second) 3073 for (const auto &L : P.second) 3074 Action(D.first, F.first, P.first(), L.first, L.second); 3075 } 3076 3077 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3078 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3079 OMPTargetGlobalVarEntryKind Flags, 3080 unsigned Order) { 3081 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3082 "only required for the device " 3083 "code generation."); 3084 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3085 ++OffloadingEntriesNum; 3086 } 3087 3088 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3089 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3090 CharUnits VarSize, 3091 OMPTargetGlobalVarEntryKind Flags, 3092 llvm::GlobalValue::LinkageTypes Linkage) { 3093 if (CGM.getLangOpts().OpenMPIsDevice) { 3094 // This could happen if the device compilation is invoked standalone. 
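// (Same situation as for target regions above: without the host-side
// initialization there is no table entry to update, so bail out quietly.)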
3095 if (!hasDeviceGlobalVarEntryInfo(VarName))
3096 return;
3097 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3098 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3099 if (Entry.getVarSize().isZero()) {
3100 Entry.setVarSize(VarSize);
3101 Entry.setLinkage(Linkage);
3102 }
3103 return;
3104 }
3105 Entry.setVarSize(VarSize);
3106 Entry.setLinkage(Linkage);
3107 Entry.setAddress(Addr);
3108 } else {
3109 if (hasDeviceGlobalVarEntryInfo(VarName)) {
3110 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3111 assert(Entry.isValid() && Entry.getFlags() == Flags &&
3112 "Entry not initialized!");
3113 if (Entry.getVarSize().isZero()) {
3114 Entry.setVarSize(VarSize);
3115 Entry.setLinkage(Linkage);
3116 }
3117 return;
3118 }
3119 OffloadEntriesDeviceGlobalVar.try_emplace(
3120 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3121 ++OffloadingEntriesNum;
3122 }
3123 }
3124
3125 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3126 actOnDeviceGlobalVarEntriesInfo(
3127 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3128 // Scan all device global variable entries and perform the provided action.
3129 for (const auto &E : OffloadEntriesDeviceGlobalVar)
3130 Action(E.getKey(), E.getValue());
3131 }
3132
3133 void CGOpenMPRuntime::createOffloadEntry(
3134 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3135 llvm::GlobalValue::LinkageTypes Linkage) {
3136 StringRef Name = Addr->getName();
3137 llvm::Module &M = CGM.getModule();
3138 llvm::LLVMContext &C = M.getContext();
3139
3140 // Create constant string with the name.
3141 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3142
3143 std::string StringName = getName({"omp_offloading", "entry_name"});
3144 auto *Str = new llvm::GlobalVariable(
3145 M, StrPtrInit->getType(), /*isConstant=*/true,
3146 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3147 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3148
3149 llvm::Constant *Data[] = {
3150 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3151 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3152 llvm::ConstantInt::get(CGM.SizeTy, Size),
3153 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3154 llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3155 std::string EntryName = getName({"omp_offloading", "entry", ""});
3156 llvm::GlobalVariable *Entry = createGlobalStruct(
3157 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3158 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3159
3160 // The entry has to be created in the section the linker expects it to be.
3161 Entry->setSection("omp_offloading_entries");
3162 }
3163
3164 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3165 // Emit the offloading entries and metadata so that the device codegen side
3166 // can easily figure out what to emit. The produced metadata looks like
3167 // this:
3168 //
3169 // !omp_offload.info = !{!1, ...}
3170 //
3171 // Right now we only generate metadata for functions that contain target
3172 // regions.
3173
3174 // If we are in simd mode or there are no entries, we don't need to do
3175 // anything.
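// (Under -fopenmp-simd only the simd portions of the directives are honored
// and no offloading ever happens, so no entries exist in that mode.)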
3176 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3177 return; 3178 3179 llvm::Module &M = CGM.getModule(); 3180 llvm::LLVMContext &C = M.getContext(); 3181 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3182 SourceLocation, StringRef>, 3183 16> 3184 OrderedEntries(OffloadEntriesInfoManager.size()); 3185 llvm::SmallVector<StringRef, 16> ParentFunctions( 3186 OffloadEntriesInfoManager.size()); 3187 3188 // Auxiliary methods to create metadata values and strings. 3189 auto &&GetMDInt = [this](unsigned V) { 3190 return llvm::ConstantAsMetadata::get( 3191 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3192 }; 3193 3194 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3195 3196 // Create the offloading info metadata node. 3197 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3198 3199 // Create function that emits metadata for each target region entry; 3200 auto &&TargetRegionMetadataEmitter = 3201 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3202 &GetMDString]( 3203 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3204 unsigned Line, 3205 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3206 // Generate metadata for target regions. Each entry of this metadata 3207 // contains: 3208 // - Entry 0 -> Kind of this type of metadata (0). 3209 // - Entry 1 -> Device ID of the file where the entry was identified. 3210 // - Entry 2 -> File ID of the file where the entry was identified. 3211 // - Entry 3 -> Mangled name of the function where the entry was 3212 // identified. 3213 // - Entry 4 -> Line in the file where the entry was identified. 3214 // - Entry 5 -> Order the entry was created. 3215 // The first element of the metadata node is the kind. 3216 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3217 GetMDInt(FileID), GetMDString(ParentName), 3218 GetMDInt(Line), GetMDInt(E.getOrder())}; 3219 3220 SourceLocation Loc; 3221 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3222 E = CGM.getContext().getSourceManager().fileinfo_end(); 3223 I != E; ++I) { 3224 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3225 I->getFirst()->getUniqueID().getFile() == FileID) { 3226 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3227 I->getFirst(), Line, 1); 3228 break; 3229 } 3230 } 3231 // Save this entry in the right position of the ordered entries array. 3232 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3233 ParentFunctions[E.getOrder()] = ParentName; 3234 3235 // Add metadata to the named metadata node. 3236 MD->addOperand(llvm::MDNode::get(C, Ops)); 3237 }; 3238 3239 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3240 TargetRegionMetadataEmitter); 3241 3242 // Create function that emits metadata for each device global variable entry; 3243 auto &&DeviceGlobalVarMetadataEmitter = 3244 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3245 MD](StringRef MangledName, 3246 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3247 &E) { 3248 // Generate metadata for global variables. Each entry of this metadata 3249 // contains: 3250 // - Entry 0 -> Kind of this type of metadata (1). 3251 // - Entry 1 -> Mangled name of the variable. 3252 // - Entry 2 -> Declare target kind. 3253 // - Entry 3 -> Order the entry was created. 3254 // The first element of the metadata node is the kind. 
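// For illustration only - the mangled name, kind and order below are made
// up - such a node could look like:
// !3 = !{i32 1, !"_ZL9dev_count", i32 0, i32 3}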
3255 llvm::Metadata *Ops[] = {
3256 GetMDInt(E.getKind()), GetMDString(MangledName),
3257 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3258
3259 // Save this entry in the right position of the ordered entries array.
3260 OrderedEntries[E.getOrder()] =
3261 std::make_tuple(&E, SourceLocation(), MangledName);
3262
3263 // Add metadata to the named metadata node.
3264 MD->addOperand(llvm::MDNode::get(C, Ops));
3265 };
3266
3267 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3268 DeviceGlobalVarMetadataEmitter);
3269
3270 for (const auto &E : OrderedEntries) {
3271 assert(std::get<0>(E) && "All ordered entries must exist!");
3272 if (const auto *CE =
3273 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3274 std::get<0>(E))) {
3275 if (!CE->getID() || !CE->getAddress()) {
3276 // Do not blame the entry if the parent function is not emitted.
3277 StringRef FnName = ParentFunctions[CE->getOrder()];
3278 if (!CGM.GetGlobalValue(FnName))
3279 continue;
3280 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3281 DiagnosticsEngine::Error,
3282 "Offloading entry for target region in %0 is incorrect: either the "
3283 "address or the ID is invalid.");
3284 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3285 continue;
3286 }
3287 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3288 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3289 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3290 OffloadEntryInfoDeviceGlobalVar>(
3291 std::get<0>(E))) {
3292 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3293 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3294 CE->getFlags());
3295 switch (Flags) {
3296 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3297 if (CGM.getLangOpts().OpenMPIsDevice &&
3298 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3299 continue;
3300 if (!CE->getAddress()) {
3301 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3302 DiagnosticsEngine::Error, "Offloading entry for declare target "
3303 "variable %0 is incorrect: the "
3304 "address is invalid.");
3305 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3306 continue;
3307 }
3308 // The variable has no definition - no need to add the entry.
3309 if (CE->getVarSize().isZero())
3310 continue;
3311 break;
3312 }
3313 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3314 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3315 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3316 "Declare target link address is set.");
3317 if (CGM.getLangOpts().OpenMPIsDevice)
3318 continue;
3319 if (!CE->getAddress()) {
3320 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3321 DiagnosticsEngine::Error,
3322 "Offloading entry for declare target variable is incorrect: the "
3323 "address is invalid.");
3324 CGM.getDiags().Report(DiagID);
3325 continue;
3326 }
3327 break;
3328 }
3329 createOffloadEntry(CE->getAddress(), CE->getAddress(),
3330 CE->getVarSize().getQuantity(), Flags,
3331 CE->getLinkage());
3332 } else {
3333 llvm_unreachable("Unsupported entry kind.");
3334 }
3335 }
3336 }
3337
3338 /// Loads all the offload entries information from the host IR
3339 /// metadata.
3340 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3341 // If we are in target mode, load the metadata from the host IR. This code has
3342 // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
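// An illustrative host module (all values made up) might carry:
// !omp_offload.info = !{!0, !1}
// !0 = !{i32 0, i32 12, i32 34, !"_Z3foov", i32 42, i32 0} ; target region
// !1 = !{i32 1, !"_ZL9dev_count", i32 0, i32 1} ; declare target variable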
3343
3344 if (!CGM.getLangOpts().OpenMPIsDevice)
3345 return;
3346
3347 if (CGM.getLangOpts().OMPHostIRFile.empty())
3348 return;
3349
3350 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3351 if (auto EC = Buf.getError()) {
3352 CGM.getDiags().Report(diag::err_cannot_open_file)
3353 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3354 return;
3355 }
3356
3357 llvm::LLVMContext C;
3358 auto ME = expectedToErrorOrAndEmitErrors(
3359 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3360
3361 if (auto EC = ME.getError()) {
3362 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3363 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3364 CGM.getDiags().Report(DiagID)
3365 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3366 return;
3367 }
3368
3369 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3370 if (!MD)
3371 return;
3372
3373 for (llvm::MDNode *MN : MD->operands()) {
3374 auto &&GetMDInt = [MN](unsigned Idx) {
3375 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3376 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3377 };
3378
3379 auto &&GetMDString = [MN](unsigned Idx) {
3380 auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3381 return V->getString();
3382 };
3383
3384 switch (GetMDInt(0)) {
3385 default:
3386 llvm_unreachable("Unexpected metadata!");
3387 break;
3388 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3389 OffloadingEntryInfoTargetRegion:
3390 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3391 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3392 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3393 /*Order=*/GetMDInt(5));
3394 break;
3395 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3396 OffloadingEntryInfoDeviceGlobalVar:
3397 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3398 /*MangledName=*/GetMDString(1),
3399 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3400 /*Flags=*/GetMDInt(2)),
3401 /*Order=*/GetMDInt(3));
3402 break;
3403 }
3404 }
3405 }
3406
3407 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3408 if (!KmpRoutineEntryPtrTy) {
3409 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3410 ASTContext &C = CGM.getContext();
3411 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3412 FunctionProtoType::ExtProtoInfo EPI;
3413 KmpRoutineEntryPtrQTy = C.getPointerType(
3414 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3415 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3416 }
3417 }
3418
3419 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3420 // Make sure the type of the entry is already created. This is the type we
3421 // have to create:
3422 // struct __tgt_offload_entry{
3423 // void *addr; // Pointer to the offload entry info.
3424 // // (function or global)
3425 // char *name; // Name of the function or global.
3426 // size_t size; // Size of the entry info (0 if it is a function).
3427 // int32_t flags; // Flags associated with the entry, e.g. 'link'.
3428 // int32_t reserved; // Reserved, to be used by the runtime library.
3429 // }; 3430 if (TgtOffloadEntryQTy.isNull()) { 3431 ASTContext &C = CGM.getContext(); 3432 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3433 RD->startDefinition(); 3434 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3435 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3436 addFieldToRecordDecl(C, RD, C.getSizeType()); 3437 addFieldToRecordDecl( 3438 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3439 addFieldToRecordDecl( 3440 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3441 RD->completeDefinition(); 3442 RD->addAttr(PackedAttr::CreateImplicit(C)); 3443 TgtOffloadEntryQTy = C.getRecordType(RD); 3444 } 3445 return TgtOffloadEntryQTy; 3446 } 3447 3448 namespace { 3449 struct PrivateHelpersTy { 3450 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3451 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3452 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3453 PrivateElemInit(PrivateElemInit) {} 3454 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3455 const Expr *OriginalRef = nullptr; 3456 const VarDecl *Original = nullptr; 3457 const VarDecl *PrivateCopy = nullptr; 3458 const VarDecl *PrivateElemInit = nullptr; 3459 bool isLocalPrivate() const { 3460 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3461 } 3462 }; 3463 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3464 } // anonymous namespace 3465 3466 static bool isAllocatableDecl(const VarDecl *VD) { 3467 const VarDecl *CVD = VD->getCanonicalDecl(); 3468 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3469 return false; 3470 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3471 // Use the default allocation. 3472 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 3473 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 3474 !AA->getAllocator()); 3475 } 3476 3477 static RecordDecl * 3478 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3479 if (!Privates.empty()) { 3480 ASTContext &C = CGM.getContext(); 3481 // Build struct .kmp_privates_t. { 3482 // /* private vars */ 3483 // }; 3484 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3485 RD->startDefinition(); 3486 for (const auto &Pair : Privates) { 3487 const VarDecl *VD = Pair.second.Original; 3488 QualType Type = VD->getType().getNonReferenceType(); 3489 // If the private variable is a local variable with lvalue ref type, 3490 // allocate the pointer instead of the pointee type. 
3491 if (Pair.second.isLocalPrivate()) { 3492 if (VD->getType()->isLValueReferenceType()) 3493 Type = C.getPointerType(Type); 3494 if (isAllocatableDecl(VD)) 3495 Type = C.getPointerType(Type); 3496 } 3497 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3498 if (VD->hasAttrs()) { 3499 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3500 E(VD->getAttrs().end()); 3501 I != E; ++I) 3502 FD->addAttr(*I); 3503 } 3504 } 3505 RD->completeDefinition(); 3506 return RD; 3507 } 3508 return nullptr; 3509 } 3510 3511 static RecordDecl * 3512 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3513 QualType KmpInt32Ty, 3514 QualType KmpRoutineEntryPointerQTy) { 3515 ASTContext &C = CGM.getContext(); 3516 // Build struct kmp_task_t { 3517 // void * shareds; 3518 // kmp_routine_entry_t routine; 3519 // kmp_int32 part_id; 3520 // kmp_cmplrdata_t data1; 3521 // kmp_cmplrdata_t data2; 3522 // For taskloops additional fields: 3523 // kmp_uint64 lb; 3524 // kmp_uint64 ub; 3525 // kmp_int64 st; 3526 // kmp_int32 liter; 3527 // void * reductions; 3528 // }; 3529 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3530 UD->startDefinition(); 3531 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3532 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3533 UD->completeDefinition(); 3534 QualType KmpCmplrdataTy = C.getRecordType(UD); 3535 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3536 RD->startDefinition(); 3537 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3538 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3539 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3540 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3541 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3542 if (isOpenMPTaskLoopDirective(Kind)) { 3543 QualType KmpUInt64Ty = 3544 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3545 QualType KmpInt64Ty = 3546 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3547 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3548 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3549 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3550 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3551 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3552 } 3553 RD->completeDefinition(); 3554 return RD; 3555 } 3556 3557 static RecordDecl * 3558 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3559 ArrayRef<PrivateDataTy> Privates) { 3560 ASTContext &C = CGM.getContext(); 3561 // Build struct kmp_task_t_with_privates { 3562 // kmp_task_t task_data; 3563 // .kmp_privates_t. privates; 3564 // }; 3565 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3566 RD->startDefinition(); 3567 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3568 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3569 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3570 RD->completeDefinition(); 3571 return RD; 3572 } 3573 3574 /// Emit a proxy function which accepts kmp_task_t as the second 3575 /// argument. 
3576 /// \code 3577 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3578 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3579 /// For taskloops: 3580 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3581 /// tt->reductions, tt->shareds); 3582 /// return 0; 3583 /// } 3584 /// \endcode 3585 static llvm::Function * 3586 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3587 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3588 QualType KmpTaskTWithPrivatesPtrQTy, 3589 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3590 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3591 llvm::Value *TaskPrivatesMap) { 3592 ASTContext &C = CGM.getContext(); 3593 FunctionArgList Args; 3594 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3595 ImplicitParamDecl::Other); 3596 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3597 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3598 ImplicitParamDecl::Other); 3599 Args.push_back(&GtidArg); 3600 Args.push_back(&TaskTypeArg); 3601 const auto &TaskEntryFnInfo = 3602 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3603 llvm::FunctionType *TaskEntryTy = 3604 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3605 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3606 auto *TaskEntry = llvm::Function::Create( 3607 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3608 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3609 TaskEntry->setDoesNotRecurse(); 3610 CodeGenFunction CGF(CGM); 3611 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3612 Loc, Loc); 3613 3614 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3615 // tt, 3616 // For taskloops: 3617 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3618 // tt->task_data.shareds); 3619 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3620 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3621 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3622 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3623 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3624 const auto *KmpTaskTWithPrivatesQTyRD = 3625 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3626 LValue Base = 3627 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3628 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3629 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3630 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3631 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3632 3633 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3634 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3635 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3636 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3637 CGF.ConvertTypeForMem(SharedsPtrTy)); 3638 3639 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3640 llvm::Value *PrivatesParam; 3641 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3642 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3643 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3644 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3645 } else { 3646 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 
3647 } 3648 3649 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3650 TaskPrivatesMap, 3651 CGF.Builder 3652 .CreatePointerBitCastOrAddrSpaceCast( 3653 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3654 .getPointer()}; 3655 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3656 std::end(CommonArgs)); 3657 if (isOpenMPTaskLoopDirective(Kind)) { 3658 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3659 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3660 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3661 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3662 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3663 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3664 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3665 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3666 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3667 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3668 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3669 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3670 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3671 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3672 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3673 CallArgs.push_back(LBParam); 3674 CallArgs.push_back(UBParam); 3675 CallArgs.push_back(StParam); 3676 CallArgs.push_back(LIParam); 3677 CallArgs.push_back(RParam); 3678 } 3679 CallArgs.push_back(SharedsParam); 3680 3681 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3682 CallArgs); 3683 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3684 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3685 CGF.FinishFunction(); 3686 return TaskEntry; 3687 } 3688 3689 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3690 SourceLocation Loc, 3691 QualType KmpInt32Ty, 3692 QualType KmpTaskTWithPrivatesPtrQTy, 3693 QualType KmpTaskTWithPrivatesQTy) { 3694 ASTContext &C = CGM.getContext(); 3695 FunctionArgList Args; 3696 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3697 ImplicitParamDecl::Other); 3698 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3699 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3700 ImplicitParamDecl::Other); 3701 Args.push_back(&GtidArg); 3702 Args.push_back(&TaskTypeArg); 3703 const auto &DestructorFnInfo = 3704 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3705 llvm::FunctionType *DestructorFnTy = 3706 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3707 std::string Name = 3708 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3709 auto *DestructorFn = 3710 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3711 Name, &CGM.getModule()); 3712 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3713 DestructorFnInfo); 3714 DestructorFn->setDoesNotRecurse(); 3715 CodeGenFunction CGF(CGM); 3716 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3717 Args, Loc, Loc); 3718 3719 LValue Base = CGF.EmitLoadOfPointerLValue( 3720 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3721 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3722 const auto *KmpTaskTWithPrivatesQTyRD = 3723 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3724 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3725 Base = CGF.EmitLValueForField(Base, *FI); 3726 for 
(const auto *Field : 3727 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3728 if (QualType::DestructionKind DtorKind = 3729 Field->getType().isDestructedType()) { 3730 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3731 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3732 } 3733 } 3734 CGF.FinishFunction(); 3735 return DestructorFn; 3736 } 3737 3738 /// Emit a privates mapping function for correct handling of private and 3739 /// firstprivate variables. 3740 /// \code 3741 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 3742 /// **noalias priv1,..., <tyn> **noalias privn) { 3743 /// *priv1 = &.privates.priv1; 3744 /// ...; 3745 /// *privn = &.privates.privn; 3746 /// } 3747 /// \endcode 3748 static llvm::Value * 3749 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3750 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3751 ArrayRef<PrivateDataTy> Privates) { 3752 ASTContext &C = CGM.getContext(); 3753 FunctionArgList Args; 3754 ImplicitParamDecl TaskPrivatesArg( 3755 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3756 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3757 ImplicitParamDecl::Other); 3758 Args.push_back(&TaskPrivatesArg); 3759 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3760 unsigned Counter = 1; 3761 for (const Expr *E : Data.PrivateVars) { 3762 Args.push_back(ImplicitParamDecl::Create( 3763 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3764 C.getPointerType(C.getPointerType(E->getType())) 3765 .withConst() 3766 .withRestrict(), 3767 ImplicitParamDecl::Other)); 3768 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3769 PrivateVarsPos[VD] = Counter; 3770 ++Counter; 3771 } 3772 for (const Expr *E : Data.FirstprivateVars) { 3773 Args.push_back(ImplicitParamDecl::Create( 3774 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3775 C.getPointerType(C.getPointerType(E->getType())) 3776 .withConst() 3777 .withRestrict(), 3778 ImplicitParamDecl::Other)); 3779 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3780 PrivateVarsPos[VD] = Counter; 3781 ++Counter; 3782 } 3783 for (const Expr *E : Data.LastprivateVars) { 3784 Args.push_back(ImplicitParamDecl::Create( 3785 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3786 C.getPointerType(C.getPointerType(E->getType())) 3787 .withConst() 3788 .withRestrict(), 3789 ImplicitParamDecl::Other)); 3790 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3791 PrivateVarsPos[VD] = Counter; 3792 ++Counter; 3793 } 3794 for (const VarDecl *VD : Data.PrivateLocals) { 3795 QualType Ty = VD->getType().getNonReferenceType(); 3796 if (VD->getType()->isLValueReferenceType()) 3797 Ty = C.getPointerType(Ty); 3798 if (isAllocatableDecl(VD)) 3799 Ty = C.getPointerType(Ty); 3800 Args.push_back(ImplicitParamDecl::Create( 3801 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3802 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3803 ImplicitParamDecl::Other)); 3804 PrivateVarsPos[VD] = Counter; 3805 ++Counter; 3806 } 3807 const auto &TaskPrivatesMapFnInfo = 3808 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3809 llvm::FunctionType *TaskPrivatesMapTy = 3810 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3811 std::string Name = 3812 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3813 auto *TaskPrivatesMap = llvm::Function::Create( 3814 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3815 &CGM.getModule()); 3816 
CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3817 TaskPrivatesMapFnInfo); 3818 if (CGM.getLangOpts().Optimize) { 3819 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3820 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3821 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3822 } 3823 CodeGenFunction CGF(CGM); 3824 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3825 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3826 3827 // *privi = &.privates.privi; 3828 LValue Base = CGF.EmitLoadOfPointerLValue( 3829 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3830 TaskPrivatesArg.getType()->castAs<PointerType>()); 3831 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3832 Counter = 0; 3833 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3834 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3835 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3836 LValue RefLVal = 3837 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3838 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3839 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3840 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3841 ++Counter; 3842 } 3843 CGF.FinishFunction(); 3844 return TaskPrivatesMap; 3845 } 3846 3847 /// Emit initialization for private variables in task-based directives. 3848 static void emitPrivatesInit(CodeGenFunction &CGF, 3849 const OMPExecutableDirective &D, 3850 Address KmpTaskSharedsPtr, LValue TDBase, 3851 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3852 QualType SharedsTy, QualType SharedsPtrTy, 3853 const OMPTaskDataTy &Data, 3854 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3855 ASTContext &C = CGF.getContext(); 3856 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3857 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3858 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3859 ? OMPD_taskloop 3860 : OMPD_task; 3861 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3862 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3863 LValue SrcBase; 3864 bool IsTargetTask = 3865 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3866 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3867 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3868 // PointersArray, SizesArray, and MappersArray. The original variables for 3869 // these arrays are not captured and we get their addresses explicitly. 3870 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3871 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3872 SrcBase = CGF.MakeAddrLValue( 3873 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3874 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3875 SharedsTy); 3876 } 3877 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3878 for (const PrivateDataTy &Pair : Privates) { 3879 // Do not initialize private locals. 
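// (These are Data.PrivateLocals: they only need storage in the privates
// record and carry no initializer, so there is nothing to emit for them.)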
3880 if (Pair.second.isLocalPrivate()) { 3881 ++FI; 3882 continue; 3883 } 3884 const VarDecl *VD = Pair.second.PrivateCopy; 3885 const Expr *Init = VD->getAnyInitializer(); 3886 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3887 !CGF.isTrivialInitializer(Init)))) { 3888 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3889 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3890 const VarDecl *OriginalVD = Pair.second.Original; 3891 // Check if the variable is the target-based BasePointersArray, 3892 // PointersArray, SizesArray, or MappersArray. 3893 LValue SharedRefLValue; 3894 QualType Type = PrivateLValue.getType(); 3895 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3896 if (IsTargetTask && !SharedField) { 3897 assert(isa<ImplicitParamDecl>(OriginalVD) && 3898 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3899 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3900 ->getNumParams() == 0 && 3901 isa<TranslationUnitDecl>( 3902 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3903 ->getDeclContext()) && 3904 "Expected artificial target data variable."); 3905 SharedRefLValue = 3906 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3907 } else if (ForDup) { 3908 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3909 SharedRefLValue = CGF.MakeAddrLValue( 3910 Address(SharedRefLValue.getPointer(CGF), 3911 C.getDeclAlign(OriginalVD)), 3912 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3913 SharedRefLValue.getTBAAInfo()); 3914 } else if (CGF.LambdaCaptureFields.count( 3915 Pair.second.Original->getCanonicalDecl()) > 0 || 3916 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) { 3917 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3918 } else { 3919 // Processing for implicitly captured variables. 3920 InlinedOpenMPRegionRAII Region( 3921 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3922 /*HasCancel=*/false, /*NoInheritance=*/true); 3923 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3924 } 3925 if (Type->isArrayType()) { 3926 // Initialize firstprivate array. 3927 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3928 // Perform simple memcpy. 3929 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3930 } else { 3931 // Initialize firstprivate array using element-by-element 3932 // initialization. 3933 CGF.EmitOMPAggregateAssign( 3934 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3935 Type, 3936 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3937 Address SrcElement) { 3938 // Clean up any temporaries needed by the initialization. 3939 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3940 InitScope.addPrivate( 3941 Elem, [SrcElement]() -> Address { return SrcElement; }); 3942 (void)InitScope.Privatize(); 3943 // Emit initialization for single element. 
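// (Explanatory note: CGCapturedStmtRAII temporarily installs CapturesInfo
// so that captured variables referenced by Init are resolved against the
// source task's captured frame.)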
3944 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3945 CGF, &CapturesInfo); 3946 CGF.EmitAnyExprToMem(Init, DestElement, 3947 Init->getType().getQualifiers(), 3948 /*IsInitializer=*/false); 3949 }); 3950 } 3951 } else { 3952 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3953 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3954 return SharedRefLValue.getAddress(CGF); 3955 }); 3956 (void)InitScope.Privatize(); 3957 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3958 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3959 /*capturedByInit=*/false); 3960 } 3961 } else { 3962 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3963 } 3964 } 3965 ++FI; 3966 } 3967 } 3968 3969 /// Check if duplication function is required for taskloops. 3970 static bool checkInitIsRequired(CodeGenFunction &CGF, 3971 ArrayRef<PrivateDataTy> Privates) { 3972 bool InitRequired = false; 3973 for (const PrivateDataTy &Pair : Privates) { 3974 if (Pair.second.isLocalPrivate()) 3975 continue; 3976 const VarDecl *VD = Pair.second.PrivateCopy; 3977 const Expr *Init = VD->getAnyInitializer(); 3978 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3979 !CGF.isTrivialInitializer(Init)); 3980 if (InitRequired) 3981 break; 3982 } 3983 return InitRequired; 3984 } 3985 3986 3987 /// Emit task_dup function (for initialization of 3988 /// private/firstprivate/lastprivate vars and last_iter flag) 3989 /// \code 3990 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3991 /// lastpriv) { 3992 /// // setup lastprivate flag 3993 /// task_dst->last = lastpriv; 3994 /// // could be constructor calls here... 3995 /// } 3996 /// \endcode 3997 static llvm::Value * 3998 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3999 const OMPExecutableDirective &D, 4000 QualType KmpTaskTWithPrivatesPtrQTy, 4001 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4002 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4003 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4004 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4005 ASTContext &C = CGM.getContext(); 4006 FunctionArgList Args; 4007 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4008 KmpTaskTWithPrivatesPtrQTy, 4009 ImplicitParamDecl::Other); 4010 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4011 KmpTaskTWithPrivatesPtrQTy, 4012 ImplicitParamDecl::Other); 4013 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4014 ImplicitParamDecl::Other); 4015 Args.push_back(&DstArg); 4016 Args.push_back(&SrcArg); 4017 Args.push_back(&LastprivArg); 4018 const auto &TaskDupFnInfo = 4019 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4020 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4021 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4022 auto *TaskDup = llvm::Function::Create( 4023 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4024 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4025 TaskDup->setDoesNotRecurse(); 4026 CodeGenFunction CGF(CGM); 4027 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4028 Loc); 4029 4030 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4031 CGF.GetAddrOfLocalVar(&DstArg), 4032 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4033 // task_dst->liter = lastpriv; 4034 if (WithLastIter) { 4035 auto LIFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4036 LValue Base = CGF.EmitLValueForField( 4037 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4038 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4039 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4040 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4041 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4042 } 4043 4044 // Emit initial values for private copies (if any). 4045 assert(!Privates.empty()); 4046 Address KmpTaskSharedsPtr = Address::invalid(); 4047 if (!Data.FirstprivateVars.empty()) { 4048 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4049 CGF.GetAddrOfLocalVar(&SrcArg), 4050 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4051 LValue Base = CGF.EmitLValueForField( 4052 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4053 KmpTaskSharedsPtr = Address( 4054 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4055 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4056 KmpTaskTShareds)), 4057 Loc), 4058 CGM.getNaturalTypeAlignment(SharedsTy)); 4059 } 4060 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4061 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4062 CGF.FinishFunction(); 4063 return TaskDup; 4064 } 4065 4066 /// Checks if destructor function is required to be generated. 4067 /// \return true if cleanups are required, false otherwise. 4068 static bool 4069 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4070 ArrayRef<PrivateDataTy> Privates) { 4071 for (const PrivateDataTy &P : Privates) { 4072 if (P.second.isLocalPrivate()) 4073 continue; 4074 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4075 if (Ty.isDestructedType()) 4076 return true; 4077 } 4078 return false; 4079 } 4080 4081 namespace { 4082 /// Loop generator for OpenMP iterator expression. 
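/// Roughly, for each iterator the scope emits the following control flow
/// (a sketch assembled from the comments in the constructor and destructor
/// below; names are illustrative):
/// \code
///   counter = 0;
/// cont:
///   if (counter < upper) goto body; else goto exit;
/// body:
///   iter = begin + counter * step;
///   ... // code emitted while the scope is alive
///   counter = counter + 1; // emitted by the destructor
///   goto cont;
/// exit:
/// \endcode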
4083 class OMPIteratorGeneratorScope final 4084 : public CodeGenFunction::OMPPrivateScope { 4085 CodeGenFunction &CGF; 4086 const OMPIteratorExpr *E = nullptr; 4087 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4088 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4089 OMPIteratorGeneratorScope() = delete; 4090 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4091 4092 public: 4093 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4094 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4095 if (!E) 4096 return; 4097 SmallVector<llvm::Value *, 4> Uppers; 4098 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4099 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4100 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4101 addPrivate(VD, [&CGF, VD]() { 4102 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4103 }); 4104 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4105 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4106 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4107 "counter.addr"); 4108 }); 4109 } 4110 Privatize(); 4111 4112 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4113 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4114 LValue CLVal = 4115 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4116 HelperData.CounterVD->getType()); 4117 // Counter = 0; 4118 CGF.EmitStoreOfScalar( 4119 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4120 CLVal); 4121 CodeGenFunction::JumpDest &ContDest = 4122 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4123 CodeGenFunction::JumpDest &ExitDest = 4124 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4125 // N = <number-of_iterations>; 4126 llvm::Value *N = Uppers[I]; 4127 // cont: 4128 // if (Counter < N) goto body; else goto exit; 4129 CGF.EmitBlock(ContDest.getBlock()); 4130 auto *CVal = 4131 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4132 llvm::Value *Cmp = 4133 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4134 ? 
CGF.Builder.CreateICmpSLT(CVal, N)
4135 : CGF.Builder.CreateICmpULT(CVal, N);
4136 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4137 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4138 // body:
4139 CGF.EmitBlock(BodyBB);
4140 // Iteri = Begini + Counter * Stepi;
4141 CGF.EmitIgnoredExpr(HelperData.Update);
4142 }
4143 }
4144 ~OMPIteratorGeneratorScope() {
4145 if (!E)
4146 return;
4147 for (unsigned I = E->numOfIterators(); I > 0; --I) {
4148 // Counter = Counter + 1;
4149 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4150 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4151 // goto cont;
4152 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4153 // exit:
4154 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4155 }
4156 }
4157 };
4158 } // namespace
4159
4160 static std::pair<llvm::Value *, llvm::Value *>
4161 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4162 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4163 llvm::Value *Addr;
4164 if (OASE) {
4165 const Expr *Base = OASE->getBase();
4166 Addr = CGF.EmitScalarExpr(Base);
4167 } else {
4168 Addr = CGF.EmitLValue(E).getPointer(CGF);
4169 }
4170 llvm::Value *SizeVal;
4171 QualType Ty = E->getType();
4172 if (OASE) {
4173 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4174 for (const Expr *SE : OASE->getDimensions()) {
4175 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4176 Sz = CGF.EmitScalarConversion(
4177 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4178 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4179 }
4180 } else if (const auto *ASE =
4181 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4182 LValue UpAddrLVal =
4183 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4184 Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4185 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4186 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4187 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4188 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4189 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4190 } else {
4191 SizeVal = CGF.getTypeSize(Ty);
4192 }
4193 return std::make_pair(Addr, SizeVal);
4194 }
4195
4196 /// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags type.
4197 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4198 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4199 if (KmpTaskAffinityInfoTy.isNull()) {
4200 RecordDecl *KmpAffinityInfoRD =
4201 C.buildImplicitRecord("kmp_task_affinity_info_t");
4202 KmpAffinityInfoRD->startDefinition();
4203 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4204 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4205 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4206 KmpAffinityInfoRD->completeDefinition();
4207 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4208 }
4209 }
4210
4211 CGOpenMPRuntime::TaskResultTy
4212 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4213 const OMPExecutableDirective &D,
4214 llvm::Function *TaskFunction, QualType SharedsTy,
4215 Address Shareds, const OMPTaskDataTy &Data) {
4216 ASTContext &C = CGM.getContext();
4217 llvm::SmallVector<PrivateDataTy, 4> Privates;
4218 // Aggregate privates and sort them by alignment.
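// Sorting in decreasing alignment order keeps padding in the generated
// .kmp_privates.t record to a minimum.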
4219 const auto *I = Data.PrivateCopies.begin(); 4220 for (const Expr *E : Data.PrivateVars) { 4221 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4222 Privates.emplace_back( 4223 C.getDeclAlign(VD), 4224 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4225 /*PrivateElemInit=*/nullptr)); 4226 ++I; 4227 } 4228 I = Data.FirstprivateCopies.begin(); 4229 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4230 for (const Expr *E : Data.FirstprivateVars) { 4231 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4232 Privates.emplace_back( 4233 C.getDeclAlign(VD), 4234 PrivateHelpersTy( 4235 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4236 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4237 ++I; 4238 ++IElemInitRef; 4239 } 4240 I = Data.LastprivateCopies.begin(); 4241 for (const Expr *E : Data.LastprivateVars) { 4242 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4243 Privates.emplace_back( 4244 C.getDeclAlign(VD), 4245 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4246 /*PrivateElemInit=*/nullptr)); 4247 ++I; 4248 } 4249 for (const VarDecl *VD : Data.PrivateLocals) { 4250 if (isAllocatableDecl(VD)) 4251 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); 4252 else 4253 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 4254 } 4255 llvm::stable_sort(Privates, 4256 [](const PrivateDataTy &L, const PrivateDataTy &R) { 4257 return L.first > R.first; 4258 }); 4259 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4260 // Build type kmp_routine_entry_t (if not built yet). 4261 emitKmpRoutineEntryT(KmpInt32Ty); 4262 // Build type kmp_task_t (if not built yet). 4263 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4264 if (SavedKmpTaskloopTQTy.isNull()) { 4265 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4266 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4267 } 4268 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4269 } else { 4270 assert((D.getDirectiveKind() == OMPD_task || 4271 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4272 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4273 "Expected taskloop, task or target directive"); 4274 if (SavedKmpTaskTQTy.isNull()) { 4275 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4276 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4277 } 4278 KmpTaskTQTy = SavedKmpTaskTQTy; 4279 } 4280 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4281 // Build particular struct kmp_task_t for the given task. 4282 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4283 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4284 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4285 QualType KmpTaskTWithPrivatesPtrQTy = 4286 C.getPointerType(KmpTaskTWithPrivatesQTy); 4287 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4288 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4289 KmpTaskTWithPrivatesTy->getPointerTo(); 4290 llvm::Value *KmpTaskTWithPrivatesTySize = 4291 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4292 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4293 4294 // Emit initial values for private copies (if any). 
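// The mapping function built below becomes the fourth argument that the
// proxy (.omp_task_entry., see emitProxyTaskFunction) passes on to
// TaskFunction; when there are no privates, a null pointer of the matching
// type is passed instead.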
4295 llvm::Value *TaskPrivatesMap = nullptr;
4296 llvm::Type *TaskPrivatesMapTy =
4297 std::next(TaskFunction->arg_begin(), 3)->getType();
4298 if (!Privates.empty()) {
4299 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4300 TaskPrivatesMap =
4301 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
4302 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4303 TaskPrivatesMap, TaskPrivatesMapTy);
4304 } else {
4305 TaskPrivatesMap = llvm::ConstantPointerNull::get(
4306 cast<llvm::PointerType>(TaskPrivatesMapTy));
4307 }
4308 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4309 // kmp_task_t *tt);
4310 llvm::Function *TaskEntry = emitProxyTaskFunction(
4311 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4312 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4313 TaskPrivatesMap);
4314
4315 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4316 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4317 // kmp_routine_entry_t *task_entry);
4318 // Task flags. Format is taken from
4319 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
4320 // description of kmp_tasking_flags struct.
4321 enum {
4322 TiedFlag = 0x1,
4323 FinalFlag = 0x2,
4324 DestructorsFlag = 0x8,
4325 PriorityFlag = 0x20,
4326 DetachableFlag = 0x40,
4327 };
4328 unsigned Flags = Data.Tied ? TiedFlag : 0;
4329 bool NeedsCleanup = false;
4330 if (!Privates.empty()) {
4331 NeedsCleanup =
4332 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
4333 if (NeedsCleanup)
4334 Flags = Flags | DestructorsFlag;
4335 }
4336 if (Data.Priority.getInt())
4337 Flags = Flags | PriorityFlag;
4338 if (D.hasClausesOfKind<OMPDetachClause>())
4339 Flags = Flags | DetachableFlag;
4340 llvm::Value *TaskFlags =
4341 Data.Final.getPointer()
4342 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4343 CGF.Builder.getInt32(FinalFlag),
4344 CGF.Builder.getInt32(/*C=*/0))
4345 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4346 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4347 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4348 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4349 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4350 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4351 TaskEntry, KmpRoutineEntryPtrTy)};
4352 llvm::Value *NewTask;
4353 if (D.hasClausesOfKind<OMPNowaitClause>()) {
4354 // Check if we have any device clause associated with the directive.
4355 const Expr *Device = nullptr;
4356 if (auto *C = D.getSingleClause<OMPDeviceClause>())
4357 Device = C->getDevice();
4358 // Emit device ID if any, otherwise use default value.
4359 llvm::Value *DeviceID;
4360 if (Device)
4361 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4362 CGF.Int64Ty, /*isSigned=*/true);
4363 else
4364 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4365 AllocArgs.push_back(DeviceID);
4366 NewTask = CGF.EmitRuntimeCall(
4367 OMPBuilder.getOrCreateRuntimeFunction(
4368 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4369 AllocArgs);
4370 } else {
4371 NewTask =
4372 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4373 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4374 AllocArgs);
4375 }
4376 // Emit detach clause initialization.
4377 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4378 // task_descriptor);
4379 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4380 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4381 LValue EvtLVal = CGF.EmitLValue(Evt);
4382
4383 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4384 // int gtid, kmp_task_t *task);
4385 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4386 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4387 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4388 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4389 OMPBuilder.getOrCreateRuntimeFunction(
4390 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4391 {Loc, Tid, NewTask});
4392 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4393 Evt->getExprLoc());
4394 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4395 }
4396 // Process affinity clauses.
4397 if (D.hasClausesOfKind<OMPAffinityClause>()) {
4398 // Process list of affinity data.
4399 ASTContext &C = CGM.getContext();
4400 Address AffinitiesArray = Address::invalid();
4401 // Calculate number of elements to form the array of affinity data.
4402 llvm::Value *NumOfElements = nullptr;
4403 unsigned NumAffinities = 0;
4404 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4405 if (const Expr *Modifier = C->getModifier()) {
4406 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4407 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4408 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4409 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4410 NumOfElements =
4411 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4412 }
4413 } else {
4414 NumAffinities += C->varlist_size();
4415 }
4416 }
4417 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4418 // Field ids in kmp_task_affinity_info record.
4419 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4420
4421 QualType KmpTaskAffinityInfoArrayTy;
4422 if (NumOfElements) {
4423 NumOfElements = CGF.Builder.CreateNUWAdd(
4424 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4425 auto *OVE = new (C) OpaqueValueExpr(
4426 Loc,
4427 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4428 VK_PRValue);
4429 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4430 RValue::get(NumOfElements));
4431 KmpTaskAffinityInfoArrayTy =
4432 C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
4433 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4434 // Properly emit variable-sized array.
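// Binding the runtime-computed element count to an OpaqueValueExpr above lets
// the regular VLA machinery size the allocation; conceptually this emits:
//   kmp_task_affinity_info_t .affs.arr[<NumOfElements>];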
4435 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4436 ImplicitParamDecl::Other);
4437 CGF.EmitVarDecl(*PD);
4438 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4439 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4440 /*isSigned=*/false);
4441 } else {
4442 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4443 KmpTaskAffinityInfoTy,
4444 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4445 ArrayType::Normal, /*IndexTypeQuals=*/0);
4446 AffinitiesArray =
4447 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4448 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4449 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4450 /*isSigned=*/false);
4451 }
4452
4453 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4454 // Fill the array with the elements that have no iterator modifier.
4455 unsigned Pos = 0;
4456 bool HasIterator = false;
4457 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4458 if (C->getModifier()) {
4459 HasIterator = true;
4460 continue;
4461 }
4462 for (const Expr *E : C->varlists()) {
4463 llvm::Value *Addr;
4464 llvm::Value *Size;
4465 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4466 LValue Base =
4467 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4468 KmpTaskAffinityInfoTy);
4469 // affs[i].base_addr = &<Affinities[i].second>;
4470 LValue BaseAddrLVal = CGF.EmitLValueForField(
4471 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4472 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4473 BaseAddrLVal);
4474 // affs[i].len = sizeof(<Affinities[i].second>);
4475 LValue LenLVal = CGF.EmitLValueForField(
4476 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4477 CGF.EmitStoreOfScalar(Size, LenLVal);
4478 ++Pos;
4479 }
4480 }
4481 LValue PosLVal;
4482 if (HasIterator) {
4483 PosLVal = CGF.MakeAddrLValue(
4484 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4485 C.getSizeType());
4486 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4487 }
4488 // Process elements with iterators.
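// For an affinity item under an iterator modifier the generated code is
// roughly (pseudocode; the loop itself comes from OMPIteratorGeneratorScope):
//   for (<iterator space>) {
//     affs[pos].base_addr = (intptr_t)&<item>;
//     affs[pos].len = sizeof(<item>);
//     ++pos;
//   }
// with 'pos' kept in the memory temporary PosLVal created above.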
4489 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4490 const Expr *Modifier = C->getModifier(); 4491 if (!Modifier) 4492 continue; 4493 OMPIteratorGeneratorScope IteratorScope( 4494 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4495 for (const Expr *E : C->varlists()) { 4496 llvm::Value *Addr; 4497 llvm::Value *Size; 4498 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4499 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4500 LValue Base = CGF.MakeAddrLValue( 4501 Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(), 4502 AffinitiesArray.getPointer(), Idx), 4503 AffinitiesArray.getAlignment()), 4504 KmpTaskAffinityInfoTy); 4505 // affs[i].base_addr = &<Affinities[i].second>; 4506 LValue BaseAddrLVal = CGF.EmitLValueForField( 4507 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4508 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4509 BaseAddrLVal); 4510 // affs[i].len = sizeof(<Affinities[i].second>); 4511 LValue LenLVal = CGF.EmitLValueForField( 4512 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4513 CGF.EmitStoreOfScalar(Size, LenLVal); 4514 Idx = CGF.Builder.CreateNUWAdd( 4515 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4516 CGF.EmitStoreOfScalar(Idx, PosLVal); 4517 } 4518 } 4519 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4520 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4521 // naffins, kmp_task_affinity_info_t *affin_list); 4522 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4523 llvm::Value *GTid = getThreadID(CGF, Loc); 4524 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4525 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4526 // FIXME: Emit the function and ignore its result for now unless the 4527 // runtime function is properly implemented. 4528 (void)CGF.EmitRuntimeCall( 4529 OMPBuilder.getOrCreateRuntimeFunction( 4530 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4531 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4532 } 4533 llvm::Value *NewTaskNewTaskTTy = 4534 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4535 NewTask, KmpTaskTWithPrivatesPtrTy); 4536 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4537 KmpTaskTWithPrivatesQTy); 4538 LValue TDBase = 4539 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4540 // Fill the data in the resulting kmp_task_t record. 4541 // Copy shareds if there are any. 4542 Address KmpTaskSharedsPtr = Address::invalid(); 4543 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4544 KmpTaskSharedsPtr = 4545 Address(CGF.EmitLoadOfScalar( 4546 CGF.EmitLValueForField( 4547 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 4548 KmpTaskTShareds)), 4549 Loc), 4550 CGM.getNaturalTypeAlignment(SharedsTy)); 4551 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4552 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4553 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4554 } 4555 // Emit initial values for private copies (if any). 
4556 TaskResultTy Result; 4557 if (!Privates.empty()) { 4558 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4559 SharedsTy, SharedsPtrTy, Data, Privates, 4560 /*ForDup=*/false); 4561 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4562 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4563 Result.TaskDupFn = emitTaskDupFunction( 4564 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4565 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4566 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4567 } 4568 } 4569 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 4570 enum { Priority = 0, Destructors = 1 }; 4571 // Provide pointer to function with destructors for privates. 4572 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4573 const RecordDecl *KmpCmplrdataUD = 4574 (*FI)->getType()->getAsUnionType()->getDecl(); 4575 if (NeedsCleanup) { 4576 llvm::Value *DestructorFn = emitDestructorsFunction( 4577 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4578 KmpTaskTWithPrivatesQTy); 4579 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4580 LValue DestructorsLV = CGF.EmitLValueForField( 4581 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4582 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4583 DestructorFn, KmpRoutineEntryPtrTy), 4584 DestructorsLV); 4585 } 4586 // Set priority. 4587 if (Data.Priority.getInt()) { 4588 LValue Data2LV = CGF.EmitLValueForField( 4589 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4590 LValue PriorityLV = CGF.EmitLValueForField( 4591 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4592 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4593 } 4594 Result.NewTask = NewTask; 4595 Result.TaskEntry = TaskEntry; 4596 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4597 Result.TDBase = TDBase; 4598 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4599 return Result; 4600 } 4601 4602 namespace { 4603 /// Dependence kind for RTL. 4604 enum RTLDependenceKindTy { 4605 DepIn = 0x01, 4606 DepInOut = 0x3, 4607 DepMutexInOutSet = 0x4 4608 }; 4609 /// Fields ids in kmp_depend_info record. 4610 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 4611 } // namespace 4612 4613 /// Translates internal dependency kind into the runtime kind. 4614 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 4615 RTLDependenceKindTy DepKind; 4616 switch (K) { 4617 case OMPC_DEPEND_in: 4618 DepKind = DepIn; 4619 break; 4620 // Out and InOut dependencies must use the same code. 4621 case OMPC_DEPEND_out: 4622 case OMPC_DEPEND_inout: 4623 DepKind = DepInOut; 4624 break; 4625 case OMPC_DEPEND_mutexinoutset: 4626 DepKind = DepMutexInOutSet; 4627 break; 4628 case OMPC_DEPEND_source: 4629 case OMPC_DEPEND_sink: 4630 case OMPC_DEPEND_depobj: 4631 case OMPC_DEPEND_unknown: 4632 llvm_unreachable("Unknown task dependence type"); 4633 } 4634 return DepKind; 4635 } 4636 4637 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 
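/// The record built here is assumed to mirror the runtime's layout, roughly:
///   struct kmp_depend_info {
///     intptr_t base_addr;
///     size_t len;
///     <bool-width unsigned> flags;
///   };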
4638 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4639 QualType &FlagsTy) { 4640 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4641 if (KmpDependInfoTy.isNull()) { 4642 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4643 KmpDependInfoRD->startDefinition(); 4644 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4645 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4646 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4647 KmpDependInfoRD->completeDefinition(); 4648 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4649 } 4650 } 4651 4652 std::pair<llvm::Value *, LValue> 4653 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4654 SourceLocation Loc) { 4655 ASTContext &C = CGM.getContext(); 4656 QualType FlagsTy; 4657 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4658 RecordDecl *KmpDependInfoRD = 4659 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4660 LValue Base = CGF.EmitLoadOfPointerLValue( 4661 DepobjLVal.getAddress(CGF), 4662 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4663 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4664 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4665 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4666 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4667 Base.getTBAAInfo()); 4668 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4669 Addr.getElementType(), Addr.getPointer(), 4670 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4671 LValue NumDepsBase = CGF.MakeAddrLValue( 4672 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4673 Base.getBaseInfo(), Base.getTBAAInfo()); 4674 // NumDeps = deps[i].base_addr; 4675 LValue BaseAddrLVal = CGF.EmitLValueForField( 4676 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4677 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4678 return std::make_pair(NumDeps, Base); 4679 } 4680 4681 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4682 llvm::PointerUnion<unsigned *, LValue *> Pos, 4683 const OMPTaskDataTy::DependData &Data, 4684 Address DependenciesArray) { 4685 CodeGenModule &CGM = CGF.CGM; 4686 ASTContext &C = CGM.getContext(); 4687 QualType FlagsTy; 4688 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4689 RecordDecl *KmpDependInfoRD = 4690 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4691 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4692 4693 OMPIteratorGeneratorScope IteratorScope( 4694 CGF, cast_or_null<OMPIteratorExpr>( 4695 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts()
4696 : nullptr));
4697 for (const Expr *E : Data.DepExprs) {
4698 llvm::Value *Addr;
4699 llvm::Value *Size;
4700 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4701 LValue Base;
4702 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4703 Base = CGF.MakeAddrLValue(
4704 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4705 } else {
4706 LValue &PosLVal = *Pos.get<LValue *>();
4707 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4708 Base = CGF.MakeAddrLValue(
4709 Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
4710 DependenciesArray.getPointer(), Idx),
4711 DependenciesArray.getAlignment()),
4712 KmpDependInfoTy);
4713 }
4714 // deps[i].base_addr = &<Dependencies[i].second>;
4715 LValue BaseAddrLVal = CGF.EmitLValueForField(
4716 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4717 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4718 BaseAddrLVal);
4719 // deps[i].len = sizeof(<Dependencies[i].second>);
4720 LValue LenLVal = CGF.EmitLValueForField(
4721 Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4722 CGF.EmitStoreOfScalar(Size, LenLVal);
4723 // deps[i].flags = <Dependencies[i].first>;
4724 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4725 LValue FlagsLVal = CGF.EmitLValueForField(
4726 Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4727 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4728 FlagsLVal);
4729 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4730 ++(*P);
4731 } else {
4732 LValue &PosLVal = *Pos.get<LValue *>();
4733 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4734 Idx = CGF.Builder.CreateNUWAdd(Idx,
4735 llvm::ConstantInt::get(Idx->getType(), 1));
4736 CGF.EmitStoreOfScalar(Idx, PosLVal);
4737 }
4738 }
4739 }
4740
4741 static SmallVector<llvm::Value *, 4>
4742 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4743 const OMPTaskDataTy::DependData &Data) {
4744 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4745 "Expected depobj dependency kind.");
4746 SmallVector<llvm::Value *, 4> Sizes;
4747 SmallVector<LValue, 4> SizeLVals;
4748 ASTContext &C = CGF.getContext();
4749 QualType FlagsTy;
4750 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4751 RecordDecl *KmpDependInfoRD =
4752 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4753 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4754 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4755 {
4756 OMPIteratorGeneratorScope IteratorScope(
4757 CGF, cast_or_null<OMPIteratorExpr>(
4758 Data.IteratorExpr ?
Data.IteratorExpr->IgnoreParenImpCasts()
4759 : nullptr));
4760 for (const Expr *E : Data.DepExprs) {
4761 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4762 LValue Base = CGF.EmitLoadOfPointerLValue(
4763 DepobjLVal.getAddress(CGF),
4764 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4765 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4766 Base.getAddress(CGF), KmpDependInfoPtrT);
4767 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4768 Base.getTBAAInfo());
4769 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4770 Addr.getElementType(), Addr.getPointer(),
4771 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4772 LValue NumDepsBase = CGF.MakeAddrLValue(
4773 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4774 Base.getBaseInfo(), Base.getTBAAInfo());
4775 // NumDeps = deps[i].base_addr;
4776 LValue BaseAddrLVal = CGF.EmitLValueForField(
4777 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4778 llvm::Value *NumDeps =
4779 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4780 LValue NumLVal = CGF.MakeAddrLValue(
4781 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4782 C.getUIntPtrType());
4783 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4784 NumLVal.getAddress(CGF));
4785 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4786 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4787 CGF.EmitStoreOfScalar(Add, NumLVal);
4788 SizeLVals.push_back(NumLVal);
4789 }
4790 }
4791 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4792 llvm::Value *Size =
4793 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4794 Sizes.push_back(Size);
4795 }
4796 return Sizes;
4797 }
4798
4799 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4800 LValue PosLVal,
4801 const OMPTaskDataTy::DependData &Data,
4802 Address DependenciesArray) {
4803 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4804 "Expected depobj dependency kind.");
4805 ASTContext &C = CGF.getContext();
4806 QualType FlagsTy;
4807 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4808 RecordDecl *KmpDependInfoRD =
4809 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4810 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4811 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4812 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4813 {
4814 OMPIteratorGeneratorScope IteratorScope(
4815 CGF, cast_or_null<OMPIteratorExpr>(
4816 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4817 : nullptr));
4818 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4819 const Expr *E = Data.DepExprs[I];
4820 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4821 LValue Base = CGF.EmitLoadOfPointerLValue(
4822 DepobjLVal.getAddress(CGF),
4823 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4824 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4825 Base.getAddress(CGF), KmpDependInfoPtrT);
4826 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4827 Base.getTBAAInfo());
4828
4829 // Get number of elements in a single depobj.
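// The count is stored one element before the address kept in the depobj
// variable (see emitDepobjDependClause), i.e. roughly:
//   numDeps = ((kmp_depend_info *)depobj)[-1].base_addr;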
4830 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4831 Addr.getElementType(), Addr.getPointer(),
4832 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4833 LValue NumDepsBase = CGF.MakeAddrLValue(
4834 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4835 Base.getBaseInfo(), Base.getTBAAInfo());
4836 // NumDeps = deps[i].base_addr;
4837 LValue BaseAddrLVal = CGF.EmitLValueForField(
4838 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4839 llvm::Value *NumDeps =
4840 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4841
4842 // Memcpy the dependency data.
4843 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4844 ElSize,
4845 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4846 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4847 Address DepAddr =
4848 Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
4849 DependenciesArray.getPointer(), Pos),
4850 DependenciesArray.getAlignment());
4851 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4852
4853 // Increase pos.
4854 // pos += size;
4855 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4856 CGF.EmitStoreOfScalar(Add, PosLVal);
4857 }
4858 }
4859 }
4860
4861 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4862 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4863 SourceLocation Loc) {
4864 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4865 return D.DepExprs.empty();
4866 }))
4867 return std::make_pair(nullptr, Address::invalid());
4868 // Process list of dependencies.
4869 ASTContext &C = CGM.getContext();
4870 Address DependenciesArray = Address::invalid();
4871 llvm::Value *NumOfElements = nullptr;
4872 unsigned NumDependencies = std::accumulate(
4873 Dependencies.begin(), Dependencies.end(), 0,
4874 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4875 return D.DepKind == OMPC_DEPEND_depobj
4876 ? V
4877 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4878 });
4879 QualType FlagsTy;
4880 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4881 bool HasDepobjDeps = false;
4882 bool HasRegularWithIterators = false;
4883 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4884 llvm::Value *NumOfRegularWithIterators =
4885 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4886 // Calculate number of depobj dependencies and regular deps with the iterators.
4887 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4888 if (D.DepKind == OMPC_DEPEND_depobj) {
4889 SmallVector<llvm::Value *, 4> Sizes =
4890 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4891 for (llvm::Value *Size : Sizes) {
4892 NumOfDepobjElements =
4893 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4894 }
4895 HasDepobjDeps = true;
4896 continue;
4897 }
4898 // Include number of iterations, if any.
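// For a regular dep list with an iterator modifier, each iterator contributes
// its upper bound times the number of dep expressions in the clause; the sum
// is accumulated into NumOfRegularWithIterators below.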
4899
4900 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4901 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4902 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4903 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4904 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4905 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4906 NumOfRegularWithIterators =
4907 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4908 }
4909 HasRegularWithIterators = true;
4910 continue;
4911 }
4912 }
4913
4914 QualType KmpDependInfoArrayTy;
4915 if (HasDepobjDeps || HasRegularWithIterators) {
4916 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4917 /*isSigned=*/false);
4918 if (HasDepobjDeps) {
4919 NumOfElements =
4920 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4921 }
4922 if (HasRegularWithIterators) {
4923 NumOfElements =
4924 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4925 }
4926 auto *OVE = new (C) OpaqueValueExpr(
4927 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4928 VK_PRValue);
4929 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4930 RValue::get(NumOfElements));
4931 KmpDependInfoArrayTy =
4932 C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4933 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4934 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4935 // Properly emit variable-sized array.
4936 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4937 ImplicitParamDecl::Other);
4938 CGF.EmitVarDecl(*PD);
4939 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4940 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4941 /*isSigned=*/false);
4942 } else {
4943 KmpDependInfoArrayTy = C.getConstantArrayType(
4944 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4945 ArrayType::Normal, /*IndexTypeQuals=*/0);
4946 DependenciesArray =
4947 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4948 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4949 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4950 /*isSigned=*/false);
4951 }
4952 unsigned Pos = 0;
4953 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4954 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4955 Dependencies[I].IteratorExpr)
4956 continue;
4957 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4958 DependenciesArray);
4959 }
4960 // Copy regular dependencies with iterators.
4961 LValue PosLVal = CGF.MakeAddrLValue(
4962 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4963 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4964 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4965 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4966 !Dependencies[I].IteratorExpr)
4967 continue;
4968 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4969 DependenciesArray);
4970 }
4971 // Copy final depobj arrays without iterators.
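// At this point the dependency array is laid out, roughly, as:
//   [ regular deps | regular deps with iterators | depobj payloads ]
// with PosLVal marking where the depobj payloads get appended.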
4972 if (HasDepobjDeps) {
4973 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4974 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4975 continue;
4976 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4977 DependenciesArray);
4978 }
4979 }
4980 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4981 DependenciesArray, CGF.VoidPtrTy);
4982 return std::make_pair(NumOfElements, DependenciesArray);
4983 }
4984
4985 Address CGOpenMPRuntime::emitDepobjDependClause(
4986 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4987 SourceLocation Loc) {
4988 if (Dependencies.DepExprs.empty())
4989 return Address::invalid();
4990 // Process list of dependencies.
4991 ASTContext &C = CGM.getContext();
4992 Address DependenciesArray = Address::invalid();
4993 unsigned NumDependencies = Dependencies.DepExprs.size();
4994 QualType FlagsTy;
4995 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4996 RecordDecl *KmpDependInfoRD =
4997 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4998
4999 llvm::Value *Size;
5000 // Define type kmp_depend_info[<Dependencies.size()>];
5001 // For depobj reserve one extra element to store the number of elements.
5002 // This is required to handle the depobj(x) update(in) construct.
5003 // kmp_depend_info[<Dependencies.size()>] deps;
5004 llvm::Value *NumDepsVal;
5005 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
5006 if (const auto *IE =
5007 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
5008 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
5009 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
5010 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
5011 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
5012 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
5013 }
5014 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
5015 NumDepsVal);
5016 CharUnits SizeInBytes =
5017 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
5018 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
5019 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
5020 NumDepsVal =
5021 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
5022 } else {
5023 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5024 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
5025 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5026 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
5027 Size = CGM.getSize(Sz.alignTo(Align));
5028 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
5029 }
5030 // Needs to be allocated in dynamic memory.
5031 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5032 // Use default allocator.
5033 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5034 llvm::Value *Args[] = {ThreadID, Size, Allocator};
5035
5036 llvm::Value *Addr =
5037 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5038 CGM.getModule(), OMPRTL___kmpc_alloc),
5039 Args, ".dep.arr.addr");
5040 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5041 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
5042 DependenciesArray = Address(Addr, Align);
5043 // Write number of elements in the first element of array for depobj.
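// The resulting depobj allocation is therefore, roughly:
//   deps[0].base_addr = <number of payload elements>; // header
//   deps[1..N] = <the actual dependencies>;
// and the address handed back to the program points at deps[1].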
5044 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
5045 // deps[i].base_addr = NumDependencies;
5046 LValue BaseAddrLVal = CGF.EmitLValueForField(
5047 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5048 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
5049 llvm::PointerUnion<unsigned *, LValue *> Pos;
5050 unsigned Idx = 1;
5051 LValue PosLVal;
5052 if (Dependencies.IteratorExpr) {
5053 PosLVal = CGF.MakeAddrLValue(
5054 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
5055 C.getSizeType());
5056 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
5057 /*IsInit=*/true);
5058 Pos = &PosLVal;
5059 } else {
5060 Pos = &Idx;
5061 }
5062 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
5063 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5064 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
5065 return DependenciesArray;
5066 }
5067
5068 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5069 SourceLocation Loc) {
5070 ASTContext &C = CGM.getContext();
5071 QualType FlagsTy;
5072 getDependTypes(C, KmpDependInfoTy, FlagsTy);
5073 LValue Base = CGF.EmitLoadOfPointerLValue(
5074 DepobjLVal.getAddress(CGF),
5075 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5076 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5077 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5078 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5079 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5080 Addr.getElementType(), Addr.getPointer(),
5081 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5082 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5083 CGF.VoidPtrTy);
5084 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5085 // Use default allocator.
5086 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5087 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5088
5089 // __kmpc_free(gtid, addr, nullptr);
5090 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5091 CGM.getModule(), OMPRTL___kmpc_free),
5092 Args);
5093 }
5094
5095 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5096 OpenMPDependClauseKind NewDepKind,
5097 SourceLocation Loc) {
5098 ASTContext &C = CGM.getContext();
5099 QualType FlagsTy;
5100 getDependTypes(C, KmpDependInfoTy, FlagsTy);
5101 RecordDecl *KmpDependInfoRD =
5102 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5103 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5104 llvm::Value *NumDeps;
5105 LValue Base;
5106 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5107
5108 Address Begin = Base.getAddress(CGF);
5109 // Cast from pointer to array type to pointer to single element.
5110 llvm::Value *End = CGF.Builder.CreateGEP(
5111 Begin.getElementType(), Begin.getPointer(), NumDeps);
5112 // The basic structure here is a do-while loop.
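// Conceptually:
//   kmp_depend_info *el = begin;
//   do {
//     el->flags = <translated NewDepKind>;
//     ++el;
//   } while (el != end);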
5113 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5114 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5115 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5116 CGF.EmitBlock(BodyBB); 5117 llvm::PHINode *ElementPHI = 5118 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5119 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5120 Begin = Address(ElementPHI, Begin.getAlignment()); 5121 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5122 Base.getTBAAInfo()); 5123 // deps[i].flags = NewDepKind; 5124 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5125 LValue FlagsLVal = CGF.EmitLValueForField( 5126 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5127 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5128 FlagsLVal); 5129 5130 // Shift the address forward by one element. 5131 Address ElementNext = 5132 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5133 ElementPHI->addIncoming(ElementNext.getPointer(), 5134 CGF.Builder.GetInsertBlock()); 5135 llvm::Value *IsEmpty = 5136 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5137 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5138 // Done. 5139 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5140 } 5141 5142 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5143 const OMPExecutableDirective &D, 5144 llvm::Function *TaskFunction, 5145 QualType SharedsTy, Address Shareds, 5146 const Expr *IfCond, 5147 const OMPTaskDataTy &Data) { 5148 if (!CGF.HaveInsertPoint()) 5149 return; 5150 5151 TaskResultTy Result = 5152 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5153 llvm::Value *NewTask = Result.NewTask; 5154 llvm::Function *TaskEntry = Result.TaskEntry; 5155 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5156 LValue TDBase = Result.TDBase; 5157 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5158 // Process list of dependences. 5159 Address DependenciesArray = Address::invalid(); 5160 llvm::Value *NumOfElements; 5161 std::tie(NumOfElements, DependenciesArray) = 5162 emitDependClause(CGF, Data.Dependences, Loc); 5163 5164 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5165 // libcall. 
5166 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5167 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5168 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5169 // list is not empty
5170 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5171 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5172 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5173 llvm::Value *DepTaskArgs[7];
5174 if (!Data.Dependences.empty()) {
5175 DepTaskArgs[0] = UpLoc;
5176 DepTaskArgs[1] = ThreadID;
5177 DepTaskArgs[2] = NewTask;
5178 DepTaskArgs[3] = NumOfElements;
5179 DepTaskArgs[4] = DependenciesArray.getPointer();
5180 DepTaskArgs[5] = CGF.Builder.getInt32(0);
5181 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5182 }
5183 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5184 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5185 if (!Data.Tied) {
5186 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5187 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5188 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5189 }
5190 if (!Data.Dependences.empty()) {
5191 CGF.EmitRuntimeCall(
5192 OMPBuilder.getOrCreateRuntimeFunction(
5193 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5194 DepTaskArgs);
5195 } else {
5196 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5197 CGM.getModule(), OMPRTL___kmpc_omp_task),
5198 TaskArgs);
5199 }
5200 // Check if parent region is untied and build return for untied task.
5201 if (auto *Region =
5202 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5203 Region->emitUntiedSwitch(CGF);
5204 };
5205
5206 llvm::Value *DepWaitTaskArgs[6];
5207 if (!Data.Dependences.empty()) {
5208 DepWaitTaskArgs[0] = UpLoc;
5209 DepWaitTaskArgs[1] = ThreadID;
5210 DepWaitTaskArgs[2] = NumOfElements;
5211 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5212 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5213 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5214 }
5215 auto &M = CGM.getModule();
5216 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5217 TaskEntry, &Data, &DepWaitTaskArgs,
5218 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5219 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5220 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5221 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5222 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5223 // is specified.
5224 if (!Data.Dependences.empty()) 5225 CGF.EmitRuntimeCall( 5226 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5227 DepWaitTaskArgs); 5228 // Call proxy_task_entry(gtid, new_task); 5229 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5230 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5231 Action.Enter(CGF); 5232 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5233 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5234 OutlinedFnArgs); 5235 }; 5236 5237 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5238 // kmp_task_t *new_task); 5239 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5240 // kmp_task_t *new_task); 5241 RegionCodeGenTy RCG(CodeGen); 5242 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5243 M, OMPRTL___kmpc_omp_task_begin_if0), 5244 TaskArgs, 5245 OMPBuilder.getOrCreateRuntimeFunction( 5246 M, OMPRTL___kmpc_omp_task_complete_if0), 5247 TaskArgs); 5248 RCG.setAction(Action); 5249 RCG(CGF); 5250 }; 5251 5252 if (IfCond) { 5253 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5254 } else { 5255 RegionCodeGenTy ThenRCG(ThenCodeGen); 5256 ThenRCG(CGF); 5257 } 5258 } 5259 5260 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5261 const OMPLoopDirective &D, 5262 llvm::Function *TaskFunction, 5263 QualType SharedsTy, Address Shareds, 5264 const Expr *IfCond, 5265 const OMPTaskDataTy &Data) { 5266 if (!CGF.HaveInsertPoint()) 5267 return; 5268 TaskResultTy Result = 5269 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5270 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5271 // libcall. 5272 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5273 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5274 // sched, kmp_uint64 grainsize, void *task_dup); 5275 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5276 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5277 llvm::Value *IfVal; 5278 if (IfCond) { 5279 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5280 /*isSigned=*/true); 5281 } else { 5282 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5283 } 5284 5285 LValue LBLVal = CGF.EmitLValueForField( 5286 Result.TDBase, 5287 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5288 const auto *LBVar = 5289 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5290 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5291 LBLVal.getQuals(), 5292 /*IsInitializer=*/true); 5293 LValue UBLVal = CGF.EmitLValueForField( 5294 Result.TDBase, 5295 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5296 const auto *UBVar = 5297 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5298 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5299 UBLVal.getQuals(), 5300 /*IsInitializer=*/true); 5301 LValue StLVal = CGF.EmitLValueForField( 5302 Result.TDBase, 5303 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5304 const auto *StVar = 5305 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5306 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5307 StLVal.getQuals(), 5308 /*IsInitializer=*/true); 5309 // Store reductions address. 
5310 LValue RedLVal = CGF.EmitLValueForField(
5311 Result.TDBase,
5312 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5313 if (Data.Reductions) {
5314 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5315 } else {
5316 CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5317 CGF.getContext().VoidPtrTy);
5318 }
5319 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5320 llvm::Value *TaskArgs[] = {
5321 UpLoc,
5322 ThreadID,
5323 Result.NewTask,
5324 IfVal,
5325 LBLVal.getPointer(CGF),
5326 UBLVal.getPointer(CGF),
5327 CGF.EmitLoadOfScalar(StLVal, Loc),
5328 llvm::ConstantInt::getSigned(
5329 CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
5330 llvm::ConstantInt::getSigned(
5331 CGF.IntTy, Data.Schedule.getPointer()
5332 ? Data.Schedule.getInt() ? NumTasks : Grainsize
5333 : NoSchedule),
5334 Data.Schedule.getPointer()
5335 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5336 /*isSigned=*/false)
5337 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5338 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5339 Result.TaskDupFn, CGF.VoidPtrTy)
5340 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5341 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5342 CGM.getModule(), OMPRTL___kmpc_taskloop),
5343 TaskArgs);
5344 }
5345
5346 /// Emit reduction operation for each element of array (required for
5347 /// array sections) LHS op = RHS.
5348 /// \param Type Type of array.
5349 /// \param LHSVar Variable on the left side of the reduction operation
5350 /// (references element of array in original variable).
5351 /// \param RHSVar Variable on the right side of the reduction operation
5352 /// (references element of array in original variable).
5353 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5354 /// RHSVar.
5355 static void EmitOMPAggregateReduction(
5356 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5357 const VarDecl *RHSVar,
5358 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5359 const Expr *, const Expr *)> &RedOpGen,
5360 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5361 const Expr *UpExpr = nullptr) {
5362 // Perform element-by-element initialization.
5363 QualType ElementTy;
5364 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5365 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5366
5367 // Drill down to the base element type on both arrays.
5368 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5369 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5370
5371 llvm::Value *RHSBegin = RHSAddr.getPointer();
5372 llvm::Value *LHSBegin = LHSAddr.getPointer();
5373 // Cast from pointer to array type to pointer to single element.
5374 llvm::Value *LHSEnd =
5375 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
5376 // The basic structure here is a do-while loop guarded by an emptiness check.
5377 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5378 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5379 llvm::Value *IsEmpty =
5380 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5381 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5382
5383 // Enter the loop body, making that address the current address.
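// The emitted structure is roughly:
//   dst = LHSBegin; src = RHSBegin;
//   do {
//     *dst = RedOp(*dst, *src); // via RedOpGen on the privatized vars
//     ++dst; ++src;
//   } while (dst != LHSEnd);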
5384 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5385 CGF.EmitBlock(BodyBB);
5386
5387 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5388
5389 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5390 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5391 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5392 Address RHSElementCurrent =
5393 Address(RHSElementPHI,
5394 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5395
5396 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5397 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5398 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5399 Address LHSElementCurrent =
5400 Address(LHSElementPHI,
5401 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5402
5403 // Emit copy.
5404 CodeGenFunction::OMPPrivateScope Scope(CGF);
5405 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5406 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5407 Scope.Privatize();
5408 RedOpGen(CGF, XExpr, EExpr, UpExpr);
5409 Scope.ForceCleanup();
5410
5411 // Shift the address forward by one element.
5412 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5413 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
5414 "omp.arraycpy.dest.element");
5415 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5416 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
5417 "omp.arraycpy.src.element");
5418 // Check whether we've reached the end.
5419 llvm::Value *Done =
5420 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5421 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5422 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5423 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5424
5425 // Done.
5426 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5427 }
5428
5429 /// Emit reduction combiner. If the combiner is a simple expression, emit it
5430 /// as is; otherwise treat it as the combiner of a UDR decl and emit it as a
5431 /// call to the UDR combiner function.
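/// For a UDR the combiner call is assumed to look like
///   <udr-combiner>(omp_out, omp_in)
/// with the callee wrapped in an OpaqueValueExpr that is remapped below to the
/// combiner function emitted for the OMPDeclareReductionDecl.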
5432 static void emitReductionCombiner(CodeGenFunction &CGF, 5433 const Expr *ReductionOp) { 5434 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5435 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5436 if (const auto *DRE = 5437 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5438 if (const auto *DRD = 5439 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5440 std::pair<llvm::Function *, llvm::Function *> Reduction = 5441 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5442 RValue Func = RValue::get(Reduction.first); 5443 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5444 CGF.EmitIgnoredExpr(ReductionOp); 5445 return; 5446 } 5447 CGF.EmitIgnoredExpr(ReductionOp); 5448 } 5449 5450 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5451 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5452 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5453 ArrayRef<const Expr *> ReductionOps) { 5454 ASTContext &C = CGM.getContext(); 5455 5456 // void reduction_func(void *LHSArg, void *RHSArg); 5457 FunctionArgList Args; 5458 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5459 ImplicitParamDecl::Other); 5460 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5461 ImplicitParamDecl::Other); 5462 Args.push_back(&LHSArg); 5463 Args.push_back(&RHSArg); 5464 const auto &CGFI = 5465 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5466 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5467 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5468 llvm::GlobalValue::InternalLinkage, Name, 5469 &CGM.getModule()); 5470 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5471 Fn->setDoesNotRecurse(); 5472 CodeGenFunction CGF(CGM); 5473 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5474 5475 // Dst = (void*[n])(LHSArg); 5476 // Src = (void*[n])(RHSArg); 5477 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5478 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5479 ArgsType), CGF.getPointerAlign()); 5480 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5481 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5482 ArgsType), CGF.getPointerAlign()); 5483 5484 // ... 5485 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5486 // ... 5487 CodeGenFunction::OMPPrivateScope Scope(CGF); 5488 auto IPriv = Privates.begin(); 5489 unsigned Idx = 0; 5490 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5491 const auto *RHSVar = 5492 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5493 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5494 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5495 }); 5496 const auto *LHSVar = 5497 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5498 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5499 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5500 }); 5501 QualType PrivTy = (*IPriv)->getType(); 5502 if (PrivTy->isVariablyModifiedType()) { 5503 // Get array size and emit VLA type. 
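// (The size is assumed to occupy the next void* slot of the argument array,
// matching how emitReduction packs RedList below: the VLA's own pointer first,
// then its element count cast to void*.)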
5504 ++Idx; 5505 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5506 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5507 const VariableArrayType *VLA = 5508 CGF.getContext().getAsVariableArrayType(PrivTy); 5509 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5510 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5511 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5512 CGF.EmitVariablyModifiedType(PrivTy); 5513 } 5514 } 5515 Scope.Privatize(); 5516 IPriv = Privates.begin(); 5517 auto ILHS = LHSExprs.begin(); 5518 auto IRHS = RHSExprs.begin(); 5519 for (const Expr *E : ReductionOps) { 5520 if ((*IPriv)->getType()->isArrayType()) { 5521 // Emit reduction for array section. 5522 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5523 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5524 EmitOMPAggregateReduction( 5525 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5526 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5527 emitReductionCombiner(CGF, E); 5528 }); 5529 } else { 5530 // Emit reduction for array subscript or single variable. 5531 emitReductionCombiner(CGF, E); 5532 } 5533 ++IPriv; 5534 ++ILHS; 5535 ++IRHS; 5536 } 5537 Scope.ForceCleanup(); 5538 CGF.FinishFunction(); 5539 return Fn; 5540 } 5541 5542 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5543 const Expr *ReductionOp, 5544 const Expr *PrivateRef, 5545 const DeclRefExpr *LHS, 5546 const DeclRefExpr *RHS) { 5547 if (PrivateRef->getType()->isArrayType()) { 5548 // Emit reduction for array section. 5549 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5550 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5551 EmitOMPAggregateReduction( 5552 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5553 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5554 emitReductionCombiner(CGF, ReductionOp); 5555 }); 5556 } else { 5557 // Emit reduction for array subscript or single variable. 5558 emitReductionCombiner(CGF, ReductionOp); 5559 } 5560 } 5561 5562 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5563 ArrayRef<const Expr *> Privates, 5564 ArrayRef<const Expr *> LHSExprs, 5565 ArrayRef<const Expr *> RHSExprs, 5566 ArrayRef<const Expr *> ReductionOps, 5567 ReductionOptionsTy Options) { 5568 if (!CGF.HaveInsertPoint()) 5569 return; 5570 5571 bool WithNowait = Options.WithNowait; 5572 bool SimpleReduction = Options.SimpleReduction; 5573 5574 // Next code should be emitted for reduction: 5575 // 5576 // static kmp_critical_name lock = { 0 }; 5577 // 5578 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5579 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5580 // ... 5581 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5582 // *(Type<n>-1*)rhs[<n>-1]); 5583 // } 5584 // 5585 // ... 5586 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5587 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5588 // RedList, reduce_func, &<lock>)) { 5589 // case 1: 5590 // ... 5591 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5592 // ... 5593 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5594 // break; 5595 // case 2: 5596 // ... 5597 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5598 // ... 
5599 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5600 // break; 5601 // default:; 5602 // } 5603 // 5604 // if SimpleReduction is true, only the next code is generated: 5605 // ... 5606 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5607 // ... 5608 5609 ASTContext &C = CGM.getContext(); 5610 5611 if (SimpleReduction) { 5612 CodeGenFunction::RunCleanupsScope Scope(CGF); 5613 auto IPriv = Privates.begin(); 5614 auto ILHS = LHSExprs.begin(); 5615 auto IRHS = RHSExprs.begin(); 5616 for (const Expr *E : ReductionOps) { 5617 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5618 cast<DeclRefExpr>(*IRHS)); 5619 ++IPriv; 5620 ++ILHS; 5621 ++IRHS; 5622 } 5623 return; 5624 } 5625 5626 // 1. Build a list of reduction variables. 5627 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5628 auto Size = RHSExprs.size(); 5629 for (const Expr *E : Privates) { 5630 if (E->getType()->isVariablyModifiedType()) 5631 // Reserve place for array size. 5632 ++Size; 5633 } 5634 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5635 QualType ReductionArrayTy = 5636 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5637 /*IndexTypeQuals=*/0); 5638 Address ReductionList = 5639 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5640 auto IPriv = Privates.begin(); 5641 unsigned Idx = 0; 5642 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5643 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5644 CGF.Builder.CreateStore( 5645 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5646 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5647 Elem); 5648 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5649 // Store array size. 5650 ++Idx; 5651 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5652 llvm::Value *Size = CGF.Builder.CreateIntCast( 5653 CGF.getVLASize( 5654 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5655 .NumElts, 5656 CGF.SizeTy, /*isSigned=*/false); 5657 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5658 Elem); 5659 } 5660 } 5661 5662 // 2. Emit reduce_func(). 5663 llvm::Function *ReductionFn = emitReductionFunction( 5664 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5665 LHSExprs, RHSExprs, ReductionOps); 5666 5667 // 3. Create static kmp_critical_name lock = { 0 }; 5668 std::string Name = getName({"reduction"}); 5669 llvm::Value *Lock = getCriticalRegionLock(Name); 5670 5671 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5672 // RedList, reduce_func, &<lock>); 5673 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5674 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5675 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5676 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5677 ReductionList.getPointer(), CGF.VoidPtrTy); 5678 llvm::Value *Args[] = { 5679 IdentTLoc, // ident_t *<loc> 5680 ThreadId, // i32 <gtid> 5681 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5682 ReductionArrayTySize, // size_type sizeof(RedList) 5683 RL, // void *RedList 5684 ReductionFn, // void (*) (void *, void *) <reduce_func> 5685 Lock // kmp_critical_name *&<lock> 5686 }; 5687 llvm::Value *Res = CGF.EmitRuntimeCall( 5688 OMPBuilder.getOrCreateRuntimeFunction( 5689 CGM.getModule(), 5690 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5691 Args); 5692 5693 // 5. 
Build switch(res) 5694 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5695 llvm::SwitchInst *SwInst = 5696 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5697 5698 // 6. Build case 1: 5699 // ... 5700 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5701 // ... 5702 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5703 // break; 5704 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5705 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5706 CGF.EmitBlock(Case1BB); 5707 5708 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5709 llvm::Value *EndArgs[] = { 5710 IdentTLoc, // ident_t *<loc> 5711 ThreadId, // i32 <gtid> 5712 Lock // kmp_critical_name *&<lock> 5713 }; 5714 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5715 CodeGenFunction &CGF, PrePostActionTy &Action) { 5716 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5717 auto IPriv = Privates.begin(); 5718 auto ILHS = LHSExprs.begin(); 5719 auto IRHS = RHSExprs.begin(); 5720 for (const Expr *E : ReductionOps) { 5721 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5722 cast<DeclRefExpr>(*IRHS)); 5723 ++IPriv; 5724 ++ILHS; 5725 ++IRHS; 5726 } 5727 }; 5728 RegionCodeGenTy RCG(CodeGen); 5729 CommonActionTy Action( 5730 nullptr, llvm::None, 5731 OMPBuilder.getOrCreateRuntimeFunction( 5732 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5733 : OMPRTL___kmpc_end_reduce), 5734 EndArgs); 5735 RCG.setAction(Action); 5736 RCG(CGF); 5737 5738 CGF.EmitBranch(DefaultBB); 5739 5740 // 7. Build case 2: 5741 // ... 5742 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5743 // ... 5744 // break; 5745 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5746 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5747 CGF.EmitBlock(Case2BB); 5748 5749 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5750 CodeGenFunction &CGF, PrePostActionTy &Action) { 5751 auto ILHS = LHSExprs.begin(); 5752 auto IRHS = RHSExprs.begin(); 5753 auto IPriv = Privates.begin(); 5754 for (const Expr *E : ReductionOps) { 5755 const Expr *XExpr = nullptr; 5756 const Expr *EExpr = nullptr; 5757 const Expr *UpExpr = nullptr; 5758 BinaryOperatorKind BO = BO_Comma; 5759 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5760 if (BO->getOpcode() == BO_Assign) { 5761 XExpr = BO->getLHS(); 5762 UpExpr = BO->getRHS(); 5763 } 5764 } 5765 // Try to emit update expression as a simple atomic. 5766 const Expr *RHSExpr = UpExpr; 5767 if (RHSExpr) { 5768 // Analyze RHS part of the whole expression. 5769 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5770 RHSExpr->IgnoreParenImpCasts())) { 5771 // If this is a conditional operator, analyze its condition for 5772 // min/max reduction operator. 
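// Illustrative note: for 'reduction(min : x)' the combiner built by Sema has
// the form
//   x = x < x_rhs ? x : x_rhs;
// ('x_rhs' names the private copy here), so dropping down to the condition
// exposes the comparison whose opcode (BO_LT) and RHS drive the atomic min
// update below.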
5773 RHSExpr = ACO->getCond(); 5774 } 5775 if (const auto *BORHS = 5776 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5777 EExpr = BORHS->getRHS(); 5778 BO = BORHS->getOpcode(); 5779 } 5780 } 5781 if (XExpr) { 5782 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5783 auto &&AtomicRedGen = [BO, VD, 5784 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5785 const Expr *EExpr, const Expr *UpExpr) { 5786 LValue X = CGF.EmitLValue(XExpr); 5787 RValue E; 5788 if (EExpr) 5789 E = CGF.EmitAnyExpr(EExpr); 5790 CGF.EmitOMPAtomicSimpleUpdateExpr( 5791 X, E, BO, /*IsXLHSInRHSPart=*/true, 5792 llvm::AtomicOrdering::Monotonic, Loc, 5793 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5794 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5795 PrivateScope.addPrivate( 5796 VD, [&CGF, VD, XRValue, Loc]() { 5797 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5798 CGF.emitOMPSimpleStore( 5799 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5800 VD->getType().getNonReferenceType(), Loc); 5801 return LHSTemp; 5802 }); 5803 (void)PrivateScope.Privatize(); 5804 return CGF.EmitAnyExpr(UpExpr); 5805 }); 5806 }; 5807 if ((*IPriv)->getType()->isArrayType()) { 5808 // Emit atomic reduction for array section. 5809 const auto *RHSVar = 5810 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5811 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5812 AtomicRedGen, XExpr, EExpr, UpExpr); 5813 } else { 5814 // Emit atomic reduction for array subscript or single variable. 5815 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5816 } 5817 } else { 5818 // Emit as a critical region. 5819 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5820 const Expr *, const Expr *) { 5821 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5822 std::string Name = RT.getName({"atomic_reduction"}); 5823 RT.emitCriticalRegion( 5824 CGF, Name, 5825 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5826 Action.Enter(CGF); 5827 emitReductionCombiner(CGF, E); 5828 }, 5829 Loc); 5830 }; 5831 if ((*IPriv)->getType()->isArrayType()) { 5832 const auto *LHSVar = 5833 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5834 const auto *RHSVar = 5835 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5836 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5837 CritRedGen); 5838 } else { 5839 CritRedGen(CGF, nullptr, nullptr, nullptr); 5840 } 5841 } 5842 ++ILHS; 5843 ++IRHS; 5844 ++IPriv; 5845 } 5846 }; 5847 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5848 if (!WithNowait) { 5849 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5850 llvm::Value *EndArgs[] = { 5851 IdentTLoc, // ident_t *<loc> 5852 ThreadId, // i32 <gtid> 5853 Lock // kmp_critical_name *&<lock> 5854 }; 5855 CommonActionTy Action(nullptr, llvm::None, 5856 OMPBuilder.getOrCreateRuntimeFunction( 5857 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5858 EndArgs); 5859 AtomicRCG.setAction(Action); 5860 AtomicRCG(CGF); 5861 } else { 5862 AtomicRCG(CGF); 5863 } 5864 5865 CGF.EmitBranch(DefaultBB); 5866 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5867 } 5868 5869 /// Generates unique name for artificial threadprivate variables. 5870 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5871 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5872 const Expr *Ref) { 5873 SmallString<256> Buffer; 5874 llvm::raw_svector_ostream Out(Buffer); 5875 const clang::DeclRefExpr *DE; 5876 const VarDecl *D = ::getBaseDecl(Ref, DE); 5877 if (!D) 5878 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5879 D = D->getCanonicalDecl(); 5880 std::string Name = CGM.getOpenMPRuntime().getName( 5881 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5882 Out << Prefix << Name << "_" 5883 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5884 return std::string(Out.str()); 5885 } 5886 5887 /// Emits reduction initializer function: 5888 /// \code 5889 /// void @.red_init(void* %arg, void* %orig) { 5890 /// %0 = bitcast void* %arg to <type>* 5891 /// store <type> <init>, <type>* %0 5892 /// ret void 5893 /// } 5894 /// \endcode 5895 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5896 SourceLocation Loc, 5897 ReductionCodeGen &RCG, unsigned N) { 5898 ASTContext &C = CGM.getContext(); 5899 QualType VoidPtrTy = C.VoidPtrTy; 5900 VoidPtrTy.addRestrict(); 5901 FunctionArgList Args; 5902 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5903 ImplicitParamDecl::Other); 5904 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5905 ImplicitParamDecl::Other); 5906 Args.emplace_back(&Param); 5907 Args.emplace_back(&ParamOrig); 5908 const auto &FnInfo = 5909 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5910 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5911 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5912 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5913 Name, &CGM.getModule()); 5914 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5915 Fn->setDoesNotRecurse(); 5916 CodeGenFunction CGF(CGM); 5917 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5918 Address PrivateAddr = CGF.EmitLoadOfPointer( 5919 CGF.GetAddrOfLocalVar(&Param), 5920 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5921 llvm::Value *Size = nullptr; 5922 // If the size of the reduction item is non-constant, load it from global 5923 // threadprivate variable. 
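// A minimal sketch of the scheme (the generated name is approximate): for a
// VLA item 'int a[n]', emitTaskReductionFixups() stores the dynamic byte size
// into an artificial threadprivate global named roughly
//   reduction_size.a_<raw_loc>
// and this initializer reloads it, since the runtime passes only the two
// void* arguments to the generated functions.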
5924 if (RCG.getSizes(N).second) {
5925 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5926 CGF, CGM.getContext().getSizeType(),
5927 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5928 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5929 CGM.getContext().getSizeType(), Loc);
5930 }
5931 RCG.emitAggregateType(CGF, N, Size);
5932 LValue OrigLVal;
5933 // If the initializer uses the initializer from the 'declare reduction'
5934 // construct, emit a pointer to the address of the original reduction item
5935 // (required by the reduction initializer).
5936 if (RCG.usesReductionInitializer(N)) {
5937 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5938 SharedAddr = CGF.EmitLoadOfPointer(
5939 SharedAddr,
5940 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5941 OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5942 } else {
5943 OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5944 llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5945 CGM.getContext().VoidPtrTy);
5946 }
5947 // Emit the initializer:
5948 // %0 = bitcast void* %arg to <type>*
5949 // store <type> <init>, <type>* %0
5950 RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5951 [](CodeGenFunction &) { return false; });
5952 CGF.FinishFunction();
5953 return Fn;
5954 }
5955
5956 /// Emits reduction combiner function:
5957 /// \code
5958 /// void @.red_comb(void* %arg0, void* %arg1) {
5959 /// %lhs = bitcast void* %arg0 to <type>*
5960 /// %rhs = bitcast void* %arg1 to <type>*
5961 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5962 /// store <type> %2, <type>* %lhs
5963 /// ret void
5964 /// }
5965 /// \endcode
5966 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5967 SourceLocation Loc,
5968 ReductionCodeGen &RCG, unsigned N,
5969 const Expr *ReductionOp,
5970 const Expr *LHS, const Expr *RHS,
5971 const Expr *PrivateRef) {
5972 ASTContext &C = CGM.getContext();
5973 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5974 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5975 FunctionArgList Args;
5976 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5977 C.VoidPtrTy, ImplicitParamDecl::Other);
5978 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5979 ImplicitParamDecl::Other);
5980 Args.emplace_back(&ParamInOut);
5981 Args.emplace_back(&ParamIn);
5982 const auto &FnInfo =
5983 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5984 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5985 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5986 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5987 Name, &CGM.getModule());
5988 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5989 Fn->setDoesNotRecurse();
5990 CodeGenFunction CGF(CGM);
5991 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5992 llvm::Value *Size = nullptr;
5993 // If the size of the reduction item is non-constant, load it from the global
5994 // threadprivate variable.
5995 if (RCG.getSizes(N).second) { 5996 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5997 CGF, CGM.getContext().getSizeType(), 5998 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5999 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6000 CGM.getContext().getSizeType(), Loc); 6001 } 6002 RCG.emitAggregateType(CGF, N, Size); 6003 // Remap lhs and rhs variables to the addresses of the function arguments. 6004 // %lhs = bitcast void* %arg0 to <type>* 6005 // %rhs = bitcast void* %arg1 to <type>* 6006 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6007 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 6008 // Pull out the pointer to the variable. 6009 Address PtrAddr = CGF.EmitLoadOfPointer( 6010 CGF.GetAddrOfLocalVar(&ParamInOut), 6011 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6012 return CGF.Builder.CreateElementBitCast( 6013 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6014 }); 6015 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6016 // Pull out the pointer to the variable. 6017 Address PtrAddr = CGF.EmitLoadOfPointer( 6018 CGF.GetAddrOfLocalVar(&ParamIn), 6019 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6020 return CGF.Builder.CreateElementBitCast( 6021 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6022 }); 6023 PrivateScope.Privatize(); 6024 // Emit the combiner body: 6025 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6026 // store <type> %2, <type>* %lhs 6027 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6028 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6029 cast<DeclRefExpr>(RHS)); 6030 CGF.FinishFunction(); 6031 return Fn; 6032 } 6033 6034 /// Emits reduction finalizer function: 6035 /// \code 6036 /// void @.red_fini(void* %arg) { 6037 /// %0 = bitcast void* %arg to <type>* 6038 /// <destroy>(<type>* %0) 6039 /// ret void 6040 /// } 6041 /// \endcode 6042 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6043 SourceLocation Loc, 6044 ReductionCodeGen &RCG, unsigned N) { 6045 if (!RCG.needCleanups(N)) 6046 return nullptr; 6047 ASTContext &C = CGM.getContext(); 6048 FunctionArgList Args; 6049 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6050 ImplicitParamDecl::Other); 6051 Args.emplace_back(&Param); 6052 const auto &FnInfo = 6053 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6054 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6055 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6056 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6057 Name, &CGM.getModule()); 6058 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6059 Fn->setDoesNotRecurse(); 6060 CodeGenFunction CGF(CGM); 6061 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6062 Address PrivateAddr = CGF.EmitLoadOfPointer( 6063 CGF.GetAddrOfLocalVar(&Param), 6064 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6065 llvm::Value *Size = nullptr; 6066 // If the size of the reduction item is non-constant, load it from global 6067 // threadprivate variable. 
6068 if (RCG.getSizes(N).second) { 6069 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6070 CGF, CGM.getContext().getSizeType(), 6071 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6072 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6073 CGM.getContext().getSizeType(), Loc); 6074 } 6075 RCG.emitAggregateType(CGF, N, Size); 6076 // Emit the finalizer body: 6077 // <destroy>(<type>* %0) 6078 RCG.emitCleanups(CGF, N, PrivateAddr); 6079 CGF.FinishFunction(Loc); 6080 return Fn; 6081 } 6082 6083 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6084 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6085 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6086 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6087 return nullptr; 6088 6089 // Build typedef struct: 6090 // kmp_taskred_input { 6091 // void *reduce_shar; // shared reduction item 6092 // void *reduce_orig; // original reduction item used for initialization 6093 // size_t reduce_size; // size of data item 6094 // void *reduce_init; // data initialization routine 6095 // void *reduce_fini; // data finalization routine 6096 // void *reduce_comb; // data combiner routine 6097 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6098 // } kmp_taskred_input_t; 6099 ASTContext &C = CGM.getContext(); 6100 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6101 RD->startDefinition(); 6102 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6103 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6104 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6105 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6106 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6107 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6108 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6109 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6110 RD->completeDefinition(); 6111 QualType RDType = C.getRecordType(RD); 6112 unsigned Size = Data.ReductionVars.size(); 6113 llvm::APInt ArraySize(/*numBits=*/64, Size); 6114 QualType ArrayRDType = C.getConstantArrayType( 6115 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6116 // kmp_task_red_input_t .rd_input.[Size]; 6117 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6118 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6119 Data.ReductionCopies, Data.ReductionOps); 6120 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6121 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6122 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6123 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6124 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6125 TaskRedInput.getPointer(), Idxs, 6126 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6127 ".rd_input.gep."); 6128 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6129 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6130 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6131 RCG.emitSharedOrigLValue(CGF, Cnt); 6132 llvm::Value *CastedShared = 6133 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6134 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6135 // ElemLVal.reduce_orig = &Origs[Cnt]; 6136 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6137 llvm::Value *CastedOrig = 6138 
CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6139 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6140 RCG.emitAggregateType(CGF, Cnt);
6141 llvm::Value *SizeValInChars;
6142 llvm::Value *SizeVal;
6143 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6144 // We use delayed creation/initialization for VLAs and array sections. It is
6145 // required because the runtime does not provide a way to pass the sizes of
6146 // VLAs/array sections to the initializer/combiner/finalizer functions.
6147 // Instead, threadprivate global variables are used to store these values,
6148 // and the functions read them from there.
6149 bool DelayedCreation = !!SizeVal;
6150 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6151 /*isSigned=*/false);
6152 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6153 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6154 // ElemLVal.reduce_init = init;
6155 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6156 llvm::Value *InitAddr =
6157 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6158 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6159 // ElemLVal.reduce_fini = fini;
6160 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6161 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6162 llvm::Value *FiniAddr = Fini
6163 ? CGF.EmitCastToVoidPtr(Fini)
6164 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6165 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6166 // ElemLVal.reduce_comb = comb;
6167 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6168 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6169 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6170 RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6171 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6172 // ElemLVal.flags = 0;
6173 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6174 if (DelayedCreation) {
6175 CGF.EmitStoreOfScalar(
6176 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6177 FlagsLVal);
6178 } else
6179 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6180 FlagsLVal.getType());
6181 }
6182 if (Data.IsReductionWithTaskMod) {
6183 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6184 // is_ws, int num, void *data);
6185 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6186 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6187 CGM.IntTy, /*isSigned=*/true);
6188 llvm::Value *Args[] = {
6189 IdentTLoc, GTid,
6190 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6191 /*isSigned=*/true),
6192 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6193 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6194 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6195 return CGF.EmitRuntimeCall(
6196 OMPBuilder.getOrCreateRuntimeFunction(
6197 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6198 Args);
6199 }
6200 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6201 llvm::Value *Args[] = {
6202 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6203 /*isSigned=*/true),
6204 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6205 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6206 CGM.VoidPtrTy)};
6207 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6208 CGM.getModule(), OMPRTL___kmpc_taskred_init),
6209 Args);
6210 }
6211
6212 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6213 SourceLocation Loc,
6214 bool IsWorksharingReduction) {
6215 // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
6216 // gtid, int is_ws);
6217 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6218 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6219 CGM.IntTy, /*isSigned=*/true);
6220 llvm::Value *Args[] = {IdentTLoc, GTid,
6221 llvm::ConstantInt::get(CGM.IntTy,
6222 IsWorksharingReduction ? 1 : 0,
6223 /*isSigned=*/true)};
6224 (void)CGF.EmitRuntimeCall(
6225 OMPBuilder.getOrCreateRuntimeFunction(
6226 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6227 Args);
6228 }
6229
6230 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6231 SourceLocation Loc,
6232 ReductionCodeGen &RCG,
6233 unsigned N) {
6234 auto Sizes = RCG.getSizes(N);
6235 // Emit the threadprivate global variable if the size is non-constant
6236 // (Sizes.second != nullptr).
6237 if (Sizes.second) {
6238 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6239 /*isSigned=*/false);
6240 Address SizeAddr = getAddrOfArtificialThreadPrivate(
6241 CGF, CGM.getContext().getSizeType(),
6242 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6243 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6244 }
6245 }
6246
6247 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6248 SourceLocation Loc,
6249 llvm::Value *ReductionsPtr,
6250 LValue SharedLVal) {
6251 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
6252 // void *d);
6253 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6254 CGM.IntTy,
6255 /*isSigned=*/true),
6256 ReductionsPtr,
6257 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6258 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6259 return Address(
6260 CGF.EmitRuntimeCall(
6261 OMPBuilder.getOrCreateRuntimeFunction(
6262 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6263 Args),
6264 SharedLVal.getAlignment());
6265 }
6266
6267 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
6268 const OMPTaskDataTy &Data) {
6269 if (!CGF.HaveInsertPoint())
6270 return;
6271
6272 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
6273 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
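// Illustrative lowering sketch: with no 'depend' clauses, '#pragma omp
// taskwait' becomes a single runtime call, conceptually
//   __kmpc_omp_taskwait(&loc, gtid);
// createTaskwait() below emits that same call through the OpenMPIRBuilder.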
6274 OMPBuilder.createTaskwait(CGF.Builder); 6275 } else { 6276 llvm::Value *ThreadID = getThreadID(CGF, Loc); 6277 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 6278 auto &M = CGM.getModule(); 6279 Address DependenciesArray = Address::invalid(); 6280 llvm::Value *NumOfElements; 6281 std::tie(NumOfElements, DependenciesArray) = 6282 emitDependClause(CGF, Data.Dependences, Loc); 6283 llvm::Value *DepWaitTaskArgs[6]; 6284 if (!Data.Dependences.empty()) { 6285 DepWaitTaskArgs[0] = UpLoc; 6286 DepWaitTaskArgs[1] = ThreadID; 6287 DepWaitTaskArgs[2] = NumOfElements; 6288 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 6289 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 6290 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6291 6292 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 6293 6294 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 6295 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 6296 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 6297 // is specified. 6298 CGF.EmitRuntimeCall( 6299 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 6300 DepWaitTaskArgs); 6301 6302 } else { 6303 6304 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6305 // global_tid); 6306 llvm::Value *Args[] = {UpLoc, ThreadID}; 6307 // Ignore return result until untied tasks are supported. 6308 CGF.EmitRuntimeCall( 6309 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), 6310 Args); 6311 } 6312 } 6313 6314 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6315 Region->emitUntiedSwitch(CGF); 6316 } 6317 6318 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6319 OpenMPDirectiveKind InnerKind, 6320 const RegionCodeGenTy &CodeGen, 6321 bool HasCancel) { 6322 if (!CGF.HaveInsertPoint()) 6323 return; 6324 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 6325 InnerKind != OMPD_critical && 6326 InnerKind != OMPD_master && 6327 InnerKind != OMPD_masked); 6328 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6329 } 6330 6331 namespace { 6332 enum RTCancelKind { 6333 CancelNoreq = 0, 6334 CancelParallel = 1, 6335 CancelLoop = 2, 6336 CancelSections = 3, 6337 CancelTaskgroup = 4 6338 }; 6339 } // anonymous namespace 6340 6341 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6342 RTCancelKind CancelKind = CancelNoreq; 6343 if (CancelRegion == OMPD_parallel) 6344 CancelKind = CancelParallel; 6345 else if (CancelRegion == OMPD_for) 6346 CancelKind = CancelLoop; 6347 else if (CancelRegion == OMPD_sections) 6348 CancelKind = CancelSections; 6349 else { 6350 assert(CancelRegion == OMPD_taskgroup); 6351 CancelKind = CancelTaskgroup; 6352 } 6353 return CancelKind; 6354 } 6355 6356 void CGOpenMPRuntime::emitCancellationPointCall( 6357 CodeGenFunction &CGF, SourceLocation Loc, 6358 OpenMPDirectiveKind CancelRegion) { 6359 if (!CGF.HaveInsertPoint()) 6360 return; 6361 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6362 // global_tid, kmp_int32 cncl_kind); 6363 if (auto *OMPRegionInfo = 6364 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6365 // For 'cancellation point taskgroup', the task region info may not have a 6366 // cancel. This may instead happen in another adjacent task. 
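// For example (illustrative source):
//   #pragma omp taskgroup
//   {
//     #pragma omp task // carries only the cancellation point ...
//     { ... #pragma omp cancellation point taskgroup ... }
//     #pragma omp task // ... while a sibling task performs the cancel.
//     { ... #pragma omp cancel taskgroup ... }
//   }
// The first task still needs the runtime check even though its own region
// info has no cancel.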
6367 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6368 llvm::Value *Args[] = { 6369 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6370 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6371 // Ignore return result until untied tasks are supported. 6372 llvm::Value *Result = CGF.EmitRuntimeCall( 6373 OMPBuilder.getOrCreateRuntimeFunction( 6374 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6375 Args); 6376 // if (__kmpc_cancellationpoint()) { 6377 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6378 // exit from construct; 6379 // } 6380 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6381 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6382 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6383 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6384 CGF.EmitBlock(ExitBB); 6385 if (CancelRegion == OMPD_parallel) 6386 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6387 // exit from construct; 6388 CodeGenFunction::JumpDest CancelDest = 6389 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6390 CGF.EmitBranchThroughCleanup(CancelDest); 6391 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6392 } 6393 } 6394 } 6395 6396 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6397 const Expr *IfCond, 6398 OpenMPDirectiveKind CancelRegion) { 6399 if (!CGF.HaveInsertPoint()) 6400 return; 6401 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6402 // kmp_int32 cncl_kind); 6403 auto &M = CGM.getModule(); 6404 if (auto *OMPRegionInfo = 6405 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6406 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6407 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6408 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6409 llvm::Value *Args[] = { 6410 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6411 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6412 // Ignore return result until untied tasks are supported. 6413 llvm::Value *Result = CGF.EmitRuntimeCall( 6414 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6415 // if (__kmpc_cancel()) { 6416 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6417 // exit from construct; 6418 // } 6419 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6420 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6421 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6422 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6423 CGF.EmitBlock(ExitBB); 6424 if (CancelRegion == OMPD_parallel) 6425 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6426 // exit from construct; 6427 CodeGenFunction::JumpDest CancelDest = 6428 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6429 CGF.EmitBranchThroughCleanup(CancelDest); 6430 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6431 }; 6432 if (IfCond) { 6433 emitIfClause(CGF, IfCond, ThenGen, 6434 [](CodeGenFunction &, PrePostActionTy &) {}); 6435 } else { 6436 RegionCodeGenTy ThenRCG(ThenGen); 6437 ThenRCG(CGF); 6438 } 6439 } 6440 } 6441 6442 namespace { 6443 /// Cleanup action for uses_allocators support. 
6444 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6445 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6446 6447 public: 6448 OMPUsesAllocatorsActionTy( 6449 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6450 : Allocators(Allocators) {} 6451 void Enter(CodeGenFunction &CGF) override { 6452 if (!CGF.HaveInsertPoint()) 6453 return; 6454 for (const auto &AllocatorData : Allocators) { 6455 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6456 CGF, AllocatorData.first, AllocatorData.second); 6457 } 6458 } 6459 void Exit(CodeGenFunction &CGF) override { 6460 if (!CGF.HaveInsertPoint()) 6461 return; 6462 for (const auto &AllocatorData : Allocators) { 6463 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6464 AllocatorData.first); 6465 } 6466 } 6467 }; 6468 } // namespace 6469 6470 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6471 const OMPExecutableDirective &D, StringRef ParentName, 6472 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6473 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6474 assert(!ParentName.empty() && "Invalid target region parent name!"); 6475 HasEmittedTargetRegion = true; 6476 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6477 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6478 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6479 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6480 if (!D.AllocatorTraits) 6481 continue; 6482 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6483 } 6484 } 6485 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6486 CodeGen.setAction(UsesAllocatorAction); 6487 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6488 IsOffloadEntry, CodeGen); 6489 } 6490 6491 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6492 const Expr *Allocator, 6493 const Expr *AllocatorTraits) { 6494 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6495 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6496 // Use default memspace handle. 6497 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6498 llvm::Value *NumTraits = llvm::ConstantInt::get( 6499 CGF.IntTy, cast<ConstantArrayType>( 6500 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6501 ->getSize() 6502 .getLimitedValue()); 6503 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6504 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6505 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6506 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6507 AllocatorTraitsLVal.getBaseInfo(), 6508 AllocatorTraitsLVal.getTBAAInfo()); 6509 llvm::Value *Traits = 6510 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6511 6512 llvm::Value *AllocatorVal = 6513 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6514 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6515 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6516 // Store to allocator. 
6517 CGF.EmitVarDecl(*cast<VarDecl>(
6518 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6519 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6520 AllocatorVal =
6521 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6522 Allocator->getType(), Allocator->getExprLoc());
6523 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6524 }
6525
6526 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6527 const Expr *Allocator) {
6528 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6529 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6530 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6531 llvm::Value *AllocatorVal =
6532 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6533 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6534 CGF.getContext().VoidPtrTy,
6535 Allocator->getExprLoc());
6536 (void)CGF.EmitRuntimeCall(
6537 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6538 OMPRTL___kmpc_destroy_allocator),
6539 {ThreadId, AllocatorVal});
6540 }
6541
6542 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6543 const OMPExecutableDirective &D, StringRef ParentName,
6544 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6545 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6546 // Create a unique name for the entry function using the source location
6547 // information of the current target region. The name will be something like:
6548 //
6549 // __omp_offloading_DD_FFFF_PP_lBB
6550 //
6551 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6552 // mangled name of the function that encloses the target region and BB is the
6553 // line number of the target region.
6554
6555 unsigned DeviceID;
6556 unsigned FileID;
6557 unsigned Line;
6558 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6559 Line);
6560 SmallString<64> EntryFnName;
6561 {
6562 llvm::raw_svector_ostream OS(EntryFnName);
6563 OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6564 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6565 }
6566
6567 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6568
6569 CodeGenFunction CGF(CGM, true);
6570 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6571 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6572
6573 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6574
6575 // If this target outlined function is not an offload entry, we don't need to
6576 // register it.
6577 if (!IsOffloadEntry)
6578 return;
6579
6580 // The target region ID is used by the runtime library to identify the current
6581 // target region, so it only has to be unique and not necessarily point to
6582 // anything. It could be the pointer to the outlined function that implements
6583 // the target region, but we aren't using it, so the compiler doesn't need to
6584 // keep it alive and can inline the host function if that proves worthwhile
6585 // during optimization. On the other hand, when emitting code for the device,
6586 // the ID has to be the function address so that it can be retrieved from the
6587 // offloading entry and launched by the runtime library. We also give the
6588 // outlined function external linkage when emitting code for the device,
6589 // because these functions will be entry points into the device.
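// Illustrative result (the exact name depends on the location info): on the
// host the ID becomes a uniqued global, conceptually
//   @.__omp_offloading_<DD>_<FFFF>_foo_l42.region_id = weak constant i8 0
// while on the device the ID is the (bitcast) address of the outlined kernel
// itself.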
6590 6591 if (CGM.getLangOpts().OpenMPIsDevice) { 6592 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6593 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6594 OutlinedFn->setDSOLocal(false); 6595 if (CGM.getTriple().isAMDGCN()) 6596 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 6597 } else { 6598 std::string Name = getName({EntryFnName, "region_id"}); 6599 OutlinedFnID = new llvm::GlobalVariable( 6600 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6601 llvm::GlobalValue::WeakAnyLinkage, 6602 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6603 } 6604 6605 // Register the information for the entry associated with this target region. 6606 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6607 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6608 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6609 6610 // Add NumTeams and ThreadLimit attributes to the outlined GPU function 6611 int32_t DefaultValTeams = -1; 6612 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); 6613 if (DefaultValTeams > 0) { 6614 OutlinedFn->addFnAttr("omp_target_num_teams", 6615 std::to_string(DefaultValTeams)); 6616 } 6617 int32_t DefaultValThreads = -1; 6618 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); 6619 if (DefaultValThreads > 0) { 6620 OutlinedFn->addFnAttr("omp_target_thread_limit", 6621 std::to_string(DefaultValThreads)); 6622 } 6623 } 6624 6625 /// Checks if the expression is constant or does not have non-trivial function 6626 /// calls. 6627 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6628 // We can skip constant expressions. 6629 // We can skip expressions with trivial calls or simple expressions. 6630 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6631 !E->hasNonTrivialCall(Ctx)) && 6632 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6633 } 6634 6635 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6636 const Stmt *Body) { 6637 const Stmt *Child = Body->IgnoreContainers(); 6638 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6639 Child = nullptr; 6640 for (const Stmt *S : C->body()) { 6641 if (const auto *E = dyn_cast<Expr>(S)) { 6642 if (isTrivial(Ctx, E)) 6643 continue; 6644 } 6645 // Some of the statements can be ignored. 6646 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6647 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6648 continue; 6649 // Analyze declarations. 6650 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6651 if (llvm::all_of(DS->decls(), [](const Decl *D) { 6652 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6653 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6654 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6655 isa<UsingDirectiveDecl>(D) || 6656 isa<OMPDeclareReductionDecl>(D) || 6657 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6658 return true; 6659 const auto *VD = dyn_cast<VarDecl>(D); 6660 if (!VD) 6661 return false; 6662 return VD->hasGlobalStorage() || !VD->isUsed(); 6663 })) 6664 continue; 6665 } 6666 // Found multiple children - cannot get the one child only. 
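// E.g. (illustrative): a captured body '{ ; int unused; f(); }' reduces to
// the single child 'f()', because null statements and unused locals are
// ignorable, while '{ f(); g(); }' has two real children and yields nullptr.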
6667 if (Child) 6668 return nullptr; 6669 Child = S; 6670 } 6671 if (Child) 6672 Child = Child->IgnoreContainers(); 6673 } 6674 return Child; 6675 } 6676 6677 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( 6678 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6679 int32_t &DefaultVal) { 6680 6681 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6682 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6683 "Expected target-based executable directive."); 6684 switch (DirectiveKind) { 6685 case OMPD_target: { 6686 const auto *CS = D.getInnermostCapturedStmt(); 6687 const auto *Body = 6688 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6689 const Stmt *ChildStmt = 6690 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6691 if (const auto *NestedDir = 6692 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6693 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6694 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6695 const Expr *NumTeams = 6696 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6697 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6698 if (auto Constant = 6699 NumTeams->getIntegerConstantExpr(CGF.getContext())) 6700 DefaultVal = Constant->getExtValue(); 6701 return NumTeams; 6702 } 6703 DefaultVal = 0; 6704 return nullptr; 6705 } 6706 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6707 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { 6708 DefaultVal = 1; 6709 return nullptr; 6710 } 6711 DefaultVal = 1; 6712 return nullptr; 6713 } 6714 // A value of -1 is used to check if we need to emit no teams region 6715 DefaultVal = -1; 6716 return nullptr; 6717 } 6718 case OMPD_target_teams: 6719 case OMPD_target_teams_distribute: 6720 case OMPD_target_teams_distribute_simd: 6721 case OMPD_target_teams_distribute_parallel_for: 6722 case OMPD_target_teams_distribute_parallel_for_simd: { 6723 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6724 const Expr *NumTeams = 6725 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6726 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6727 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) 6728 DefaultVal = Constant->getExtValue(); 6729 return NumTeams; 6730 } 6731 DefaultVal = 0; 6732 return nullptr; 6733 } 6734 case OMPD_target_parallel: 6735 case OMPD_target_parallel_for: 6736 case OMPD_target_parallel_for_simd: 6737 case OMPD_target_simd: 6738 DefaultVal = 1; 6739 return nullptr; 6740 case OMPD_parallel: 6741 case OMPD_for: 6742 case OMPD_parallel_for: 6743 case OMPD_parallel_master: 6744 case OMPD_parallel_sections: 6745 case OMPD_for_simd: 6746 case OMPD_parallel_for_simd: 6747 case OMPD_cancel: 6748 case OMPD_cancellation_point: 6749 case OMPD_ordered: 6750 case OMPD_threadprivate: 6751 case OMPD_allocate: 6752 case OMPD_task: 6753 case OMPD_simd: 6754 case OMPD_tile: 6755 case OMPD_unroll: 6756 case OMPD_sections: 6757 case OMPD_section: 6758 case OMPD_single: 6759 case OMPD_master: 6760 case OMPD_critical: 6761 case OMPD_taskyield: 6762 case OMPD_barrier: 6763 case OMPD_taskwait: 6764 case OMPD_taskgroup: 6765 case OMPD_atomic: 6766 case OMPD_flush: 6767 case OMPD_depobj: 6768 case OMPD_scan: 6769 case OMPD_teams: 6770 case OMPD_target_data: 6771 case OMPD_target_exit_data: 6772 case OMPD_target_enter_data: 6773 case OMPD_distribute: 6774 case OMPD_distribute_simd: 6775 case OMPD_distribute_parallel_for: 6776 case OMPD_distribute_parallel_for_simd: 6777 case 
OMPD_teams_distribute: 6778 case OMPD_teams_distribute_simd: 6779 case OMPD_teams_distribute_parallel_for: 6780 case OMPD_teams_distribute_parallel_for_simd: 6781 case OMPD_target_update: 6782 case OMPD_declare_simd: 6783 case OMPD_declare_variant: 6784 case OMPD_begin_declare_variant: 6785 case OMPD_end_declare_variant: 6786 case OMPD_declare_target: 6787 case OMPD_end_declare_target: 6788 case OMPD_declare_reduction: 6789 case OMPD_declare_mapper: 6790 case OMPD_taskloop: 6791 case OMPD_taskloop_simd: 6792 case OMPD_master_taskloop: 6793 case OMPD_master_taskloop_simd: 6794 case OMPD_parallel_master_taskloop: 6795 case OMPD_parallel_master_taskloop_simd: 6796 case OMPD_requires: 6797 case OMPD_metadirective: 6798 case OMPD_unknown: 6799 break; 6800 default: 6801 break; 6802 } 6803 llvm_unreachable("Unexpected directive kind."); 6804 } 6805 6806 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( 6807 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 6808 assert(!CGF.getLangOpts().OpenMPIsDevice && 6809 "Clauses associated with the teams directive expected to be emitted " 6810 "only for the host!"); 6811 CGBuilderTy &Bld = CGF.Builder; 6812 int32_t DefaultNT = -1; 6813 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT); 6814 if (NumTeams != nullptr) { 6815 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6816 6817 switch (DirectiveKind) { 6818 case OMPD_target: { 6819 const auto *CS = D.getInnermostCapturedStmt(); 6820 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6821 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6822 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6823 /*IgnoreResultAssign*/ true); 6824 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6825 /*isSigned=*/true); 6826 } 6827 case OMPD_target_teams: 6828 case OMPD_target_teams_distribute: 6829 case OMPD_target_teams_distribute_simd: 6830 case OMPD_target_teams_distribute_parallel_for: 6831 case OMPD_target_teams_distribute_parallel_for_simd: { 6832 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6833 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6834 /*IgnoreResultAssign*/ true); 6835 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6836 /*isSigned=*/true); 6837 } 6838 default: 6839 break; 6840 } 6841 } else if (DefaultNT == -1) { 6842 return nullptr; 6843 } 6844 6845 return Bld.getInt32(DefaultNT); 6846 } 6847 6848 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6849 llvm::Value *DefaultThreadLimitVal) { 6850 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6851 CGF.getContext(), CS->getCapturedStmt()); 6852 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6853 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6854 llvm::Value *NumThreads = nullptr; 6855 llvm::Value *CondVal = nullptr; 6856 // Handle if clause. If if clause present, the number of threads is 6857 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
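// E.g. (illustrative): for a nested '#pragma omp parallel if(c)
// num_threads(n)' the emitted value is 'c ? n : 1'; with 'if(c)' alone it is
// 'c ? 0 : 1', where 0 lets the runtime pick its default.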
6858 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6859 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6860 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6861 const OMPIfClause *IfClause = nullptr; 6862 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6863 if (C->getNameModifier() == OMPD_unknown || 6864 C->getNameModifier() == OMPD_parallel) { 6865 IfClause = C; 6866 break; 6867 } 6868 } 6869 if (IfClause) { 6870 const Expr *Cond = IfClause->getCondition(); 6871 bool Result; 6872 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6873 if (!Result) 6874 return CGF.Builder.getInt32(1); 6875 } else { 6876 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6877 if (const auto *PreInit = 6878 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6879 for (const auto *I : PreInit->decls()) { 6880 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6881 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6882 } else { 6883 CodeGenFunction::AutoVarEmission Emission = 6884 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6885 CGF.EmitAutoVarCleanups(Emission); 6886 } 6887 } 6888 } 6889 CondVal = CGF.EvaluateExprAsBool(Cond); 6890 } 6891 } 6892 } 6893 // Check the value of num_threads clause iff if clause was not specified 6894 // or is not evaluated to false. 6895 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6896 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6897 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6898 const auto *NumThreadsClause = 6899 Dir->getSingleClause<OMPNumThreadsClause>(); 6900 CodeGenFunction::LexicalScope Scope( 6901 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6902 if (const auto *PreInit = 6903 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6904 for (const auto *I : PreInit->decls()) { 6905 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6906 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6907 } else { 6908 CodeGenFunction::AutoVarEmission Emission = 6909 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6910 CGF.EmitAutoVarCleanups(Emission); 6911 } 6912 } 6913 } 6914 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6915 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6916 /*isSigned=*/false); 6917 if (DefaultThreadLimitVal) 6918 NumThreads = CGF.Builder.CreateSelect( 6919 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6920 DefaultThreadLimitVal, NumThreads); 6921 } else { 6922 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6923 : CGF.Builder.getInt32(0); 6924 } 6925 // Process condition of the if clause. 6926 if (CondVal) { 6927 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6928 CGF.Builder.getInt32(1)); 6929 } 6930 return NumThreads; 6931 } 6932 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6933 return CGF.Builder.getInt32(1); 6934 return DefaultThreadLimitVal; 6935 } 6936 return DefaultThreadLimitVal ? 
DefaultThreadLimitVal 6937 : CGF.Builder.getInt32(0); 6938 } 6939 6940 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( 6941 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6942 int32_t &DefaultVal) { 6943 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6944 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6945 "Expected target-based executable directive."); 6946 6947 switch (DirectiveKind) { 6948 case OMPD_target: 6949 // Teams have no clause thread_limit 6950 return nullptr; 6951 case OMPD_target_teams: 6952 case OMPD_target_teams_distribute: 6953 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6954 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6955 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit(); 6956 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6957 if (auto Constant = 6958 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6959 DefaultVal = Constant->getExtValue(); 6960 return ThreadLimit; 6961 } 6962 return nullptr; 6963 case OMPD_target_parallel: 6964 case OMPD_target_parallel_for: 6965 case OMPD_target_parallel_for_simd: 6966 case OMPD_target_teams_distribute_parallel_for: 6967 case OMPD_target_teams_distribute_parallel_for_simd: { 6968 Expr *ThreadLimit = nullptr; 6969 Expr *NumThreads = nullptr; 6970 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6971 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6972 ThreadLimit = ThreadLimitClause->getThreadLimit(); 6973 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6974 if (auto Constant = 6975 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6976 DefaultVal = Constant->getExtValue(); 6977 } 6978 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6979 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6980 NumThreads = NumThreadsClause->getNumThreads(); 6981 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) { 6982 if (auto Constant = 6983 NumThreads->getIntegerConstantExpr(CGF.getContext())) { 6984 if (Constant->getExtValue() < DefaultVal) { 6985 DefaultVal = Constant->getExtValue(); 6986 ThreadLimit = NumThreads; 6987 } 6988 } 6989 } 6990 } 6991 return ThreadLimit; 6992 } 6993 case OMPD_target_teams_distribute_simd: 6994 case OMPD_target_simd: 6995 DefaultVal = 1; 6996 return nullptr; 6997 case OMPD_parallel: 6998 case OMPD_for: 6999 case OMPD_parallel_for: 7000 case OMPD_parallel_master: 7001 case OMPD_parallel_sections: 7002 case OMPD_for_simd: 7003 case OMPD_parallel_for_simd: 7004 case OMPD_cancel: 7005 case OMPD_cancellation_point: 7006 case OMPD_ordered: 7007 case OMPD_threadprivate: 7008 case OMPD_allocate: 7009 case OMPD_task: 7010 case OMPD_simd: 7011 case OMPD_tile: 7012 case OMPD_unroll: 7013 case OMPD_sections: 7014 case OMPD_section: 7015 case OMPD_single: 7016 case OMPD_master: 7017 case OMPD_critical: 7018 case OMPD_taskyield: 7019 case OMPD_barrier: 7020 case OMPD_taskwait: 7021 case OMPD_taskgroup: 7022 case OMPD_atomic: 7023 case OMPD_flush: 7024 case OMPD_depobj: 7025 case OMPD_scan: 7026 case OMPD_teams: 7027 case OMPD_target_data: 7028 case OMPD_target_exit_data: 7029 case OMPD_target_enter_data: 7030 case OMPD_distribute: 7031 case OMPD_distribute_simd: 7032 case OMPD_distribute_parallel_for: 7033 case OMPD_distribute_parallel_for_simd: 7034 case OMPD_teams_distribute: 7035 case OMPD_teams_distribute_simd: 7036 case OMPD_teams_distribute_parallel_for: 7037 case OMPD_teams_distribute_parallel_for_simd: 7038 case OMPD_target_update: 7039 case 
OMPD_declare_simd: 7040 case OMPD_declare_variant: 7041 case OMPD_begin_declare_variant: 7042 case OMPD_end_declare_variant: 7043 case OMPD_declare_target: 7044 case OMPD_end_declare_target: 7045 case OMPD_declare_reduction: 7046 case OMPD_declare_mapper: 7047 case OMPD_taskloop: 7048 case OMPD_taskloop_simd: 7049 case OMPD_master_taskloop: 7050 case OMPD_master_taskloop_simd: 7051 case OMPD_parallel_master_taskloop: 7052 case OMPD_parallel_master_taskloop_simd: 7053 case OMPD_requires: 7054 case OMPD_unknown: 7055 break; 7056 default: 7057 break; 7058 } 7059 llvm_unreachable("Unsupported directive kind."); 7060 } 7061 7062 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( 7063 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 7064 assert(!CGF.getLangOpts().OpenMPIsDevice && 7065 "Clauses associated with the teams directive expected to be emitted " 7066 "only for the host!"); 7067 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7068 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7069 "Expected target-based executable directive."); 7070 CGBuilderTy &Bld = CGF.Builder; 7071 llvm::Value *ThreadLimitVal = nullptr; 7072 llvm::Value *NumThreadsVal = nullptr; 7073 switch (DirectiveKind) { 7074 case OMPD_target: { 7075 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7076 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7077 return NumThreads; 7078 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7079 CGF.getContext(), CS->getCapturedStmt()); 7080 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7081 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 7082 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7083 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7084 const auto *ThreadLimitClause = 7085 Dir->getSingleClause<OMPThreadLimitClause>(); 7086 CodeGenFunction::LexicalScope Scope( 7087 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 7088 if (const auto *PreInit = 7089 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 7090 for (const auto *I : PreInit->decls()) { 7091 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7092 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7093 } else { 7094 CodeGenFunction::AutoVarEmission Emission = 7095 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7096 CGF.EmitAutoVarCleanups(Emission); 7097 } 7098 } 7099 } 7100 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7101 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7102 ThreadLimitVal = 7103 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7104 } 7105 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 7106 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 7107 CS = Dir->getInnermostCapturedStmt(); 7108 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7109 CGF.getContext(), CS->getCapturedStmt()); 7110 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 7111 } 7112 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 7113 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 7114 CS = Dir->getInnermostCapturedStmt(); 7115 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7116 return NumThreads; 7117 } 7118 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 7119 return Bld.getInt32(1); 7120 } 7121 return ThreadLimitVal ? 
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
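  // Illustrative example (not from the original source): for
  //   #pragma omp target
  //   #pragma omp parallel num_threads(32)
  // the nested 'parallel' child supplies the value, so 32 is emitted here;
  // emitting 0 instead leaves the choice of thread count to the runtime.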
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle the if clause. If it is present, the number of threads is
    // computed as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      ThreadLimitVal =
          ThreadLimitVal
              ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                   ThreadLimitVal),
                                 NumThreadsVal, ThreadLimitVal)
              : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
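  // Illustrative example (not from the original source): for
  //   #pragma omp target parallel if(parallel: n > 1) num_threads(4)
  // with a non-constant 'n', the emitted value is the IR equivalent of
  //   (n > 1) ? 4 : 1
  // which instantiates the <cond> ? (<numthreads> ? <numthreads> : 0) : 1
  // scheme described above.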
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags.
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have
    /// the use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map.
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    // Increment and decrement a separate reference counter so that the data
    // cannot be unmapped within the associated region. Thus, this flag is
    // intended to be used on 'target' and 'target data' directives because
    // they are inherently structured. It is not intended to be used on
    // 'target enter data' and 'target exit data' directives because they are
    // inherently dynamic.
    // This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in the target update directive.
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is a member of
    /// some struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
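  // Illustrative example (not from the original source): a 'map(tofrom: x)'
  // entry that is also a kernel argument is emitted with
  //   OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_TARGET_PARAM
  // i.e. 0x23 in the map-types array handed to the offload runtime.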
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }
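  // Note (added): with OMP_MAP_MEMBER_OF == 0xffff000000000000 the function
  // above returns 48, i.e. member positions live in the 16 most significant
  // bits of the flags value.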
  /// Class that holds debugging information for a data mapping to be passed
  /// to the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  /// Class that associates information with a base pointer to be passed to
  /// the runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types,
  /// user-defined mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };
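  // Illustrative example (not from the original source): for
  //   struct S { int x, y, z; } s;
  //   #pragma omp target map(tofrom: s.x, s.z)
  // LowestElem records field 0 (&s.x) and HighestElem field 2 (&s.z), so one
  // contiguous device allocation covering the range from s.x through s.z is
  // requested, even though s.y itself is not mapped.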
private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
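  // Illustrative examples (not from the original source) for the size
  // computation below, given 'int a[100]':
  //   a[10:20]  ->  20 * sizeof(int)
  //   a[:]      ->  sizeof(a), the whole base
  //   a[5:]     ->  (100 - 5) * sizeof(int), via the guarded subtraction path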
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for an array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of
    // relying on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizeof(base) - lb * sizeof(element).
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these
      // two type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OMP_MAP_NON_CONTIG;
    return Bits;
  }
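  // Illustrative example (not from the original source):
  //   map(always, close, to: p[0:n])
  // produces OMP_MAP_TO | OMP_MAP_ALWAYS | OMP_MAP_CLOSE here, with
  // OMP_MAP_PTR_AND_OBJ and OMP_MAP_TARGET_PARAM added or not according to
  // AddPtrFlag and AddIsTargetParamFlag.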
  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is a pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
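  // Illustrative examples (not from the original source): 'a[3:n]' is final
  // (its length is not provably one); 'a[3:1]' and 'a[3]' are not; 'a[:]' on
  // 'int a[1]' is not, since the constant dimension length is one.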
  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
      bool IsFirstComponentList, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a
    // capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list
    // of components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer; it
        // can be associated with the combined storage if shared memory mode
        // is active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF
    // some combined entry (for partial structs). Only the first PTR_AND_OBJ
    // entry in a component list should be marked as MEMBER_OF; all subsequent
    // entries do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)   (2)  (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct.
    // ps(3) is the pointee of ps(2), which is not a member of struct s, so it
    // should not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a
    // component in the component list which is a member expression. Useful
    // when we have a pointer or a final array section, in which case it is
    // the previous component in the list which tells us whether we have a
    // member expression. E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells
    // us whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct),
      // mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not an
          // array-like expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer, or if it is an array
      // section whose length can't be proved to be one. If this is a pointer,
      // it becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array
      // section as a final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                                    CGF.getContext().getTypeAlignInChars(
                                        OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress(CGF);
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress(CGF);
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the
        // object it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle the base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers,
                             IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress(CGF);
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress(CGF);
                }
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, MotionModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference ||
                  FirstPointerInComplexData || IsMemberReference,
              IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as
            // well, then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the
              // flag should be later updated with the correct value of
              // MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          CombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of
        // the mapped member. If the parent is "*this", then the value
        // declaration is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this
          // struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LowestElem};
          }
        }

        // Need to emit a combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;
        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran through the whole component list, allocate space for the
    // whole record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // To support strides in array sections, we initialize the first
    // dimension size as 1, the first offset as 0, and the first count as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect the size information for each dimension and get the element
    // size as the first stride. For example, for 'int arr[10][10]', DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get the element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for the next iteration.
        if (ElementType) {
          // For the case of having a pointer as the base, we need to remove
          // one level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get the dimension value except for the last dimension, since we
      // don't need it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CAT->getSize().getZExtValue()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto DI = DimSizes.begin() + 1;
    // Product of the dimension sizes.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect info for non-contiguous maps. Notice that offset, count, and
    // stride are only meaningful for array sections, so we insert a null for
    // anything other than an array section.
    // Also, the sizes of the offset, count, and stride arrays are not the
    // same as those of pointers, base_pointers, sizes, or dims; instead they
    // match the number of non-contiguous declarations in the target update
    // to/from clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If the offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, all the
        // lower dimensions are constructed as array sections as well.
        // However, for a case like arr[0:2][2], Clang constructs the inner
        // dimension as an array section even though it is not actually in
        // array-section form according to the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);
      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //              Offset Count     Stride
      //    D0          0      1         4    (int)    <- dummy dimension
      //    D1          0      2         8    (2 * (1) * 4)
      //    D2          1      2        20    (1 * (1 * 5) * 4)
      //    D3          0      2       200    (2 * (1 * 5 * 4) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }

  /// Return the adjusted map modifiers if the declaration a capture refers
  /// to appears in a first-private clause. This is expected to be used only
  /// with directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // For map(to: lambda): use the user-specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Rotate by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }
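  // Illustrative example (not from the original source): getMemberOfFlag(0)
  // encodes MEMBER_OF(1) as 1ULL << 48; setCorrectMemberOfFlag then replaces
  // the 0xFFFF placeholder with that value on qualifying entries.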
RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8514 8515 unsigned NumElements = St->getNumElements(); 8516 llvm::SmallVector< 8517 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8518 RecordLayout(NumElements); 8519 8520 // Fill bases. 8521 for (const auto &I : RD->bases()) { 8522 if (I.isVirtual()) 8523 continue; 8524 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8525 // Ignore empty bases. 8526 if (Base->isEmpty() || CGF.getContext() 8527 .getASTRecordLayout(Base) 8528 .getNonVirtualSize() 8529 .isZero()) 8530 continue; 8531 8532 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8533 RecordLayout[FieldIndex] = Base; 8534 } 8535 // Fill in virtual bases. 8536 for (const auto &I : RD->vbases()) { 8537 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8538 // Ignore empty bases. 8539 if (Base->isEmpty()) 8540 continue; 8541 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8542 if (RecordLayout[FieldIndex]) 8543 continue; 8544 RecordLayout[FieldIndex] = Base; 8545 } 8546 // Fill in all the fields. 8547 assert(!RD->isUnion() && "Unexpected union."); 8548 for (const auto *Field : RD->fields()) { 8549 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8550 // will fill in later.) 8551 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8552 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8553 RecordLayout[FieldIndex] = Field; 8554 } 8555 } 8556 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8557 &Data : RecordLayout) { 8558 if (Data.isNull()) 8559 continue; 8560 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8561 getPlainLayout(Base, Layout, /*AsBase=*/true); 8562 else 8563 Layout.push_back(Data.get<const FieldDecl *>()); 8564 } 8565 } 8566 8567 /// Generate all the base pointers, section pointers, sizes, map types, and 8568 /// mappers for the extracted mappable expressions (all included in \a 8569 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8570 /// pair of the relevant declaration and index where it occurs is appended to 8571 /// the device pointers info array. 8572 void generateAllInfoForClauses( 8573 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 8574 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8575 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8576 // We have to process the component lists that relate with the same 8577 // declaration in a single chunk so that we can generate the map flags 8578 // correctly. Therefore, we organize all lists in a map. 8579 enum MapKind { Present, Allocs, Other, Total }; 8580 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8581 SmallVector<SmallVector<MapInfo, 8>, 4>> 8582 Info; 8583 8584 // Helper function to fill the information map for the different supported 8585 // clauses. 
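// For instance (illustrative), on a directive with
//   #pragma omp target map(present, to: x) map(alloc: y) map(tofrom: z)
// the component lists for x, y, and z are filed under the Present, Allocs,
// and Other buckets, respectively, so that each declaration's lists can be
// processed as a single chunk in that order.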
8586 auto &&InfoGen =
8587 [&Info, &SkipVarSet](
8588 const ValueDecl *D, MapKind Kind,
8589 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8590 OpenMPMapClauseKind MapType,
8591 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8592 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8593 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8594 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8595 if (SkipVarSet.contains(D))
8596 return;
8597 auto It = Info.find(D);
8598 if (It == Info.end())
8599 It = Info
8600 .insert(std::make_pair(
8601 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8602 .first;
8603 It->second[Kind].emplace_back(
8604 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8605 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8606 };
8607
8608 for (const auto *Cl : Clauses) {
8609 const auto *C = dyn_cast<OMPMapClause>(Cl);
8610 if (!C)
8611 continue;
8612 MapKind Kind = Other;
8613 if (llvm::is_contained(C->getMapTypeModifiers(),
8614 OMPC_MAP_MODIFIER_present))
8615 Kind = Present;
8616 else if (C->getMapType() == OMPC_MAP_alloc)
8617 Kind = Allocs;
8618 const auto *EI = C->getVarRefs().begin();
8619 for (const auto L : C->component_lists()) {
8620 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8621 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8622 C->getMapTypeModifiers(), llvm::None,
8623 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8624 E);
8625 ++EI;
8626 }
8627 }
8628 for (const auto *Cl : Clauses) {
8629 const auto *C = dyn_cast<OMPToClause>(Cl);
8630 if (!C)
8631 continue;
8632 MapKind Kind = Other;
8633 if (llvm::is_contained(C->getMotionModifiers(),
8634 OMPC_MOTION_MODIFIER_present))
8635 Kind = Present;
8636 const auto *EI = C->getVarRefs().begin();
8637 for (const auto L : C->component_lists()) {
8638 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8639 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8640 C->isImplicit(), std::get<2>(L), *EI);
8641 ++EI;
8642 }
8643 }
8644 for (const auto *Cl : Clauses) {
8645 const auto *C = dyn_cast<OMPFromClause>(Cl);
8646 if (!C)
8647 continue;
8648 MapKind Kind = Other;
8649 if (llvm::is_contained(C->getMotionModifiers(),
8650 OMPC_MOTION_MODIFIER_present))
8651 Kind = Present;
8652 const auto *EI = C->getVarRefs().begin();
8653 for (const auto L : C->component_lists()) {
8654 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8655 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8656 C->isImplicit(), std::get<2>(L), *EI);
8657 ++EI;
8658 }
8659 }
8660
8661 // Look at the use_device_ptr clause information and mark the existing map
8662 // entries as such. If there is no map information for an entry in the
8663 // use_device_ptr list, we create one with map type 'alloc' and zero size
8664 // section. It is the user's fault if that was not mapped before. If there
8665 // is no map information and the pointer is a struct member, then we defer
8666 // the emission of that entry until the whole struct has been processed.
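// For example (illustrative):
//   #pragma omp target data map(tofrom: ptr[0:n]) use_device_ptr(ptr)
// finds the existing map entry for 'ptr' and marks it as returning the
// device pointer, whereas a use_device_ptr(ptr) with no matching map
// clause falls back to the zero-size section described above.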
8667 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8668 SmallVector<DeferredDevicePtrEntryTy, 4>> 8669 DeferredInfo; 8670 MapCombinedInfoTy UseDevicePtrCombinedInfo; 8671 8672 for (const auto *Cl : Clauses) { 8673 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 8674 if (!C) 8675 continue; 8676 for (const auto L : C->component_lists()) { 8677 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8678 std::get<1>(L); 8679 assert(!Components.empty() && 8680 "Not expecting empty list of components!"); 8681 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8682 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8683 const Expr *IE = Components.back().getAssociatedExpression(); 8684 // If the first component is a member expression, we have to look into 8685 // 'this', which maps to null in the map of map information. Otherwise 8686 // look directly for the information. 8687 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8688 8689 // We potentially have map information for this declaration already. 8690 // Look for the first set of components that refer to it. 8691 if (It != Info.end()) { 8692 bool Found = false; 8693 for (auto &Data : It->second) { 8694 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8695 return MI.Components.back().getAssociatedDeclaration() == VD; 8696 }); 8697 // If we found a map entry, signal that the pointer has to be 8698 // returned and move on to the next declaration. Exclude cases where 8699 // the base pointer is mapped as array subscript, array section or 8700 // array shaping. The base address is passed as a pointer to base in 8701 // this case and cannot be used as a base for use_device_ptr list 8702 // item. 8703 if (CI != Data.end()) { 8704 auto PrevCI = std::next(CI->Components.rbegin()); 8705 const auto *VarD = dyn_cast<VarDecl>(VD); 8706 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8707 isa<MemberExpr>(IE) || 8708 !VD->getType().getNonReferenceType()->isPointerType() || 8709 PrevCI == CI->Components.rend() || 8710 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 8711 VarD->hasLocalStorage()) { 8712 CI->ReturnDevicePointer = true; 8713 Found = true; 8714 break; 8715 } 8716 } 8717 } 8718 if (Found) 8719 continue; 8720 } 8721 8722 // We didn't find any match in our map information - generate a zero 8723 // size array section - if the pointer is a struct member we defer this 8724 // action until the whole struct has been processed. 8725 if (isa<MemberExpr>(IE)) { 8726 // Insert the pointer into Info to be processed by 8727 // generateInfoForComponentList. Because it is a member pointer 8728 // without a pointee, no entry will be generated for it, therefore 8729 // we need to generate one after the whole struct has been processed. 8730 // Nonetheless, generateInfoForComponentList must be called to take 8731 // the pointer into account for the calculation of the range of the 8732 // partial struct. 
8733 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8734 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8735 nullptr);
8736 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8737 } else {
8738 llvm::Value *Ptr =
8739 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8740 UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8741 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8742 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8743 UseDevicePtrCombinedInfo.Sizes.push_back(
8744 llvm::Constant::getNullValue(CGF.Int64Ty));
8745 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8746 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8747 }
8748 }
8749 }
8750
8751 // Look at the use_device_addr clause information and mark the existing map
8752 // entries as such. If there is no map information for an entry in the
8753 // use_device_addr list, we create one with map type 'alloc' and zero size
8754 // section. It is the user's fault if that was not mapped before. If there
8755 // is no map information and the pointer is a struct member, then we defer
8756 // the emission of that entry until the whole struct has been processed.
8757 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8758 for (const auto *Cl : Clauses) {
8759 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8760 if (!C)
8761 continue;
8762 for (const auto L : C->component_lists()) {
8763 assert(!std::get<1>(L).empty() &&
8764 "Not expecting empty list of components!");
8765 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8766 if (!Processed.insert(VD).second)
8767 continue;
8768 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8769 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8770 // If the first component is a member expression, we have to look into
8771 // 'this', which maps to null in the map of map information. Otherwise
8772 // look directly for the information.
8773 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8774
8775 // We potentially have map information for this declaration already.
8776 // Look for the first set of components that refer to it.
8777 if (It != Info.end()) {
8778 bool Found = false;
8779 for (auto &Data : It->second) {
8780 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8781 return MI.Components.back().getAssociatedDeclaration() == VD;
8782 });
8783 // If we found a map entry, signal that the pointer has to be
8784 // returned and move on to the next declaration.
8785 if (CI != Data.end()) {
8786 CI->ReturnDevicePointer = true;
8787 Found = true;
8788 break;
8789 }
8790 }
8791 if (Found)
8792 continue;
8793 }
8794
8795 // We didn't find any match in our map information - generate a zero
8796 // size array section - if the pointer is a struct member we defer this
8797 // action until the whole struct has been processed.
8798 if (isa<MemberExpr>(IE)) {
8799 // Insert the pointer into Info to be processed by
8800 // generateInfoForComponentList. Because it is a member pointer
8801 // without a pointee, no entry will be generated for it, therefore
8802 // we need to generate one after the whole struct has been processed.
8803 // Nonetheless, generateInfoForComponentList must be called to take
8804 // the pointer into account for the calculation of the range of the
8805 // partial struct.
8806 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8807 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8808 nullptr, nullptr, /*ForDeviceAddr=*/true); 8809 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8810 } else { 8811 llvm::Value *Ptr; 8812 if (IE->isGLValue()) 8813 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8814 else 8815 Ptr = CGF.EmitScalarExpr(IE); 8816 CombinedInfo.Exprs.push_back(VD); 8817 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8818 CombinedInfo.Pointers.push_back(Ptr); 8819 CombinedInfo.Sizes.push_back( 8820 llvm::Constant::getNullValue(CGF.Int64Ty)); 8821 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8822 CombinedInfo.Mappers.push_back(nullptr); 8823 } 8824 } 8825 } 8826 8827 for (const auto &Data : Info) { 8828 StructRangeInfoTy PartialStruct; 8829 // Temporary generated information. 8830 MapCombinedInfoTy CurInfo; 8831 const Decl *D = Data.first; 8832 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8833 for (const auto &M : Data.second) { 8834 for (const MapInfo &L : M) { 8835 assert(!L.Components.empty() && 8836 "Not expecting declaration with no component lists."); 8837 8838 // Remember the current base pointer index. 8839 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8840 CurInfo.NonContigInfo.IsNonContiguous = 8841 L.Components.back().isNonContiguous(); 8842 generateInfoForComponentList( 8843 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8844 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8845 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8846 8847 // If this entry relates with a device pointer, set the relevant 8848 // declaration and add the 'return pointer' flag. 8849 if (L.ReturnDevicePointer) { 8850 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8851 "Unexpected number of mapped base pointers."); 8852 8853 const ValueDecl *RelevantVD = 8854 L.Components.back().getAssociatedDeclaration(); 8855 assert(RelevantVD && 8856 "No relevant declaration related with device pointer??"); 8857 8858 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8859 RelevantVD); 8860 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8861 } 8862 } 8863 } 8864 8865 // Append any pending zero-length pointers which are struct members and 8866 // used with use_device_ptr or use_device_addr. 8867 auto CI = DeferredInfo.find(Data.first); 8868 if (CI != DeferredInfo.end()) { 8869 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8870 llvm::Value *BasePtr; 8871 llvm::Value *Ptr; 8872 if (L.ForDeviceAddr) { 8873 if (L.IE->isGLValue()) 8874 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8875 else 8876 Ptr = this->CGF.EmitScalarExpr(L.IE); 8877 BasePtr = Ptr; 8878 // Entry is RETURN_PARAM. Also, set the placeholder value 8879 // MEMBER_OF=FFFF so that the entry is later updated with the 8880 // correct value of MEMBER_OF. 8881 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8882 } else { 8883 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8884 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8885 L.IE->getExprLoc()); 8886 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8887 // placeholder value MEMBER_OF=FFFF so that the entry is later 8888 // updated with the correct value of MEMBER_OF. 
8889 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8890 OMP_MAP_MEMBER_OF); 8891 } 8892 CurInfo.Exprs.push_back(L.VD); 8893 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8894 CurInfo.Pointers.push_back(Ptr); 8895 CurInfo.Sizes.push_back( 8896 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8897 CurInfo.Mappers.push_back(nullptr); 8898 } 8899 } 8900 // If there is an entry in PartialStruct it means we have a struct with 8901 // individual members mapped. Emit an extra combined entry. 8902 if (PartialStruct.Base.isValid()) { 8903 CurInfo.NonContigInfo.Dims.push_back(0); 8904 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); 8905 } 8906 8907 // We need to append the results of this capture to what we already 8908 // have. 8909 CombinedInfo.append(CurInfo); 8910 } 8911 // Append data for use_device_ptr clauses. 8912 CombinedInfo.append(UseDevicePtrCombinedInfo); 8913 } 8914 8915 public: 8916 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8917 : CurDir(&Dir), CGF(CGF) { 8918 // Extract firstprivate clause information. 8919 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8920 for (const auto *D : C->varlists()) 8921 FirstPrivateDecls.try_emplace( 8922 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8923 // Extract implicit firstprivates from uses_allocators clauses. 8924 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8925 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8926 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8927 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8928 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8929 /*Implicit=*/true); 8930 else if (const auto *VD = dyn_cast<VarDecl>( 8931 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8932 ->getDecl())) 8933 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8934 } 8935 } 8936 // Extract device pointer clause information. 8937 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8938 for (auto L : C->component_lists()) 8939 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8940 // Extract map information. 8941 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { 8942 if (C->getMapType() != OMPC_MAP_to) 8943 continue; 8944 for (auto L : C->component_lists()) { 8945 const ValueDecl *VD = std::get<0>(L); 8946 const auto *RD = VD ? VD->getType() 8947 .getCanonicalType() 8948 .getNonReferenceType() 8949 ->getAsCXXRecordDecl() 8950 : nullptr; 8951 if (RD && RD->isLambda()) 8952 LambdasMap.try_emplace(std::get<0>(L), C); 8953 } 8954 } 8955 } 8956 8957 /// Constructor for the declare mapper directive. 8958 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8959 : CurDir(&Dir), CGF(CGF) {} 8960 8961 /// Generate code for the combined entry if we have a partially mapped struct 8962 /// and take care of the mapping flags of the arguments corresponding to 8963 /// individual struct members. 
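/// For instance (illustrative), for
/// \code
/// struct S { int a; int b; } s;
/// #pragma omp target map(to: s.a) map(from: s.b)
/// \endcode
/// a combined entry covering the storage from 's.a' through 's.b' is emitted
/// in addition to the MEMBER_OF entries for the individual members.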
8964 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8965 MapFlagsArrayTy &CurTypes,
8966 const StructRangeInfoTy &PartialStruct,
8967 const ValueDecl *VD = nullptr,
8968 bool NotTargetParams = true) const {
8969 if (CurTypes.size() == 1 &&
8970 ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8971 !PartialStruct.IsArraySection)
8972 return;
8973 Address LBAddr = PartialStruct.LowestElem.second;
8974 Address HBAddr = PartialStruct.HighestElem.second;
8975 if (PartialStruct.HasCompleteRecord) {
8976 LBAddr = PartialStruct.LB;
8977 HBAddr = PartialStruct.LB;
8978 }
8979 CombinedInfo.Exprs.push_back(VD);
8980 // Base is the base of the struct.
8981 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8982 // Pointer is the address of the lowest element.
8983 llvm::Value *LB = LBAddr.getPointer();
8984 CombinedInfo.Pointers.push_back(LB);
8985 // There should not be a mapper for a combined entry.
8986 CombinedInfo.Mappers.push_back(nullptr);
8987 // Size is (addr of {highest+1} element) - (addr of lowest element).
8988 llvm::Value *HB = HBAddr.getPointer();
8989 llvm::Value *HAddr =
8990 CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8991 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8992 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8993 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8994 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8995 /*isSigned=*/false);
8996 CombinedInfo.Sizes.push_back(Size);
8997 // The map type is always TARGET_PARAM when we generate info for captures.
8998 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8999 : OMP_MAP_TARGET_PARAM);
9000 // If any element has the present modifier, then make sure the runtime
9001 // doesn't attempt to allocate the struct.
9002 if (CurTypes.end() !=
9003 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9004 return Type & OMP_MAP_PRESENT;
9005 }))
9006 CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
9007 // Remove the TARGET_PARAM flag from the first element.
9008 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
9009 // If any element has the ompx_hold modifier, then make sure the runtime
9010 // uses the hold reference count for the struct as a whole so that it won't
9011 // be unmapped by an extra dynamic reference count decrement. Add it to all
9012 // elements as well so the runtime knows which reference count to check
9013 // when determining whether it's time for device-to-host transfers of
9014 // individual elements.
9015 if (CurTypes.end() !=
9016 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9017 return Type & OMP_MAP_OMPX_HOLD;
9018 })) {
9019 CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
9020 for (auto &M : CurTypes)
9021 M |= OMP_MAP_OMPX_HOLD;
9022 }
9023
9024 // All other current entries will be MEMBER_OF the combined entry
9025 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9026 // 0xFFFF in the MEMBER_OF field).
9027 OpenMPOffloadMappingFlags MemberOfFlag =
9028 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
9029 for (auto &M : CurTypes)
9030 setCorrectMemberOfFlag(M, MemberOfFlag);
9031 }
9032
9033 /// Generate all the base pointers, section pointers, sizes, map types, and
9034 /// mappers for the extracted mappable expressions (all included in \a
9035 /// CombinedInfo).
/// Also, for each item that relates to a device pointer, a
9036 /// pair of the relevant declaration and index where it occurs is appended to
9037 /// the device pointers info array.
9038 void generateAllInfo(
9039 MapCombinedInfoTy &CombinedInfo,
9040 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9041 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9042 assert(CurDir.is<const OMPExecutableDirective *>() &&
9043 "Expect an executable directive");
9044 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9045 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9046 }
9047
9048 /// Generate all the base pointers, section pointers, sizes, map types, and
9049 /// mappers for the extracted map clauses of a user-defined mapper (all
9050 /// included in \a CombinedInfo).
9051 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9052 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9053 "Expect a declare mapper directive");
9054 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9055 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9056 }
9057
9058 /// Emit capture info for lambdas for variables captured by reference.
9059 void generateInfoForLambdaCaptures(
9060 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9061 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9062 const auto *RD = VD->getType()
9063 .getCanonicalType()
9064 .getNonReferenceType()
9065 ->getAsCXXRecordDecl();
9066 if (!RD || !RD->isLambda())
9067 return;
9068 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
9069 LValue VDLVal = CGF.MakeAddrLValue(
9070 VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
9071 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
9072 FieldDecl *ThisCapture = nullptr;
9073 RD->getCaptureFields(Captures, ThisCapture);
9074 if (ThisCapture) {
9075 LValue ThisLVal =
9076 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9077 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9078 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9079 VDLVal.getPointer(CGF));
9080 CombinedInfo.Exprs.push_back(VD);
9081 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9082 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9083 CombinedInfo.Sizes.push_back(
9084 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9085 CGF.Int64Ty, /*isSigned=*/true));
9086 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9087 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9088 CombinedInfo.Mappers.push_back(nullptr);
9089 }
9090 for (const LambdaCapture &LC : RD->captures()) {
9091 if (!LC.capturesVariable())
9092 continue;
9093 const VarDecl *VD = LC.getCapturedVar();
9094 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9095 continue;
9096 auto It = Captures.find(VD);
9097 assert(It != Captures.end() && "Found lambda capture without field.");
9098 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9099 if (LC.getCaptureKind() == LCK_ByRef) {
9100 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9101 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9102 VDLVal.getPointer(CGF));
9103 CombinedInfo.Exprs.push_back(VD);
9104 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9105 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9106 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9107 CGF.getTypeSize(
9108 VD->getType().getCanonicalType().getNonReferenceType()),
9109 CGF.Int64Ty, /*isSigned=*/true));
9110 } else {
9111 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9112 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9113 VDLVal.getPointer(CGF));
9114 CombinedInfo.Exprs.push_back(VD);
9115 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9116 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9117 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9118 }
9119 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9120 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9121 CombinedInfo.Mappers.push_back(nullptr);
9122 }
9123 }
9124
9125 /// Set the correct indices for lambda captures.
9126 void adjustMemberOfForLambdaCaptures(
9127 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9128 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9129 MapFlagsArrayTy &Types) const {
9130 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9131 // Set the correct member_of idx for all implicit lambda captures.
9132 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9133 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9134 continue;
9135 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9136 assert(BasePtr && "Unable to find base lambda address.");
9137 int TgtIdx = -1;
9138 for (unsigned J = I; J > 0; --J) {
9139 unsigned Idx = J - 1;
9140 if (Pointers[Idx] != BasePtr)
9141 continue;
9142 TgtIdx = Idx;
9143 break;
9144 }
9145 assert(TgtIdx != -1 && "Unable to find parent lambda.");
9146 // All other current entries will be MEMBER_OF the combined entry
9147 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9148 // 0xFFFF in the MEMBER_OF field).
9149 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9150 setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9151 }
9152 }
9153
9154 /// Generate the base pointers, section pointers, sizes, map types, and
9155 /// mappers associated to a given capture (all included in \a CombinedInfo).
9156 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9157 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9158 StructRangeInfoTy &PartialStruct) const {
9159 assert(!Cap->capturesVariableArrayType() &&
9160 "Not expecting to generate map info for a variable array type!");
9161
9162 // We need to know when we are generating information for the first component.
9163 const ValueDecl *VD = Cap->capturesThis()
9164 ? nullptr
9165 : Cap->getCapturedVar()->getCanonicalDecl();
9166
9167 // For map(to: lambda): skip it here; it is processed in
9168 // generateDefaultMapInfo.
9169 if (LambdasMap.count(VD))
9170 return;
9171
9172 // If this declaration appears in an is_device_ptr clause we just have to
9173 // pass the pointer by value. If it is a reference to a declaration, we just
9174 // pass its value.
9175 if (DevPointersMap.count(VD)) {
9176 CombinedInfo.Exprs.push_back(VD);
9177 CombinedInfo.BasePointers.emplace_back(Arg, VD);
9178 CombinedInfo.Pointers.push_back(Arg);
9179 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9180 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9181 /*isSigned=*/true));
9182 CombinedInfo.Types.push_back(
9183 (Cap->capturesVariable() ?
OMP_MAP_TO : OMP_MAP_LITERAL) |
9184 OMP_MAP_TARGET_PARAM);
9185 CombinedInfo.Mappers.push_back(nullptr);
9186 return;
9187 }
9188
9189 using MapData =
9190 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9191 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9192 const ValueDecl *, const Expr *>;
9193 SmallVector<MapData, 4> DeclComponentLists;
9194 assert(CurDir.is<const OMPExecutableDirective *>() &&
9195 "Expect an executable directive");
9196 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9197 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9198 const auto *EI = C->getVarRefs().begin();
9199 for (const auto L : C->decl_component_lists(VD)) {
9200 const ValueDecl *VDecl, *Mapper;
9201 // The expression is not correct if the mapping is implicit.
9202 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9203 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9204 std::tie(VDecl, Components, Mapper) = L;
9205 assert(VDecl == VD && "We got information for the wrong declaration??");
9206 assert(!Components.empty() &&
9207 "Not expecting declaration with no component lists.");
9208 DeclComponentLists.emplace_back(Components, C->getMapType(),
9209 C->getMapTypeModifiers(),
9210 C->isImplicit(), Mapper, E);
9211 ++EI;
9212 }
9213 }
9214 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9215 const MapData &RHS) {
9216 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9217 OpenMPMapClauseKind MapType = std::get<1>(RHS);
9218 bool HasPresent =
9219 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9220 bool HasAllocs = MapType == OMPC_MAP_alloc;
9221 MapModifiers = std::get<2>(RHS);
9222 MapType = std::get<1>(LHS);
9223 bool HasPresentR =
9224 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9225 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9226 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9227 });
9228
9229 // Find overlapping elements (including the offset from the base element).
9230 llvm::SmallDenseMap<
9231 const MapData *,
9232 llvm::SmallVector<
9233 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9234 4>
9235 OverlappedData;
9236 size_t Count = 0;
9237 for (const MapData &L : DeclComponentLists) {
9238 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9239 OpenMPMapClauseKind MapType;
9240 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9241 bool IsImplicit;
9242 const ValueDecl *Mapper;
9243 const Expr *VarRef;
9244 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9245 L;
9246 ++Count;
9247 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9248 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9249 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9250 VarRef) = L1;
9251 auto CI = Components.rbegin();
9252 auto CE = Components.rend();
9253 auto SI = Components1.rbegin();
9254 auto SE = Components1.rend();
9255 for (; CI != CE && SI != SE; ++CI, ++SI) {
9256 if (CI->getAssociatedExpression()->getStmtClass() !=
9257 SI->getAssociatedExpression()->getStmtClass())
9258 break;
9259 // Are we dealing with different variables/fields?
9260 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9261 break;
9262 }
9263 // We found an overlap if, for at least one of the lists, we reached the
9264 // head of its components list.
9265 if (CI == CE || SI == SE) {
9266 // Ignore it if it is the same component.
9267 if (CI == CE && SI == SE)
9268 continue;
9269 const auto It = (SI == SE) ? CI : SI;
9270 // If one component is a pointer and another one is a kind of
9271 // dereference of this pointer (array subscript, section, dereference,
9272 // etc.), it is not an overlap.
9273 // Likewise if one component is a base and the other component is a
9274 // dereferenced pointer memberexpr with the same base.
9275 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9276 (std::prev(It)->getAssociatedDeclaration() &&
9277 std::prev(It)
9278 ->getAssociatedDeclaration()
9279 ->getType()
9280 ->isPointerType()) ||
9281 (It->getAssociatedDeclaration() &&
9282 It->getAssociatedDeclaration()->getType()->isPointerType() &&
9283 std::next(It) != CE && std::next(It) != SE))
9284 continue;
9285 const MapData &BaseData = CI == CE ? L : L1;
9286 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9287 SI == SE ? Components : Components1;
9288 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9289 OverlappedElements.getSecond().push_back(SubData);
9290 }
9291 }
9292 }
9293 // Sort the overlapped elements for each item.
9294 llvm::SmallVector<const FieldDecl *, 4> Layout;
9295 if (!OverlappedData.empty()) {
9296 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9297 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9298 while (BaseType != OrigType) {
9299 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9300 OrigType = BaseType->getPointeeOrArrayElementType();
9301 }
9302
9303 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9304 getPlainLayout(CRD, Layout, /*AsBase=*/false);
9305 else {
9306 const auto *RD = BaseType->getAsRecordDecl();
9307 Layout.append(RD->field_begin(), RD->field_end());
9308 }
9309 }
9310 for (auto &Pair : OverlappedData) {
9311 llvm::stable_sort(
9312 Pair.getSecond(),
9313 [&Layout](
9314 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9315 OMPClauseMappableExprCommon::MappableExprComponentListRef
9316 Second) {
9317 auto CI = First.rbegin();
9318 auto CE = First.rend();
9319 auto SI = Second.rbegin();
9320 auto SE = Second.rend();
9321 for (; CI != CE && SI != SE; ++CI, ++SI) {
9322 if (CI->getAssociatedExpression()->getStmtClass() !=
9323 SI->getAssociatedExpression()->getStmtClass())
9324 break;
9325 // Are we dealing with different variables/fields?
9326 if (CI->getAssociatedDeclaration() !=
9327 SI->getAssociatedDeclaration())
9328 break;
9329 }
9330
9331 // Lists contain the same elements.
9332 if (CI == CE && SI == SE)
9333 return false;
9334
9335 // A list with fewer elements is less than a list with more elements.
9336 if (CI == CE || SI == SE)
9337 return CI == CE;
9338
9339 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9340 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9341 if (FD1->getParent() == FD2->getParent())
9342 return FD1->getFieldIndex() < FD2->getFieldIndex();
9343 const auto *It =
9344 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9345 return FD == FD1 || FD == FD2;
9346 });
9347 return *It == FD1;
9348 });
9349 }
9350
9351 // Associated with a capture, because the mapping flags depend on it.
9352 // Go through all of the elements with overlapped elements.
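// For instance (illustrative), map(tofrom: s) and map(to: s.x) on the same
// directive overlap: the component list for 's' becomes the base data and
// the list for 's.x' is recorded as one of its overlapped elements, so the
// storage of 's' is emitted around 's.x' piece by piece.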
9353 bool IsFirstComponentList = true; 9354 for (const auto &Pair : OverlappedData) { 9355 const MapData &L = *Pair.getFirst(); 9356 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9357 OpenMPMapClauseKind MapType; 9358 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9359 bool IsImplicit; 9360 const ValueDecl *Mapper; 9361 const Expr *VarRef; 9362 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9363 L; 9364 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 9365 OverlappedComponents = Pair.getSecond(); 9366 generateInfoForComponentList( 9367 MapType, MapModifiers, llvm::None, Components, CombinedInfo, 9368 PartialStruct, IsFirstComponentList, IsImplicit, Mapper, 9369 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents); 9370 IsFirstComponentList = false; 9371 } 9372 // Go through other elements without overlapped elements. 9373 for (const MapData &L : DeclComponentLists) { 9374 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9375 OpenMPMapClauseKind MapType; 9376 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9377 bool IsImplicit; 9378 const ValueDecl *Mapper; 9379 const Expr *VarRef; 9380 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9381 L; 9382 auto It = OverlappedData.find(&L); 9383 if (It == OverlappedData.end()) 9384 generateInfoForComponentList(MapType, MapModifiers, llvm::None, 9385 Components, CombinedInfo, PartialStruct, 9386 IsFirstComponentList, IsImplicit, Mapper, 9387 /*ForDeviceAddr=*/false, VD, VarRef); 9388 IsFirstComponentList = false; 9389 } 9390 } 9391 9392 /// Generate the default map information for a given capture \a CI, 9393 /// record field declaration \a RI and captured value \a CV. 9394 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 9395 const FieldDecl &RI, llvm::Value *CV, 9396 MapCombinedInfoTy &CombinedInfo) const { 9397 bool IsImplicit = true; 9398 // Do the default mapping. 9399 if (CI.capturesThis()) { 9400 CombinedInfo.Exprs.push_back(nullptr); 9401 CombinedInfo.BasePointers.push_back(CV); 9402 CombinedInfo.Pointers.push_back(CV); 9403 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 9404 CombinedInfo.Sizes.push_back( 9405 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 9406 CGF.Int64Ty, /*isSigned=*/true)); 9407 // Default map type. 9408 CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM); 9409 } else if (CI.capturesVariableByCopy()) { 9410 const VarDecl *VD = CI.getCapturedVar(); 9411 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 9412 CombinedInfo.BasePointers.push_back(CV); 9413 CombinedInfo.Pointers.push_back(CV); 9414 if (!RI.getType()->isAnyPointerType()) { 9415 // We have to signal to the runtime captures passed by value that are 9416 // not pointers. 9417 CombinedInfo.Types.push_back(OMP_MAP_LITERAL); 9418 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9419 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 9420 } else { 9421 // Pointers are implicitly mapped with a zero size and no flags 9422 // (other than first map that is added for all implicit maps). 
9423 CombinedInfo.Types.push_back(OMP_MAP_NONE);
9424 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9425 }
9426 auto I = FirstPrivateDecls.find(VD);
9427 if (I != FirstPrivateDecls.end())
9428 IsImplicit = I->getSecond();
9429 } else {
9430 assert(CI.capturesVariable() && "Expected captured reference.");
9431 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9432 QualType ElementType = PtrTy->getPointeeType();
9433 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9434 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9435 // The default map type for a scalar/complex type is 'to' because by
9436 // default the value doesn't have to be retrieved. For an aggregate
9437 // type, the default is 'tofrom'.
9438 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9439 const VarDecl *VD = CI.getCapturedVar();
9440 auto I = FirstPrivateDecls.find(VD);
9441 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9442 CombinedInfo.BasePointers.push_back(CV);
9443 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9444 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9445 CV, ElementType, CGF.getContext().getDeclAlign(VD),
9446 AlignmentSource::Decl));
9447 CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9448 } else {
9449 CombinedInfo.Pointers.push_back(CV);
9450 }
9451 if (I != FirstPrivateDecls.end())
9452 IsImplicit = I->getSecond();
9453 }
9454 // Every default map produces a single argument which is a target parameter.
9455 CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9456
9457 // Add a flag stating this is an implicit map.
9458 if (IsImplicit)
9459 CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9460
9461 // No user-defined mapper for default mapping.
9462 CombinedInfo.Mappers.push_back(nullptr);
9463 }
9464 };
9465 } // anonymous namespace
9466
9467 static void emitNonContiguousDescriptor(
9468 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9469 CGOpenMPRuntime::TargetDataInfo &Info) {
9470 CodeGenModule &CGM = CGF.CGM;
9471 MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9472 &NonContigInfo = CombinedInfo.NonContigInfo;
9473
9474 // Build an array of struct descriptor_dim and then assign it to
9475 // offload_args.
9476 //
9477 // struct descriptor_dim {
9478 // uint64_t offset;
9479 // uint64_t count;
9480 // uint64_t stride;
9481 // };
9482 ASTContext &C = CGF.getContext();
9483 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9484 RecordDecl *RD;
9485 RD = C.buildImplicitRecord("descriptor_dim");
9486 RD->startDefinition();
9487 addFieldToRecordDecl(C, RD, Int64Ty);
9488 addFieldToRecordDecl(C, RD, Int64Ty);
9489 addFieldToRecordDecl(C, RD, Int64Ty);
9490 RD->completeDefinition();
9491 QualType DimTy = C.getRecordType(RD);
9492
9493 enum { OffsetFD = 0, CountFD, StrideFD };
9494 // We need two index variables here since the size of "Dims" is the same as
9495 // the size of Components; however, the sizes of offset, count, and stride
9496 // equal the number of non-contiguous base declarations.
9497 for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9498 // Skip emitting IR if the dimension size is 1, since it cannot be
9499 // non-contiguous.
9500 if (NonContigInfo.Dims[I] == 1) 9501 continue; 9502 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]); 9503 QualType ArrayTy = 9504 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); 9505 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 9506 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { 9507 unsigned RevIdx = EE - II - 1; 9508 LValue DimsLVal = CGF.MakeAddrLValue( 9509 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); 9510 // Offset 9511 LValue OffsetLVal = CGF.EmitLValueForField( 9512 DimsLVal, *std::next(RD->field_begin(), OffsetFD)); 9513 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal); 9514 // Count 9515 LValue CountLVal = CGF.EmitLValueForField( 9516 DimsLVal, *std::next(RD->field_begin(), CountFD)); 9517 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal); 9518 // Stride 9519 LValue StrideLVal = CGF.EmitLValueForField( 9520 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 9521 CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); 9522 } 9523 // args[I] = &dims 9524 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9525 DimsAddr, CGM.Int8PtrTy); 9526 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9527 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9528 Info.PointersArray, 0, I); 9529 Address PAddr(P, CGF.getPointerAlign()); 9530 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); 9531 ++L; 9532 } 9533 } 9534 9535 // Try to extract the base declaration from a `this->x` expression if possible. 9536 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9537 if (!E) 9538 return nullptr; 9539 9540 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9541 if (const MemberExpr *ME = 9542 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9543 return ME->getMemberDecl(); 9544 return nullptr; 9545 } 9546 9547 /// Emit a string constant containing the names of the values mapped to the 9548 /// offloading runtime library. 9549 llvm::Constant * 9550 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9551 MappableExprsHandler::MappingExprInfo &MapExprs) { 9552 9553 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9554 return OMPBuilder.getOrCreateDefaultSrcLocStr(); 9555 9556 SourceLocation Loc; 9557 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9558 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9559 Loc = VD->getLocation(); 9560 else 9561 Loc = MapExprs.getMapExpr()->getExprLoc(); 9562 } else { 9563 Loc = MapExprs.getMapDecl()->getLocation(); 9564 } 9565 9566 std::string ExprName = ""; 9567 if (MapExprs.getMapExpr()) { 9568 PrintingPolicy P(CGF.getContext().getLangOpts()); 9569 llvm::raw_string_ostream OS(ExprName); 9570 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9571 OS.flush(); 9572 } else { 9573 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9574 } 9575 9576 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9577 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(), 9578 PLoc.getLine(), PLoc.getColumn()); 9579 } 9580 9581 /// Emit the arrays used to pass the captures and map information to the 9582 /// offloading runtime library. If there is no map or capture information, 9583 /// return nullptr by reference. 
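/// For a directive with two captures, this produces roughly the following
/// (an illustrative sketch; the sizes array becomes a private constant
/// global instead when all sizes are compile-time constants):
/// \code
/// void *.offload_baseptrs[2];
/// void *.offload_ptrs[2];
/// int64_t .offload_sizes[2];
/// // plus the constant globals .offload_maptypes and, when debug info is
/// // enabled, .offload_mapnames
/// \endcode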
9584 static void emitOffloadingArrays( 9585 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9586 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 9587 bool IsNonContiguous = false) { 9588 CodeGenModule &CGM = CGF.CGM; 9589 ASTContext &Ctx = CGF.getContext(); 9590 9591 // Reset the array information. 9592 Info.clearArrayInfo(); 9593 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 9594 9595 if (Info.NumberOfPtrs) { 9596 // Detect if we have any capture size requiring runtime evaluation of the 9597 // size so that a constant array could be eventually used. 9598 bool hasRuntimeEvaluationCaptureSize = false; 9599 for (llvm::Value *S : CombinedInfo.Sizes) 9600 if (!isa<llvm::Constant>(S)) { 9601 hasRuntimeEvaluationCaptureSize = true; 9602 break; 9603 } 9604 9605 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9606 QualType PointerArrayType = Ctx.getConstantArrayType( 9607 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9608 /*IndexTypeQuals=*/0); 9609 9610 Info.BasePointersArray = 9611 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9612 Info.PointersArray = 9613 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9614 Address MappersArray = 9615 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 9616 Info.MappersArray = MappersArray.getPointer(); 9617 9618 // If we don't have any VLA types or other types that require runtime 9619 // evaluation, we can use a constant array for the map sizes, otherwise we 9620 // need to fill up the arrays as we do for the pointers. 9621 QualType Int64Ty = 9622 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9623 if (hasRuntimeEvaluationCaptureSize) { 9624 QualType SizeArrayType = Ctx.getConstantArrayType( 9625 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9626 /*IndexTypeQuals=*/0); 9627 Info.SizesArray = 9628 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9629 } else { 9630 // We expect all the sizes to be constant, so we collect them to create 9631 // a constant array. 9632 SmallVector<llvm::Constant *, 16> ConstSizes; 9633 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { 9634 if (IsNonContiguous && 9635 (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) { 9636 ConstSizes.push_back(llvm::ConstantInt::get( 9637 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I])); 9638 } else { 9639 ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I])); 9640 } 9641 } 9642 9643 auto *SizesArrayInit = llvm::ConstantArray::get( 9644 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9645 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9646 auto *SizesArrayGbl = new llvm::GlobalVariable( 9647 CGM.getModule(), SizesArrayInit->getType(), 9648 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9649 SizesArrayInit, Name); 9650 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9651 Info.SizesArray = SizesArrayGbl; 9652 } 9653 9654 // The map types are always constant so we don't need to generate code to 9655 // fill arrays. Instead, we create an array constant. 
9656 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 9657 llvm::copy(CombinedInfo.Types, Mapping.begin()); 9658 std::string MaptypesName = 9659 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9660 auto *MapTypesArrayGbl = 9661 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9662 Info.MapTypesArray = MapTypesArrayGbl; 9663 9664 // The information types are only built if there is debug information 9665 // requested. 9666 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { 9667 Info.MapNamesArray = llvm::Constant::getNullValue( 9668 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); 9669 } else { 9670 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 9671 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 9672 }; 9673 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); 9674 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); 9675 std::string MapnamesName = 9676 CGM.getOpenMPRuntime().getName({"offload_mapnames"}); 9677 auto *MapNamesArrayGbl = 9678 OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName); 9679 Info.MapNamesArray = MapNamesArrayGbl; 9680 } 9681 9682 // If there's a present map type modifier, it must not be applied to the end 9683 // of a region, so generate a separate map type array in that case. 9684 if (Info.separateBeginEndCalls()) { 9685 bool EndMapTypesDiffer = false; 9686 for (uint64_t &Type : Mapping) { 9687 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { 9688 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; 9689 EndMapTypesDiffer = true; 9690 } 9691 } 9692 if (EndMapTypesDiffer) { 9693 MapTypesArrayGbl = 9694 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9695 Info.MapTypesArrayEnd = MapTypesArrayGbl; 9696 } 9697 } 9698 9699 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 9700 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 9701 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 9702 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9703 Info.BasePointersArray, 0, I); 9704 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9705 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9706 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9707 CGF.Builder.CreateStore(BPVal, BPAddr); 9708 9709 if (Info.requiresDevicePointerInfo()) 9710 if (const ValueDecl *DevVD = 9711 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 9712 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 9713 9714 llvm::Value *PVal = CombinedInfo.Pointers[I]; 9715 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9716 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9717 Info.PointersArray, 0, I); 9718 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9719 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9720 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9721 CGF.Builder.CreateStore(PVal, PAddr); 9722 9723 if (hasRuntimeEvaluationCaptureSize) { 9724 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 9725 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9726 Info.SizesArray, 9727 /*Idx0=*/0, 9728 /*Idx1=*/I); 9729 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); 9730 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 9731 CGM.Int64Ty, 9732 /*isSigned=*/true), 9733 SAddr); 9734 } 9735 9736 // Fill up the mapper array. 
9737 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9738 if (CombinedInfo.Mappers[I]) { 9739 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 9740 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 9741 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 9742 Info.HasMapper = true; 9743 } 9744 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 9745 CGF.Builder.CreateStore(MFunc, MAddr); 9746 } 9747 } 9748 9749 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || 9750 Info.NumberOfPtrs == 0) 9751 return; 9752 9753 emitNonContiguousDescriptor(CGF, CombinedInfo, Info); 9754 } 9755 9756 namespace { 9757 /// Additional arguments for emitOffloadingArraysArgument function. 9758 struct ArgumentsOptions { 9759 bool ForEndCall = false; 9760 ArgumentsOptions() = default; 9761 ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {} 9762 }; 9763 } // namespace 9764 9765 /// Emit the arguments to be passed to the runtime library based on the 9766 /// arrays of base pointers, pointers, sizes, map types, and mappers. If 9767 /// ForEndCall, emit map types to be passed for the end of the region instead of 9768 /// the beginning. 9769 static void emitOffloadingArraysArgument( 9770 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9771 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9772 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9773 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9774 const ArgumentsOptions &Options = ArgumentsOptions()) { 9775 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9776 "expected region end call to runtime only when end call is separate"); 9777 CodeGenModule &CGM = CGF.CGM; 9778 if (Info.NumberOfPtrs) { 9779 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9780 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9781 Info.BasePointersArray, 9782 /*Idx0=*/0, /*Idx1=*/0); 9783 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9784 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9785 Info.PointersArray, 9786 /*Idx0=*/0, 9787 /*Idx1=*/0); 9788 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9789 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9790 /*Idx0=*/0, /*Idx1=*/0); 9791 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9792 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9793 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9794 : Info.MapTypesArray, 9795 /*Idx0=*/0, 9796 /*Idx1=*/0); 9797 9798 // Only emit the mapper information arrays if debug information is 9799 // requested. 
9800 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9801 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9802 else 9803 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9804 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9805 Info.MapNamesArray, 9806 /*Idx0=*/0, 9807 /*Idx1=*/0); 9808 // If there is no user-defined mapper, set the mapper array to nullptr to 9809 // avoid an unnecessary data privatization 9810 if (!Info.HasMapper) 9811 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9812 else 9813 MappersArrayArg = 9814 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9815 } else { 9816 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9817 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9818 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9819 MapTypesArrayArg = 9820 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9821 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9822 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9823 } 9824 } 9825 9826 /// Check for inner distribute directive. 9827 static const OMPExecutableDirective * 9828 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9829 const auto *CS = D.getInnermostCapturedStmt(); 9830 const auto *Body = 9831 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9832 const Stmt *ChildStmt = 9833 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9834 9835 if (const auto *NestedDir = 9836 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9837 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9838 switch (D.getDirectiveKind()) { 9839 case OMPD_target: 9840 if (isOpenMPDistributeDirective(DKind)) 9841 return NestedDir; 9842 if (DKind == OMPD_teams) { 9843 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9844 /*IgnoreCaptured=*/true); 9845 if (!Body) 9846 return nullptr; 9847 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9848 if (const auto *NND = 9849 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9850 DKind = NND->getDirectiveKind(); 9851 if (isOpenMPDistributeDirective(DKind)) 9852 return NND; 9853 } 9854 } 9855 return nullptr; 9856 case OMPD_target_teams: 9857 if (isOpenMPDistributeDirective(DKind)) 9858 return NestedDir; 9859 return nullptr; 9860 case OMPD_target_parallel: 9861 case OMPD_target_simd: 9862 case OMPD_target_parallel_for: 9863 case OMPD_target_parallel_for_simd: 9864 return nullptr; 9865 case OMPD_target_teams_distribute: 9866 case OMPD_target_teams_distribute_simd: 9867 case OMPD_target_teams_distribute_parallel_for: 9868 case OMPD_target_teams_distribute_parallel_for_simd: 9869 case OMPD_parallel: 9870 case OMPD_for: 9871 case OMPD_parallel_for: 9872 case OMPD_parallel_master: 9873 case OMPD_parallel_sections: 9874 case OMPD_for_simd: 9875 case OMPD_parallel_for_simd: 9876 case OMPD_cancel: 9877 case OMPD_cancellation_point: 9878 case OMPD_ordered: 9879 case OMPD_threadprivate: 9880 case OMPD_allocate: 9881 case OMPD_task: 9882 case OMPD_simd: 9883 case OMPD_tile: 9884 case OMPD_unroll: 9885 case OMPD_sections: 9886 case OMPD_section: 9887 case OMPD_single: 9888 case OMPD_master: 9889 case OMPD_critical: 9890 case OMPD_taskyield: 9891 case OMPD_barrier: 9892 case OMPD_taskwait: 9893 case OMPD_taskgroup: 9894 case OMPD_atomic: 9895 case OMPD_flush: 9896 case OMPD_depobj: 9897 case 
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
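  // For illustration only (user code, not part of this file): a declaration
  // such as
  //   struct vec { int len; double *data; };
  //   #pragma omp declare mapper(id : struct vec v) map(v, v.data[0:v.len])
  // is lowered to a function with the signature shown in the \code block
  // above, named along the lines of ".omp_mapper.<mangled vec>.id".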
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
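  // Rough sketch of the guard emitted by emitUDMapperArrayInitOrDel for the
  // init case (names illustrative; see also the \code example above):
  //   if ((size > 1 || (base != begin && IsPtrAndObj)) && !IsDelete)
  //     __tgt_push_mapper_component(handle, base, begin, size * sizeof(Ty),
  //                                 (type & ~(TO | FROM)) | IMPLICIT, name);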
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program.
    // According to the OMP_MAP_TO and OMP_MAP_FROM bits of the \a MapType,
    // which is the input argument of the mapper function, the following code
    // will set the OMP_MAP_TO and OMP_MAP_FROM bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expected a valid mapper function to be available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e.,
  // \p Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
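  // For example (illustrative): given
  //   #pragma omp target
  //   #pragma omp teams distribute parallel for
  //   for (int i = 0; i < N; ++i) ...
  // the 'distribute' directive is found behind the nested 'teams' region and
  // provides the loop trip count that is pushed to the runtime below.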
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
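    // (Illustrative: 'device(2)' becomes the 64-bit DeviceID constant 2; with
    // no device clause the OMP_DEVICEID_UNDEF sentinel is passed and the
    // runtime substitutes its default device.)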
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls to __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply calls the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
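      // Illustrative example: for
      //   #pragma omp target teams num_teams(8) thread_limit(64)
      // NumTeams is the i32 constant 8 and NumThreads is 64, while for a bare
      // 'target teams' both are the i32 constant 0 and the runtime chooses.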
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer(),
          NumTeams,
          NumThreads};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer()};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
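  // (ElseGen is the plain host path: it calls the host-outlined function,
  // wrapped in an outer task when depend/nowait clauses require one. It runs,
  // e.g., when an if clause evaluates to false or no device code is emitted.)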
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have
      // map information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise
        // we just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
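      // For example (illustrative): with 'struct S { int a; double b; } s;'
      // and 'map(tofrom: s.a, s.b)', the combined entry spans 's' itself and
      // the member entries are tied back to it via MEMBER_OF bits in their
      // map types.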
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});

    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on
  // the host regardless of the conditional in the if clause if, e.g., the
  // user does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point?
    // If so, just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
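  // For example (illustrative): for a declare target variable
  //   #pragma omp declare target
  //   S s; // S has ctors/dtors that may themselves contain target regions
  //   #pragma omp end declare target
  // the complete-object constructor and destructor bodies are scanned below
  // just like any other parent function.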
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must be 'link' or 'to' with unified "
           "memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected target-based directive.");
}

void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with "
                     "the static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
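  // Conceptually (illustrative C-level sketch, not the emitted IR):
  //   void omp_offloading.requires_reg(void) {
  //     __tgt_register_requires(Flags); // OMP_REQ_UNIFIED_SHARED_MEMORY or
  //                                     // OMP_REQ_NONE, see below
  //   }
  // The caller registers the returned function to run at program startup,
  // before any target region in this translation unit can execute.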
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from throwing an
    // error for mismatching requires clauses across compilation units that
    // don't contain at least one target region.
    assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit).
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all
  // the arguments of the runtime call by reference because they are used in
  // the closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the
    // region here. It will have to be duplicated: with and without
    // privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
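  // Taken together, BeginThenGen and EndThenGen bracket the region body, so
  // that (illustrative)
  //   #pragma omp target data map(tofrom: a[0:N])
  //   { /* body */ }
  // becomes, roughly:
  //   __tgt_target_data_begin_mapper(loc, dev, 1, ...arrays for 'a'...);
  //   /* body */
  //   __tgt_target_data_end_mapper(loc, dev, 1, ...arrays for 'a'...);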
11253 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 11254 PrePostActionTy &) { 11255 assert(Info.isValid() && "Invalid data environment closing arguments."); 11256 11257 llvm::Value *BasePointersArrayArg = nullptr; 11258 llvm::Value *PointersArrayArg = nullptr; 11259 llvm::Value *SizesArrayArg = nullptr; 11260 llvm::Value *MapTypesArrayArg = nullptr; 11261 llvm::Value *MapNamesArrayArg = nullptr; 11262 llvm::Value *MappersArrayArg = nullptr; 11263 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11264 SizesArrayArg, MapTypesArrayArg, 11265 MapNamesArrayArg, MappersArrayArg, Info, 11266 {/*ForEndCall=*/true}); 11267 11268 // Emit device ID if any. 11269 llvm::Value *DeviceID = nullptr; 11270 if (Device) { 11271 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11272 CGF.Int64Ty, /*isSigned=*/true); 11273 } else { 11274 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11275 } 11276 11277 // Emit the number of elements in the offloading arrays. 11278 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11279 11280 // Source location for the ident struct 11281 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11282 11283 llvm::Value *OffloadingArgs[] = {RTLoc, 11284 DeviceID, 11285 PointerNum, 11286 BasePointersArrayArg, 11287 PointersArrayArg, 11288 SizesArrayArg, 11289 MapTypesArrayArg, 11290 MapNamesArrayArg, 11291 MappersArrayArg}; 11292 CGF.EmitRuntimeCall( 11293 OMPBuilder.getOrCreateRuntimeFunction( 11294 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 11295 OffloadingArgs); 11296 }; 11297 11298 // If we need device pointer privatization, we need to emit the body of the 11299 // region with no privatization in the 'else' branch of the conditional. 11300 // Otherwise, we don't have to do anything. 11301 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 11302 PrePostActionTy &) { 11303 if (!Info.CaptureDeviceAddrMap.empty()) { 11304 CodeGen.setAction(NoPrivAction); 11305 CodeGen(CGF); 11306 } 11307 }; 11308 11309 // We don't have to do anything to close the region if the if clause evaluates 11310 // to false. 11311 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 11312 11313 if (IfCond) { 11314 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 11315 } else { 11316 RegionCodeGenTy RCG(BeginThenGen); 11317 RCG(CGF); 11318 } 11319 11320 // If we don't require privatization of device pointers, we emit the body in 11321 // between the runtime calls. This avoids duplicating the body code. 11322 if (Info.CaptureDeviceAddrMap.empty()) { 11323 CodeGen.setAction(NoPrivAction); 11324 CodeGen(CGF); 11325 } 11326 11327 if (IfCond) { 11328 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 11329 } else { 11330 RegionCodeGenTy RCG(EndThenGen); 11331 RCG(CGF); 11332 } 11333 } 11334 11335 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 11336 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11337 const Expr *Device) { 11338 if (!CGF.HaveInsertPoint()) 11339 return; 11340 11341 assert((isa<OMPTargetEnterDataDirective>(D) || 11342 isa<OMPTargetExitDataDirective>(D) || 11343 isa<OMPTargetUpdateDirective>(D)) && 11344 "Expecting either target enter, exit data, or update directives."); 11345 11346 CodeGenFunction::OMPTargetDataInfo InputInfo; 11347 llvm::Value *MapTypesArray = nullptr; 11348 llvm::Value *MapNamesArray = nullptr; 11349 // Generate the code for the opening of the data environment. 
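  // The ThenGen callback below selects the runtime entry point from the
  // directive kind; illustratively:
  //   target enter data -> __tgt_target_data_begin_mapper
  //   target exit data  -> __tgt_target_data_end_mapper
  //   target update     -> __tgt_target_data_update_mapper
  // with the matching *_nowait_mapper entry point when a nowait clause is
  // present on the directive.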
11350 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 11351 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 11352 // Emit device ID if any. 11353 llvm::Value *DeviceID = nullptr; 11354 if (Device) { 11355 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11356 CGF.Int64Ty, /*isSigned=*/true); 11357 } else { 11358 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11359 } 11360 11361 // Emit the number of elements in the offloading arrays. 11362 llvm::Constant *PointerNum = 11363 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 11364 11365 // Source location for the ident struct 11366 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11367 11368 llvm::Value *OffloadingArgs[] = {RTLoc, 11369 DeviceID, 11370 PointerNum, 11371 InputInfo.BasePointersArray.getPointer(), 11372 InputInfo.PointersArray.getPointer(), 11373 InputInfo.SizesArray.getPointer(), 11374 MapTypesArray, 11375 MapNamesArray, 11376 InputInfo.MappersArray.getPointer()}; 11377 11378 // Select the right runtime function call for each standalone 11379 // directive. 11380 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11381 RuntimeFunction RTLFn; 11382 switch (D.getDirectiveKind()) { 11383 case OMPD_target_enter_data: 11384 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11385 : OMPRTL___tgt_target_data_begin_mapper; 11386 break; 11387 case OMPD_target_exit_data: 11388 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11389 : OMPRTL___tgt_target_data_end_mapper; 11390 break; 11391 case OMPD_target_update: 11392 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper 11393 : OMPRTL___tgt_target_data_update_mapper; 11394 break; 11395 case OMPD_parallel: 11396 case OMPD_for: 11397 case OMPD_parallel_for: 11398 case OMPD_parallel_master: 11399 case OMPD_parallel_sections: 11400 case OMPD_for_simd: 11401 case OMPD_parallel_for_simd: 11402 case OMPD_cancel: 11403 case OMPD_cancellation_point: 11404 case OMPD_ordered: 11405 case OMPD_threadprivate: 11406 case OMPD_allocate: 11407 case OMPD_task: 11408 case OMPD_simd: 11409 case OMPD_tile: 11410 case OMPD_unroll: 11411 case OMPD_sections: 11412 case OMPD_section: 11413 case OMPD_single: 11414 case OMPD_master: 11415 case OMPD_critical: 11416 case OMPD_taskyield: 11417 case OMPD_barrier: 11418 case OMPD_taskwait: 11419 case OMPD_taskgroup: 11420 case OMPD_atomic: 11421 case OMPD_flush: 11422 case OMPD_depobj: 11423 case OMPD_scan: 11424 case OMPD_teams: 11425 case OMPD_target_data: 11426 case OMPD_distribute: 11427 case OMPD_distribute_simd: 11428 case OMPD_distribute_parallel_for: 11429 case OMPD_distribute_parallel_for_simd: 11430 case OMPD_teams_distribute: 11431 case OMPD_teams_distribute_simd: 11432 case OMPD_teams_distribute_parallel_for: 11433 case OMPD_teams_distribute_parallel_for_simd: 11434 case OMPD_declare_simd: 11435 case OMPD_declare_variant: 11436 case OMPD_begin_declare_variant: 11437 case OMPD_end_declare_variant: 11438 case OMPD_declare_target: 11439 case OMPD_end_declare_target: 11440 case OMPD_declare_reduction: 11441 case OMPD_declare_mapper: 11442 case OMPD_taskloop: 11443 case OMPD_taskloop_simd: 11444 case OMPD_master_taskloop: 11445 case OMPD_master_taskloop_simd: 11446 case OMPD_parallel_master_taskloop: 11447 case OMPD_parallel_master_taskloop_simd: 11448 case OMPD_target: 11449 case OMPD_target_simd: 11450 case OMPD_target_teams_distribute: 11451 case OMPD_target_teams_distribute_simd: 11452 case OMPD_target_teams_distribute_parallel_for: 11453 
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise, the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //      type which is pass-by-value (except for the type that maps to the
  //      built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 "Registers
  // and the Stack Frame" of the original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind) {
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (ParamAttr.StrideOrArg != 1)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for reference marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::Linear)
    return false;

  // TODO: Handle linear references with modifiers

  if (Kind == ParamKindTy::LinearWithVarStride)
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types at most 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
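  // A worked example (a sketch): for `double foo(float x, double *y)` with
  // both parameters classified Vector, LS(return) = 64, LS(x) = 32, and
  // LS(y) = 64 (the pointer itself is pass-by-value), so NDS = 32 and
  // WDS = 64.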
11746 assert(llvm::all_of(Sizes, 11747 [](unsigned Size) { 11748 return Size == 8 || Size == 16 || Size == 32 || 11749 Size == 64 || Size == 128; 11750 }) && 11751 "Invalid size"); 11752 11753 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11754 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11755 OutputBecomesInput); 11756 } 11757 11758 /// Mangle the parameter part of the vector function name according to 11759 /// their OpenMP classification. The mangling function is defined in 11760 /// section 3.5 of the AAVFABI. 11761 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11762 SmallString<256> Buffer; 11763 llvm::raw_svector_ostream Out(Buffer); 11764 for (const auto &ParamAttr : ParamAttrs) { 11765 switch (ParamAttr.Kind) { 11766 case LinearWithVarStride: 11767 Out << "ls" << ParamAttr.StrideOrArg; 11768 break; 11769 case Linear: 11770 Out << 'l'; 11771 // Don't print the step value if it is not present or if it is 11772 // equal to 1. 11773 if (ParamAttr.StrideOrArg != 1) 11774 Out << ParamAttr.StrideOrArg; 11775 break; 11776 case Uniform: 11777 Out << 'u'; 11778 break; 11779 case Vector: 11780 Out << 'v'; 11781 break; 11782 } 11783 11784 if (!!ParamAttr.Alignment) 11785 Out << 'a' << ParamAttr.Alignment; 11786 } 11787 11788 return std::string(Out.str()); 11789 } 11790 11791 // Function used to add the attribute. The parameter `VLEN` is 11792 // templated to allow the use of "x" when targeting scalable functions 11793 // for SVE. 11794 template <typename T> 11795 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11796 char ISA, StringRef ParSeq, 11797 StringRef MangledName, bool OutputBecomesInput, 11798 llvm::Function *Fn) { 11799 SmallString<256> Buffer; 11800 llvm::raw_svector_ostream Out(Buffer); 11801 Out << Prefix << ISA << LMask << VLEN; 11802 if (OutputBecomesInput) 11803 Out << "v"; 11804 Out << ParSeq << "_" << MangledName; 11805 Fn->addFnAttr(Out.str()); 11806 } 11807 11808 // Helper function to generate the Advanced SIMD names depending on 11809 // the value of the NDS when simdlen is not present. 11810 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11811 StringRef Prefix, char ISA, 11812 StringRef ParSeq, StringRef MangledName, 11813 bool OutputBecomesInput, 11814 llvm::Function *Fn) { 11815 switch (NDS) { 11816 case 8: 11817 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11818 OutputBecomesInput, Fn); 11819 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11820 OutputBecomesInput, Fn); 11821 break; 11822 case 16: 11823 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11824 OutputBecomesInput, Fn); 11825 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11826 OutputBecomesInput, Fn); 11827 break; 11828 case 32: 11829 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11830 OutputBecomesInput, Fn); 11831 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11832 OutputBecomesInput, Fn); 11833 break; 11834 case 64: 11835 case 128: 11836 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11837 OutputBecomesInput, Fn); 11838 break; 11839 default: 11840 llvm_unreachable("Scalar type is too wide."); 11841 } 11842 } 11843 11844 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
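/// For illustration (a sketch, not normative): assuming
/// `#pragma omp declare simd notinbranch` on `double foo(double x)`, the
/// NDS is 64, so the Advanced SIMD path adds the attribute "_ZGVnN2v_foo"
/// (two 64-bit lanes in a 128-bit vector), while the SVE path adds the
/// vector-length-agnostic "_ZGVsMxv_foo".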
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
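      // Only a masked ('M') variant is emitted, and the "x" VLEN token
      // below marks the signature as vector-length agnostic, i.e. no fixed
      // lane count is encoded in the name.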
11926 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11927 OutputBecomesInput, Fn); 11928 } else { 11929 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11930 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11931 // two vector names depending on the use of the clause 11932 // `[not]inbranch`. 11933 switch (State) { 11934 case OMPDeclareSimdDeclAttr::BS_Undefined: 11935 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11936 OutputBecomesInput, Fn); 11937 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11938 OutputBecomesInput, Fn); 11939 break; 11940 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11941 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11942 OutputBecomesInput, Fn); 11943 break; 11944 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11945 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11946 OutputBecomesInput, Fn); 11947 break; 11948 } 11949 } 11950 } 11951 } 11952 11953 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11954 llvm::Function *Fn) { 11955 ASTContext &C = CGM.getContext(); 11956 FD = FD->getMostRecentDecl(); 11957 // Map params to their positions in function decl. 11958 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11959 if (isa<CXXMethodDecl>(FD)) 11960 ParamPositions.try_emplace(FD, 0); 11961 unsigned ParamPos = ParamPositions.size(); 11962 for (const ParmVarDecl *P : FD->parameters()) { 11963 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11964 ++ParamPos; 11965 } 11966 while (FD) { 11967 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11968 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11969 // Mark uniform parameters. 11970 for (const Expr *E : Attr->uniforms()) { 11971 E = E->IgnoreParenImpCasts(); 11972 unsigned Pos; 11973 if (isa<CXXThisExpr>(E)) { 11974 Pos = ParamPositions[FD]; 11975 } else { 11976 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11977 ->getCanonicalDecl(); 11978 Pos = ParamPositions[PVD]; 11979 } 11980 ParamAttrs[Pos].Kind = Uniform; 11981 } 11982 // Get alignment info. 11983 auto NI = Attr->alignments_begin(); 11984 for (const Expr *E : Attr->aligneds()) { 11985 E = E->IgnoreParenImpCasts(); 11986 unsigned Pos; 11987 QualType ParmTy; 11988 if (isa<CXXThisExpr>(E)) { 11989 Pos = ParamPositions[FD]; 11990 ParmTy = E->getType(); 11991 } else { 11992 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11993 ->getCanonicalDecl(); 11994 Pos = ParamPositions[PVD]; 11995 ParmTy = PVD->getType(); 11996 } 11997 ParamAttrs[Pos].Alignment = 11998 (*NI) 11999 ? (*NI)->EvaluateKnownConstInt(C) 12000 : llvm::APSInt::getUnsigned( 12001 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 12002 .getQuantity()); 12003 ++NI; 12004 } 12005 // Mark linear parameters. 12006 auto SI = Attr->steps_begin(); 12007 auto MI = Attr->modifiers_begin(); 12008 for (const Expr *E : Attr->linears()) { 12009 E = E->IgnoreParenImpCasts(); 12010 unsigned Pos; 12011 // Rescaling factor needed to compute the linear parameter 12012 // value in the mangled name. 
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // A non-constant step may name another parameter (a variable
            // stride). Use dyn_cast here: cast<> asserts instead of
            // returning null, so these checks could never fail.
            if (const auto *DRE =
                    dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (Linear == ParamAttr.Kind)
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}

namespace {
/// Cleanup action for doacross support.
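/// Pushed by emitDoacrossInit below so that, on normal or exceptional exit
/// from the scope, a matching `__kmpc_doacross_fini(&loc, gtid)` call tears
/// down the state set up by `__kmpc_doacross_init`.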
12079 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 12080 public: 12081 static const int DoacrossFinArgs = 2; 12082 12083 private: 12084 llvm::FunctionCallee RTLFn; 12085 llvm::Value *Args[DoacrossFinArgs]; 12086 12087 public: 12088 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 12089 ArrayRef<llvm::Value *> CallArgs) 12090 : RTLFn(RTLFn) { 12091 assert(CallArgs.size() == DoacrossFinArgs); 12092 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 12093 } 12094 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12095 if (!CGF.HaveInsertPoint()) 12096 return; 12097 CGF.EmitRuntimeCall(RTLFn, Args); 12098 } 12099 }; 12100 } // namespace 12101 12102 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12103 const OMPLoopDirective &D, 12104 ArrayRef<Expr *> NumIterations) { 12105 if (!CGF.HaveInsertPoint()) 12106 return; 12107 12108 ASTContext &C = CGM.getContext(); 12109 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 12110 RecordDecl *RD; 12111 if (KmpDimTy.isNull()) { 12112 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 12113 // kmp_int64 lo; // lower 12114 // kmp_int64 up; // upper 12115 // kmp_int64 st; // stride 12116 // }; 12117 RD = C.buildImplicitRecord("kmp_dim"); 12118 RD->startDefinition(); 12119 addFieldToRecordDecl(C, RD, Int64Ty); 12120 addFieldToRecordDecl(C, RD, Int64Ty); 12121 addFieldToRecordDecl(C, RD, Int64Ty); 12122 RD->completeDefinition(); 12123 KmpDimTy = C.getRecordType(RD); 12124 } else { 12125 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 12126 } 12127 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 12128 QualType ArrayTy = 12129 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 12130 12131 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 12132 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 12133 enum { LowerFD = 0, UpperFD, StrideFD }; 12134 // Fill dims with data. 
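  // After the loop below, each dims[I] effectively holds
  //   { lo = 0 /*from the null initialization*/,
  //     up = num_iterations[I], st = 1 }.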
12135 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 12136 LValue DimsLVal = CGF.MakeAddrLValue( 12137 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 12138 // dims.upper = num_iterations; 12139 LValue UpperLVal = CGF.EmitLValueForField( 12140 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 12141 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 12142 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 12143 Int64Ty, NumIterations[I]->getExprLoc()); 12144 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 12145 // dims.stride = 1; 12146 LValue StrideLVal = CGF.EmitLValueForField( 12147 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 12148 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 12149 StrideLVal); 12150 } 12151 12152 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 12153 // kmp_int32 num_dims, struct kmp_dim * dims); 12154 llvm::Value *Args[] = { 12155 emitUpdateLocation(CGF, D.getBeginLoc()), 12156 getThreadID(CGF, D.getBeginLoc()), 12157 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 12158 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12159 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 12160 CGM.VoidPtrTy)}; 12161 12162 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12163 CGM.getModule(), OMPRTL___kmpc_doacross_init); 12164 CGF.EmitRuntimeCall(RTLFn, Args); 12165 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 12166 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 12167 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12168 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 12169 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 12170 llvm::makeArrayRef(FiniArgs)); 12171 } 12172 12173 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12174 const OMPDependClause *C) { 12175 QualType Int64Ty = 12176 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 12177 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 12178 QualType ArrayTy = CGM.getContext().getConstantArrayType( 12179 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 12180 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 12181 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 12182 const Expr *CounterVal = C->getLoopData(I); 12183 assert(CounterVal); 12184 llvm::Value *CntVal = CGF.EmitScalarConversion( 12185 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 12186 CounterVal->getExprLoc()); 12187 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 12188 /*Volatile=*/false, Int64Ty); 12189 } 12190 llvm::Value *Args[] = { 12191 emitUpdateLocation(CGF, C->getBeginLoc()), 12192 getThreadID(CGF, C->getBeginLoc()), 12193 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 12194 llvm::FunctionCallee RTLFn; 12195 if (C->getDependencyKind() == OMPC_DEPEND_source) { 12196 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12197 OMPRTL___kmpc_doacross_post); 12198 } else { 12199 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 12200 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12201 OMPRTL___kmpc_doacross_wait); 12202 } 12203 CGF.EmitRuntimeCall(RTLFn, Args); 12204 } 12205 12206 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 12207 llvm::FunctionCallee Callee, 12208 ArrayRef<llvm::Value *> Args) const { 12209 assert(Loc.isValid() && "Outlined function call location 
must be valid."); 12210 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 12211 12212 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 12213 if (Fn->doesNotThrow()) { 12214 CGF.EmitNounwindRuntimeCall(Fn, Args); 12215 return; 12216 } 12217 } 12218 CGF.EmitRuntimeCall(Callee, Args); 12219 } 12220 12221 void CGOpenMPRuntime::emitOutlinedFunctionCall( 12222 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 12223 ArrayRef<llvm::Value *> Args) const { 12224 emitCall(CGF, Loc, OutlinedFn, Args); 12225 } 12226 12227 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 12228 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 12229 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 12230 HasEmittedDeclareTargetRegion = true; 12231 } 12232 12233 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 12234 const VarDecl *NativeParam, 12235 const VarDecl *TargetParam) const { 12236 return CGF.GetAddrOfLocalVar(NativeParam); 12237 } 12238 12239 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 12240 const VarDecl *VD) { 12241 if (!VD) 12242 return Address::invalid(); 12243 Address UntiedAddr = Address::invalid(); 12244 Address UntiedRealAddr = Address::invalid(); 12245 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12246 if (It != FunctionToUntiedTaskStackMap.end()) { 12247 const UntiedLocalVarsAddressesMap &UntiedData = 12248 UntiedLocalVarsStack[It->second]; 12249 auto I = UntiedData.find(VD); 12250 if (I != UntiedData.end()) { 12251 UntiedAddr = I->second.first; 12252 UntiedRealAddr = I->second.second; 12253 } 12254 } 12255 const VarDecl *CVD = VD->getCanonicalDecl(); 12256 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 12257 // Use the default allocation. 12258 if (!isAllocatableDecl(VD)) 12259 return UntiedAddr; 12260 llvm::Value *Size; 12261 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 12262 if (CVD->getType()->isVariablyModifiedType()) { 12263 Size = CGF.getTypeSize(CVD->getType()); 12264 // Align the size: ((size + align - 1) / align) * align 12265 Size = CGF.Builder.CreateNUWAdd( 12266 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 12267 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 12268 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 12269 } else { 12270 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 12271 Size = CGM.getSize(Sz.alignTo(Align)); 12272 } 12273 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 12274 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 12275 assert(AA->getAllocator() && 12276 "Expected allocator expression for non-default allocator."); 12277 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 12278 // According to the standard, the original allocator type is a enum 12279 // (integer). Convert to pointer type, if required. 
12280 Allocator = CGF.EmitScalarConversion( 12281 Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy, 12282 AA->getAllocator()->getExprLoc()); 12283 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 12284 12285 llvm::Value *Addr = 12286 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 12287 CGM.getModule(), OMPRTL___kmpc_alloc), 12288 Args, getName({CVD->getName(), ".void.addr"})); 12289 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12290 CGM.getModule(), OMPRTL___kmpc_free); 12291 QualType Ty = CGM.getContext().getPointerType(CVD->getType()); 12292 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12293 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); 12294 if (UntiedAddr.isValid()) 12295 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); 12296 12297 // Cleanup action for allocate support. 12298 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 12299 llvm::FunctionCallee RTLFn; 12300 SourceLocation::UIntTy LocEncoding; 12301 Address Addr; 12302 const Expr *Allocator; 12303 12304 public: 12305 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 12306 SourceLocation::UIntTy LocEncoding, Address Addr, 12307 const Expr *Allocator) 12308 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), 12309 Allocator(Allocator) {} 12310 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12311 if (!CGF.HaveInsertPoint()) 12312 return; 12313 llvm::Value *Args[3]; 12314 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID( 12315 CGF, SourceLocation::getFromRawEncoding(LocEncoding)); 12316 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12317 Addr.getPointer(), CGF.VoidPtrTy); 12318 llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator); 12319 // According to the standard, the original allocator type is a enum 12320 // (integer). Convert to pointer type, if required. 12321 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), 12322 CGF.getContext().VoidPtrTy, 12323 Allocator->getExprLoc()); 12324 Args[2] = AllocVal; 12325 12326 CGF.EmitRuntimeCall(RTLFn, Args); 12327 } 12328 }; 12329 Address VDAddr = 12330 UntiedRealAddr.isValid() ? 
UntiedRealAddr : Address(Addr, Align); 12331 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( 12332 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), 12333 VDAddr, AA->getAllocator()); 12334 if (UntiedRealAddr.isValid()) 12335 if (auto *Region = 12336 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 12337 Region->emitUntiedSwitch(CGF); 12338 return VDAddr; 12339 } 12340 return UntiedAddr; 12341 } 12342 12343 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, 12344 const VarDecl *VD) const { 12345 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12346 if (It == FunctionToUntiedTaskStackMap.end()) 12347 return false; 12348 return UntiedLocalVarsStack[It->second].count(VD) > 0; 12349 } 12350 12351 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 12352 CodeGenModule &CGM, const OMPLoopDirective &S) 12353 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 12354 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12355 if (!NeedToPush) 12356 return; 12357 NontemporalDeclsSet &DS = 12358 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 12359 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 12360 for (const Stmt *Ref : C->private_refs()) { 12361 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 12362 const ValueDecl *VD; 12363 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 12364 VD = DRE->getDecl(); 12365 } else { 12366 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 12367 assert((ME->isImplicitCXXThis() || 12368 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 12369 "Expected member of current class."); 12370 VD = ME->getMemberDecl(); 12371 } 12372 DS.insert(VD); 12373 } 12374 } 12375 } 12376 12377 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 12378 if (!NeedToPush) 12379 return; 12380 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 12381 } 12382 12383 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12384 CodeGenFunction &CGF, 12385 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>, 12386 std::pair<Address, Address>> &LocalVars) 12387 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12388 if (!NeedToPush) 12389 return; 12390 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12391 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12392 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12393 } 12394 12395 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12396 if (!NeedToPush) 12397 return; 12398 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12399 } 12400 12401 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12402 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12403 12404 return llvm::any_of( 12405 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12406 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); }); 12407 } 12408 12409 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12410 const OMPExecutableDirective &S, 12411 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12412 const { 12413 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12414 // Vars in target/task regions must be excluded completely. 
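  // Any lastprivate conditional captured by such a region is collected below
  // and later pushed as a disabled entry, which suppresses conditional
  // updates inside the region.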
12415 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12416 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12417 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12418 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12419 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12420 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12421 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12422 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12423 } 12424 } 12425 // Exclude vars in private clauses. 12426 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12427 for (const Expr *Ref : C->varlists()) { 12428 if (!Ref->getType()->isScalarType()) 12429 continue; 12430 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12431 if (!DRE) 12432 continue; 12433 NeedToCheckForLPCs.insert(DRE->getDecl()); 12434 } 12435 } 12436 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12437 for (const Expr *Ref : C->varlists()) { 12438 if (!Ref->getType()->isScalarType()) 12439 continue; 12440 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12441 if (!DRE) 12442 continue; 12443 NeedToCheckForLPCs.insert(DRE->getDecl()); 12444 } 12445 } 12446 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12447 for (const Expr *Ref : C->varlists()) { 12448 if (!Ref->getType()->isScalarType()) 12449 continue; 12450 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12451 if (!DRE) 12452 continue; 12453 NeedToCheckForLPCs.insert(DRE->getDecl()); 12454 } 12455 } 12456 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12457 for (const Expr *Ref : C->varlists()) { 12458 if (!Ref->getType()->isScalarType()) 12459 continue; 12460 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12461 if (!DRE) 12462 continue; 12463 NeedToCheckForLPCs.insert(DRE->getDecl()); 12464 } 12465 } 12466 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 12467 for (const Expr *Ref : C->varlists()) { 12468 if (!Ref->getType()->isScalarType()) 12469 continue; 12470 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12471 if (!DRE) 12472 continue; 12473 NeedToCheckForLPCs.insert(DRE->getDecl()); 12474 } 12475 } 12476 for (const Decl *VD : NeedToCheckForLPCs) { 12477 for (const LastprivateConditionalData &Data : 12478 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12479 if (Data.DeclToUniqueName.count(VD) > 0) { 12480 if (!Data.Disabled) 12481 NeedToAddForLPCsAsDisabled.insert(VD); 12482 break; 12483 } 12484 } 12485 } 12486 } 12487 12488 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12489 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12490 : CGM(CGF.CGM), 12491 Action((CGM.getLangOpts().OpenMP >= 50 && 12492 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12493 [](const OMPLastprivateClause *C) { 12494 return C->getKind() == 12495 OMPC_LASTPRIVATE_conditional; 12496 })) 12497 ? 
ActionToDo::PushAsLastprivateConditional 12498 : ActionToDo::DoNotPush) { 12499 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12500 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 12501 return; 12502 assert(Action == ActionToDo::PushAsLastprivateConditional && 12503 "Expected a push action."); 12504 LastprivateConditionalData &Data = 12505 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12506 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12507 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 12508 continue; 12509 12510 for (const Expr *Ref : C->varlists()) { 12511 Data.DeclToUniqueName.insert(std::make_pair( 12512 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 12513 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 12514 } 12515 } 12516 Data.IVLVal = IVLVal; 12517 Data.Fn = CGF.CurFn; 12518 } 12519 12520 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12521 CodeGenFunction &CGF, const OMPExecutableDirective &S) 12522 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 12523 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12524 if (CGM.getLangOpts().OpenMP < 50) 12525 return; 12526 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 12527 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 12528 if (!NeedToAddForLPCsAsDisabled.empty()) { 12529 Action = ActionToDo::DisableLastprivateConditional; 12530 LastprivateConditionalData &Data = 12531 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12532 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 12533 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 12534 Data.Fn = CGF.CurFn; 12535 Data.Disabled = true; 12536 } 12537 } 12538 12539 CGOpenMPRuntime::LastprivateConditionalRAII 12540 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 12541 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 12542 return LastprivateConditionalRAII(CGF, S); 12543 } 12544 12545 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 12546 if (CGM.getLangOpts().OpenMP < 50) 12547 return; 12548 if (Action == ActionToDo::DisableLastprivateConditional) { 12549 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12550 "Expected list of disabled private vars."); 12551 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12552 } 12553 if (Action == ActionToDo::PushAsLastprivateConditional) { 12554 assert( 12555 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12556 "Expected list of lastprivate conditional vars."); 12557 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12558 } 12559 } 12560 12561 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 12562 const VarDecl *VD) { 12563 ASTContext &C = CGM.getContext(); 12564 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 12565 if (I == LastprivateConditionalToTypes.end()) 12566 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 12567 QualType NewType; 12568 const FieldDecl *VDField; 12569 const FieldDecl *FiredField; 12570 LValue BaseLVal; 12571 auto VI = I->getSecond().find(VD); 12572 if (VI == I->getSecond().end()) { 12573 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 12574 RD->startDefinition(); 12575 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 12576 FiredField = addFieldToRecordDecl(C, RD, C.CharTy); 12577 RD->completeDefinition(); 12578 
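    // At this point the implicit record has the layout (sketch):
    //   struct { <VD type> val; char Fired; };
    // where Fired flags whether the privatized value was ever written.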
NewType = C.getRecordType(RD); 12579 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12580 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12581 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12582 } else { 12583 NewType = std::get<0>(VI->getSecond()); 12584 VDField = std::get<1>(VI->getSecond()); 12585 FiredField = std::get<2>(VI->getSecond()); 12586 BaseLVal = std::get<3>(VI->getSecond()); 12587 } 12588 LValue FiredLVal = 12589 CGF.EmitLValueForField(BaseLVal, FiredField); 12590 CGF.EmitStoreOfScalar( 12591 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12592 FiredLVal); 12593 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12594 } 12595 12596 namespace { 12597 /// Checks if the lastprivate conditional variable is referenced in LHS. 12598 class LastprivateConditionalRefChecker final 12599 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12600 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12601 const Expr *FoundE = nullptr; 12602 const Decl *FoundD = nullptr; 12603 StringRef UniqueDeclName; 12604 LValue IVLVal; 12605 llvm::Function *FoundFn = nullptr; 12606 SourceLocation Loc; 12607 12608 public: 12609 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12610 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12611 llvm::reverse(LPM)) { 12612 auto It = D.DeclToUniqueName.find(E->getDecl()); 12613 if (It == D.DeclToUniqueName.end()) 12614 continue; 12615 if (D.Disabled) 12616 return false; 12617 FoundE = E; 12618 FoundD = E->getDecl()->getCanonicalDecl(); 12619 UniqueDeclName = It->second; 12620 IVLVal = D.IVLVal; 12621 FoundFn = D.Fn; 12622 break; 12623 } 12624 return FoundE == E; 12625 } 12626 bool VisitMemberExpr(const MemberExpr *E) { 12627 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12628 return false; 12629 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12630 llvm::reverse(LPM)) { 12631 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12632 if (It == D.DeclToUniqueName.end()) 12633 continue; 12634 if (D.Disabled) 12635 return false; 12636 FoundE = E; 12637 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12638 UniqueDeclName = It->second; 12639 IVLVal = D.IVLVal; 12640 FoundFn = D.Fn; 12641 break; 12642 } 12643 return FoundE == E; 12644 } 12645 bool VisitStmt(const Stmt *S) { 12646 for (const Stmt *Child : S->children()) { 12647 if (!Child) 12648 continue; 12649 if (const auto *E = dyn_cast<Expr>(Child)) 12650 if (!E->isGLValue()) 12651 continue; 12652 if (Visit(Child)) 12653 return true; 12654 } 12655 return false; 12656 } 12657 explicit LastprivateConditionalRefChecker( 12658 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12659 : LPM(LPM) {} 12660 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12661 getFoundData() const { 12662 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12663 } 12664 }; 12665 } // namespace 12666 12667 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12668 LValue IVLVal, 12669 StringRef UniqueDeclName, 12670 LValue LVal, 12671 SourceLocation Loc) { 12672 // Last updated loop counter for the lastprivate conditional var. 
12673 // int<xx> last_iv = 0; 12674 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12675 llvm::Constant *LastIV = 12676 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12677 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12678 IVLVal.getAlignment().getAsAlign()); 12679 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12680 12681 // Last value of the lastprivate conditional. 12682 // decltype(priv_a) last_a; 12683 llvm::Constant *Last = getOrCreateInternalVariable( 12684 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12685 cast<llvm::GlobalVariable>(Last)->setAlignment( 12686 LVal.getAlignment().getAsAlign()); 12687 LValue LastLVal = 12688 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 12689 12690 // Global loop counter. Required to handle inner parallel-for regions. 12691 // iv 12692 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12693 12694 // #pragma omp critical(a) 12695 // if (last_iv <= iv) { 12696 // last_iv = iv; 12697 // last_a = priv_a; 12698 // } 12699 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12700 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12701 Action.Enter(CGF); 12702 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12703 // (last_iv <= iv) ? Check if the variable is updated and store new 12704 // value in global var. 12705 llvm::Value *CmpRes; 12706 if (IVLVal.getType()->isSignedIntegerType()) { 12707 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12708 } else { 12709 assert(IVLVal.getType()->isUnsignedIntegerType() && 12710 "Loop iteration variable must be integer."); 12711 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12712 } 12713 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12714 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12715 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12716 // { 12717 CGF.EmitBlock(ThenBB); 12718 12719 // last_iv = iv; 12720 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12721 12722 // last_a = priv_a; 12723 switch (CGF.getEvaluationKind(LVal.getType())) { 12724 case TEK_Scalar: { 12725 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12726 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12727 break; 12728 } 12729 case TEK_Complex: { 12730 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12731 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 12732 break; 12733 } 12734 case TEK_Aggregate: 12735 llvm_unreachable( 12736 "Aggregates are not supported in lastprivate conditional."); 12737 } 12738 // } 12739 CGF.EmitBranch(ExitBB); 12740 // There is no need to emit line number for unconditional branch. 12741 (void)ApplyDebugLocation::CreateEmpty(CGF); 12742 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12743 }; 12744 12745 if (CGM.getLangOpts().OpenMPSimd) { 12746 // Do not emit as a critical region as no parallel region could be emitted. 
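    // (Under -fopenmp-simd only simd constructs are honored, so the update
    // cannot race and needs no critical-section guard.)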
12747 RegionCodeGenTy ThenRCG(CodeGen); 12748 ThenRCG(CGF); 12749 } else { 12750 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12751 } 12752 } 12753 12754 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12755 const Expr *LHS) { 12756 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12757 return; 12758 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12759 if (!Checker.Visit(LHS)) 12760 return; 12761 const Expr *FoundE; 12762 const Decl *FoundD; 12763 StringRef UniqueDeclName; 12764 LValue IVLVal; 12765 llvm::Function *FoundFn; 12766 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12767 Checker.getFoundData(); 12768 if (FoundFn != CGF.CurFn) { 12769 // Special codegen for inner parallel regions. 12770 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12771 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12772 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12773 "Lastprivate conditional is not found in outer region."); 12774 QualType StructTy = std::get<0>(It->getSecond()); 12775 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12776 LValue PrivLVal = CGF.EmitLValue(FoundE); 12777 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12778 PrivLVal.getAddress(CGF), 12779 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12780 LValue BaseLVal = 12781 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12782 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12783 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12784 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12785 FiredLVal, llvm::AtomicOrdering::Unordered, 12786 /*IsVolatile=*/true, /*isInit=*/false); 12787 return; 12788 } 12789 12790 // Private address of the lastprivate conditional in the current context. 

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}
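
// Illustrative expansion of the scan above, with placeholder names: at the
// end of the enclosing region, for each tracked variable 'a' the emitted
// code is equivalent to
//
//   if (priv_a.Fired != 0) {
//     // perform the ordered { last_iv, last_a } update for priv_a
//   }
//
// so stores that happened only inside inner regions still reach the globals
// before the final copy-out.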

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
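
// Roughly, the copy-back emitted above corresponds to (sketch only):
//
//   if (<a global 'last_a' was created for this variable>)
//     priv_a = last_a; // the regular lastprivate copy-out then publishes it
//
// If no conditional update ever fired, the global does not exist and the
// private copy is left untouched.

// The CGOpenMPSIMDRuntime overrides below trap deliberately: in SIMD-only
// mode (-fopenmp-simd) only 'simd' semantics are honored, so these runtime
// entry points are never expected to be reached.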

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
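
// Delegating to the base class is sound here because, for example, with
//
//   #pragma omp simd reduction(+:sum)
//
// there is no cross-thread combination to perform: under a single implicit
// thread the simple-reduction path just emits the reduction ops (roughly
// 'sum = sum + sum_priv' and the like) inline.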

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}