//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
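// Illustrative sketch (not verbatim output) of the dispatch structure that
// UntiedTaskActionTy emits for an untied task with two parts; the block names
// follow the createBasicBlock calls above:
//
//   switch (*partid) {            // emitted by Enter()
//   default: goto .untied.done.;  // the switch's default destination
//   case 0:  goto .untied.jmp.0;
//   case 1:  goto .untied.jmp.1;
//   }
//   .untied.jmp.0:
//     ... first part of the task body ...
//     *partid = 1;                       // emitted by emitUntiedSwitch()
//     __kmpc_omp_task(loc, tid, task_t); // re-enqueue the task, then return
//   .untied.jmp.1:
//     ... next part of the task body ...
//   .untied.done.:
//     return;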
/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up the
    // variable in a list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application, so it is provided by the client, because only the
/// client has the information to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
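// For example, the implicit barrier at the end of a 'sections' region is
// encoded as OMP_IDENT_BARRIER_IMPL_SECTIONS. Each composite
// OMP_IDENT_BARRIER_IMPL_* value keeps the generic OMP_IDENT_BARRIER_IMPL bit
// (0x40) set (0xC0 == 0x80 | 0x40, 0x140 == 0x100 | 0x40), so the runtime can
// still recognize any implicit barrier with a single mask.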
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**<  source[4] in Fortran, do not use for
///                                  C++  */
///    char const *psource;    /**<  String describing the source location.
///                                  The string is composed of semi-colon
///                                  separated fields which describe the source
///                                  file, the function and a pair of line
///                                  numbers that delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
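// The schedule modifiers are encoded in the same word as the schedule kind;
// e.g. 'schedule(nonmonotonic: dynamic)' is passed to the dispatch-init
// runtime entry as (OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic),
// i.e. 35 | (1 << 30).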
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
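// For example (illustrative declaration; 'merge' and 'init' are hypothetical
// user functions), given
//   #pragma omp declare reduction(merge : T : omp_out = merge(omp_out, omp_in)) \
//       initializer(omp_priv = init(omp_orig))
// the mapping above binds 'omp_priv' (the LHS of the initializer expression)
// to the private copy and 'omp_orig' (the RHS) to the original shared
// variable before the initializer call is emitted.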
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
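// The emitted control flow has roughly this shape (illustrative):
//
//     dest = &array[0]; end = dest + num_elements;
//     if (dest == end) goto omp.arrayinit.done;
//   omp.arrayinit.body:
//     <initialize *dest; for declare-reduction inits, also advance src>
//     ++dest;
//     if (dest != end) goto omp.arrayinit.body;
//   omp.arrayinit.done: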
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars =
        CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        CGF.Builder.CreateElementBitCast(
            PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
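// For example (illustrative), for 'reduction(+ : a[2:8])' the private copy
// covers only the section, so the code above computes the distance between
// the base of 'a' and the section's lower bound and applies the same (here
// negative) offset to the private pointer, yielding a pseudo-base address
// through which later subscripts of 'a' resolve into the private buffer.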
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}
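// For example, with the '.' first/normal separators the base CGOpenMPRuntime
// is constructed with, getName({"omp_combiner", ""}) yields ".omp_combiner."
// and getName({"init"}) yields ".init".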
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *omp_out, Ty *omp_in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}
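// For example (illustrative; 'MyTy' is a hypothetical user type), for
//   #pragma omp declare reduction(+ : MyTy : omp_out = omp_out + omp_in)
// the combiner emitted above is roughly equivalent to
//   void .omp_combiner.(MyTy *__restrict omp_out, MyTy *__restrict omp_in) {
//     *omp_out = *omp_out + *omp_in;
//   }
// and getUserDefinedReduction returns it as the first element of the pair.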
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at
    // IP, use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will be no need to push
    // and pop a FinalizationInfo object.
    // The FiniCB will still be needed, but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
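// The outlined functions produced above use the kmpc microtask convention
// (cf. getKmpc_MicroPointerTy below), roughly
//   void .omp_outlined.(kmp_int32 *global_tid, kmp_int32 *bound_tid,
//                       /* captured variables */...);
// so the runtime can invoke them via __kmpc_fork_call/__kmpc_fork_teams.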
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
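// Note: the "service insert point" created above is a dummy 'bitcast i32
// undef' instruction with no semantics of its own. It merely marks a stable
// position (right after the allocas, or at the current point) at which calls
// such as __kmpc_global_thread_num can later be inserted; it is erased again
// by clearLocThreadIdInsertPt below.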
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
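// For example (illustrative), a construct at line 12, column 3 of "test.c"
// inside the function 'foo' yields the ident string ";test.c;foo;12;3;;".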
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If the value was loaded in the entry block, cache it and use it
        // everywhere in the function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate the thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                            : "__kmpc_for_static_init_4u")
                                : (IVSigned ? "__kmpc_for_static_init_8"
                                            : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
      CGM.Int32Ty,           // schedtype
      ITy,                   // lower
      ITy,                   // upper
      ITy,                   // stride
      ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as the line number associated
/// with the relevant entry source location.
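/// As a sketch of how the key is derived (based on the code below): DeviceID
/// and FileID come from the llvm::sys::fs::UniqueID of the presumed file of
/// \p Loc, and LineNum is its presumed line.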
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should always be valid and have a file ID (the user cannot use
  // #pragma directives in macros).

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Look up the entry, lazily creating it if necessary.
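  // (Illustrative: for a threadprivate variable `x`, this yields an internal
  // global named roughly "<mangled-name-of-x>.cache." -- the exact separator
  // characters come from getName() and can differ per target.)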
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     OMPBuilder.getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init the OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register the constructor/destructor for the variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits a destructor call for the threadprivate
      // copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL: the parameter is reserved by the runtime, which currently
    // requires it to always be NULL; otherwise it fires an assertion.
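    // (For reference, the registration emitted by emitThreadPrivateVarInit
    // above is __kmpc_threadprivate_register(&loc, &var, ctor, cctor, dtor);
    // the null values built here stand in for any omitted ctor/cctor/dtor.)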
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
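  // (The prefix built below has the shape
  //    __omp_offloading_<device-id>_<file-id>_<variable-name>_l<line>
  //  with the ids printed in hex; "_ctor"/"_dtor" suffixes are appended for
  //  the initializer and destructor entries.)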
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that emits a destructor call for the threadprivate
      // copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call, but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do
    // the handling there. Much cleaner code.
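    // Sketch of the serialized path this lambda produces (runtime entry
    // points as used below; the outlined-function name is illustrative):
    //   __kmpc_serialized_parallel(&loc, gtid);
    //   .omp_outlined.(&gtid, &.bound.zero.addr, <captured vars>...);
    //   __kmpc_end_serialized_parallel(&loc, gtid);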
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region, but in a regular serial code region, get the thread ID by calling
// kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
// temporary and return the address of that temporary.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
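/// On entry, emits a call to \p EnterCallee with \p EnterArgs; on exit, a call
/// to \p ExitCallee with \p ExitArgs. If \p Conditional is set, the region
/// body is emitted under an "omp_if.then" guard that is taken only when the
/// enter call returns a non-zero value (as with __kmpc_master or
/// __kmpc_single).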
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt).
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches.
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical.
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build a function that copies private values from the single region to
    // all other threads in the corresponding parallel region.
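    // The generated helper has roughly this shape (a sketch; the actual name
    // comes from getName({"omp", "copyprivate", "copy_func"}) above):
    //   void .omp.copyprivate.copy_func(void *LHS, void *RHS) {
    //     *(T0 *)((void **)LHS)[0] = *(T0 *)((void **)RHS)[0];
    //     ... one assignment per copyprivate variable ...
    //   }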
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered.
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose a 'static, 1' schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder.
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // Only static is allowed for dist_schedule.
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
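  // For example (assuming OpenMP >= 5.0 semantics as encoded below):
  // schedule(dynamic) with no modifier behaves like
  // schedule(nonmonotonic: dynamic), while schedule(static) keeps the
  // monotonic treatment because static schedules are excluded here.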
2677 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2678 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2679 Schedule == OMP_sch_static_balanced_chunked || 2680 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2681 Schedule == OMP_dist_sch_static_chunked || 2682 Schedule == OMP_dist_sch_static)) 2683 Modifier = OMP_sch_modifier_nonmonotonic; 2684 } 2685 return Schedule | Modifier; 2686 } 2687 2688 void CGOpenMPRuntime::emitForDispatchInit( 2689 CodeGenFunction &CGF, SourceLocation Loc, 2690 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2691 bool Ordered, const DispatchRTInput &DispatchValues) { 2692 if (!CGF.HaveInsertPoint()) 2693 return; 2694 OpenMPSchedType Schedule = getRuntimeSchedule( 2695 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2696 assert(Ordered || 2697 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2698 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2699 Schedule != OMP_sch_static_balanced_chunked)); 2700 // Call __kmpc_dispatch_init( 2701 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2702 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2703 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2704 2705 // If the Chunk was not specified in the clause - use default value 1. 2706 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2707 : CGF.Builder.getIntN(IVSize, 1); 2708 llvm::Value *Args[] = { 2709 emitUpdateLocation(CGF, Loc), 2710 getThreadID(CGF, Loc), 2711 CGF.Builder.getInt32(addMonoNonMonoModifier( 2712 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2713 DispatchValues.LB, // Lower 2714 DispatchValues.UB, // Upper 2715 CGF.Builder.getIntN(IVSize, 1), // Stride 2716 Chunk // Chunk 2717 }; 2718 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2719 } 2720 2721 static void emitForStaticInitCall( 2722 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2723 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2724 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2725 const CGOpenMPRuntime::StaticRTInput &Values) { 2726 if (!CGF.HaveInsertPoint()) 2727 return; 2728 2729 assert(!Values.Ordered); 2730 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2731 Schedule == OMP_sch_static_balanced_chunked || 2732 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2733 Schedule == OMP_dist_sch_static || 2734 Schedule == OMP_dist_sch_static_chunked); 2735 2736 // Call __kmpc_for_static_init( 2737 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2738 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2739 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2740 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2741 llvm::Value *Chunk = Values.Chunk; 2742 if (Chunk == nullptr) { 2743 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2744 Schedule == OMP_dist_sch_static) && 2745 "expected static non-chunked schedule"); 2746 // If the Chunk was not specified in the clause - use default value 1. 
2747 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2748 } else { 2749 assert((Schedule == OMP_sch_static_chunked || 2750 Schedule == OMP_sch_static_balanced_chunked || 2751 Schedule == OMP_ord_static_chunked || 2752 Schedule == OMP_dist_sch_static_chunked) && 2753 "expected static chunked schedule"); 2754 } 2755 llvm::Value *Args[] = { 2756 UpdateLocation, 2757 ThreadId, 2758 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2759 M2)), // Schedule type 2760 Values.IL.getPointer(), // &isLastIter 2761 Values.LB.getPointer(), // &LB 2762 Values.UB.getPointer(), // &UB 2763 Values.ST.getPointer(), // &Stride 2764 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2765 Chunk // Chunk 2766 }; 2767 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2768 } 2769 2770 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2771 SourceLocation Loc, 2772 OpenMPDirectiveKind DKind, 2773 const OpenMPScheduleTy &ScheduleKind, 2774 const StaticRTInput &Values) { 2775 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2776 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2777 assert(isOpenMPWorksharingDirective(DKind) && 2778 "Expected loop-based or sections-based directive."); 2779 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2780 isOpenMPLoopDirective(DKind) 2781 ? OMP_IDENT_WORK_LOOP 2782 : OMP_IDENT_WORK_SECTIONS); 2783 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2784 llvm::FunctionCallee StaticInitFunction = 2785 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2786 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2787 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2788 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2789 } 2790 2791 void CGOpenMPRuntime::emitDistributeStaticInit( 2792 CodeGenFunction &CGF, SourceLocation Loc, 2793 OpenMPDistScheduleClauseKind SchedKind, 2794 const CGOpenMPRuntime::StaticRTInput &Values) { 2795 OpenMPSchedType ScheduleNum = 2796 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2797 llvm::Value *UpdatedLocation = 2798 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2799 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2800 llvm::FunctionCallee StaticInitFunction = 2801 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2802 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2803 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2804 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2805 } 2806 2807 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2808 SourceLocation Loc, 2809 OpenMPDirectiveKind DKind) { 2810 if (!CGF.HaveInsertPoint()) 2811 return; 2812 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2813 llvm::Value *Args[] = { 2814 emitUpdateLocation(CGF, Loc, 2815 isOpenMPDistributeDirective(DKind) 2816 ? OMP_IDENT_WORK_DISTRIBUTE 2817 : isOpenMPLoopDirective(DKind) 2818 ? 
OMP_IDENT_WORK_LOOP 2819 : OMP_IDENT_WORK_SECTIONS), 2820 getThreadID(CGF, Loc)}; 2821 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2822 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2823 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2824 Args); 2825 } 2826 2827 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2828 SourceLocation Loc, 2829 unsigned IVSize, 2830 bool IVSigned) { 2831 if (!CGF.HaveInsertPoint()) 2832 return; 2833 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2834 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2835 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2836 } 2837 2838 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2839 SourceLocation Loc, unsigned IVSize, 2840 bool IVSigned, Address IL, 2841 Address LB, Address UB, 2842 Address ST) { 2843 // Call __kmpc_dispatch_next( 2844 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2845 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2846 // kmp_int[32|64] *p_stride); 2847 llvm::Value *Args[] = { 2848 emitUpdateLocation(CGF, Loc), 2849 getThreadID(CGF, Loc), 2850 IL.getPointer(), // &isLastIter 2851 LB.getPointer(), // &Lower 2852 UB.getPointer(), // &Upper 2853 ST.getPointer() // &Stride 2854 }; 2855 llvm::Value *Call = 2856 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2857 return CGF.EmitScalarConversion( 2858 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2859 CGF.getContext().BoolTy, Loc); 2860 } 2861 2862 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2863 llvm::Value *NumThreads, 2864 SourceLocation Loc) { 2865 if (!CGF.HaveInsertPoint()) 2866 return; 2867 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2868 llvm::Value *Args[] = { 2869 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2870 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2871 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2872 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2873 Args); 2874 } 2875 2876 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2877 ProcBindKind ProcBind, 2878 SourceLocation Loc) { 2879 if (!CGF.HaveInsertPoint()) 2880 return; 2881 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2882 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2883 llvm::Value *Args[] = { 2884 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2885 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2886 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2887 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2888 Args); 2889 } 2890 2891 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2892 SourceLocation Loc, llvm::AtomicOrdering AO) { 2893 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2894 OMPBuilder.createFlush(CGF.Builder); 2895 } else { 2896 if (!CGF.HaveInsertPoint()) 2897 return; 2898 // Build call void __kmpc_flush(ident_t *loc) 2899 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2900 CGM.getModule(), OMPRTL___kmpc_flush), 2901 emitUpdateLocation(CGF, Loc)); 2902 } 2903 } 2904 2905 namespace { 2906 /// Indexes of fields for type kmp_task_t. 2907 enum KmpTaskTFields { 2908 /// List of shared variables. 2909 KmpTaskTShareds, 2910 /// Task routine. 2911 KmpTaskTRoutine, 2912 /// Partition id for the untied tasks. 
2913 KmpTaskTPartId, 2914 /// Function with call of destructors for private variables. 2915 Data1, 2916 /// Task priority. 2917 Data2, 2918 /// (Taskloops only) Lower bound. 2919 KmpTaskTLowerBound, 2920 /// (Taskloops only) Upper bound. 2921 KmpTaskTUpperBound, 2922 /// (Taskloops only) Stride. 2923 KmpTaskTStride, 2924 /// (Taskloops only) Is last iteration flag. 2925 KmpTaskTLastIter, 2926 /// (Taskloops only) Reduction data. 2927 KmpTaskTReductions, 2928 }; 2929 } // anonymous namespace 2930 2931 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2932 return OffloadEntriesTargetRegion.empty() && 2933 OffloadEntriesDeviceGlobalVar.empty(); 2934 } 2935 2936 /// Initialize target region entry. 2937 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2938 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2939 StringRef ParentName, unsigned LineNum, 2940 unsigned Order) { 2941 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2942 "only required for the device " 2943 "code generation."); 2944 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2945 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2946 OMPTargetRegionEntryTargetRegion); 2947 ++OffloadingEntriesNum; 2948 } 2949 2950 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2951 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2952 StringRef ParentName, unsigned LineNum, 2953 llvm::Constant *Addr, llvm::Constant *ID, 2954 OMPTargetRegionEntryKind Flags) { 2955 // If we are emitting code for a target, the entry is already initialized, 2956 // only has to be registered. 2957 if (CGM.getLangOpts().OpenMPIsDevice) { 2958 // This could happen if the device compilation is invoked standalone. 2959 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 2960 initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 2961 OffloadingEntriesNum); 2962 auto &Entry = 2963 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2964 Entry.setAddress(Addr); 2965 Entry.setID(ID); 2966 Entry.setFlags(Flags); 2967 } else { 2968 if (Flags == 2969 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 2970 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 2971 /*IgnoreAddressId*/ true)) 2972 return; 2973 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 2974 "Target region entry already registered!"); 2975 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 2976 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2977 ++OffloadingEntriesNum; 2978 } 2979 } 2980 2981 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 2982 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 2983 bool IgnoreAddressId) const { 2984 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 2985 if (PerDevice == OffloadEntriesTargetRegion.end()) 2986 return false; 2987 auto PerFile = PerDevice->second.find(FileID); 2988 if (PerFile == PerDevice->second.end()) 2989 return false; 2990 auto PerParentName = PerFile->second.find(ParentName); 2991 if (PerParentName == PerFile->second.end()) 2992 return false; 2993 auto PerLine = PerParentName->second.find(LineNum); 2994 if (PerLine == PerParentName->second.end()) 2995 return false; 2996 // Fail if this entry is already registered. 
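// (With IgnoreAddressId the entry still counts as present even when its
// address/ID were already attached; registerTargetRegionEntryInfo above uses
// this to detect duplicate registration on the host.)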
2997 if (!IgnoreAddressId &&
2998 (PerLine->second.getAddress() || PerLine->second.getID()))
2999 return false;
3000 return true;
3001 }
3002
3003 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3004 const OffloadTargetRegionEntryInfoActTy &Action) {
3005 // Scan all target region entries and perform the provided action.
3006 for (const auto &D : OffloadEntriesTargetRegion)
3007 for (const auto &F : D.second)
3008 for (const auto &P : F.second)
3009 for (const auto &L : P.second)
3010 Action(D.first, F.first, P.first(), L.first, L.second);
3011 }
3012
3013 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3014 initializeDeviceGlobalVarEntryInfo(StringRef Name,
3015 OMPTargetGlobalVarEntryKind Flags,
3016 unsigned Order) {
3017 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3018 "only required for the device "
3019 "code generation.");
3020 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3021 ++OffloadingEntriesNum;
3022 }
3023
3024 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3025 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3026 CharUnits VarSize,
3027 OMPTargetGlobalVarEntryKind Flags,
3028 llvm::GlobalValue::LinkageTypes Linkage) {
3029 if (CGM.getLangOpts().OpenMPIsDevice) {
3030 // This could happen if the device compilation is invoked standalone.
3031 if (!hasDeviceGlobalVarEntryInfo(VarName))
3032 initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum);
3033 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3034 assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3035 "Resetting with the new address.");
3036 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3037 if (Entry.getVarSize().isZero()) {
3038 Entry.setVarSize(VarSize);
3039 Entry.setLinkage(Linkage);
3040 }
3041 return;
3042 }
3043 Entry.setVarSize(VarSize);
3044 Entry.setLinkage(Linkage);
3045 Entry.setAddress(Addr);
3046 } else {
3047 if (hasDeviceGlobalVarEntryInfo(VarName)) {
3048 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3049 assert(Entry.isValid() && Entry.getFlags() == Flags &&
3050 "Entry not initialized!");
3051 assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3052 "Resetting with the new address.");
3053 if (Entry.getVarSize().isZero()) {
3054 Entry.setVarSize(VarSize);
3055 Entry.setLinkage(Linkage);
3056 }
3057 return;
3058 }
3059 OffloadEntriesDeviceGlobalVar.try_emplace(
3060 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3061 ++OffloadingEntriesNum;
3062 }
3063 }
3064
3065 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3066 actOnDeviceGlobalVarEntriesInfo(
3067 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3068 // Scan all device global variable entries and perform the provided action.
3069 for (const auto &E : OffloadEntriesDeviceGlobalVar)
3070 Action(E.getKey(), E.getValue());
3071 }
3072
3073 void CGOpenMPRuntime::createOffloadEntry(
3074 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3075 llvm::GlobalValue::LinkageTypes Linkage) {
3076 StringRef Name = Addr->getName();
3077 llvm::Module &M = CGM.getModule();
3078 llvm::LLVMContext &C = M.getContext();
3079
3080 // Create a constant string with the name.
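// (Illustrative) For an entry symbol "foo" the globals built below look
// roughly like:
//   @.omp_offloading.entry_name = internal unnamed_addr constant [4 x i8] c"foo\00"
//   @.omp_offloading.entry.foo = weak constant %struct.__tgt_offload_entry { ... },
//       section "omp_offloading_entries"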
3081 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3082
3083 std::string StringName = getName({"omp_offloading", "entry_name"});
3084 auto *Str = new llvm::GlobalVariable(
3085 M, StrPtrInit->getType(), /*isConstant=*/true,
3086 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3087 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3088
3089 llvm::Constant *Data[] = {
3090 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3091 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3092 llvm::ConstantInt::get(CGM.SizeTy, Size),
3093 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3094 llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3095 std::string EntryName = getName({"omp_offloading", "entry", ""});
3096 llvm::GlobalVariable *Entry = createGlobalStruct(
3097 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3098 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3099
3100 // The entry has to be created in the section the linker expects it to be.
3101 Entry->setSection("omp_offloading_entries");
3102 }
3103
3104 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3105 // Emit the offloading entries and metadata so that the device codegen side
3106 // can easily figure out what to emit. The produced metadata looks like
3107 // this:
3108 //
3109 // !omp_offload.info = !{!1, ...}
3110 //
3111 // Right now we only generate metadata for functions that contain target
3112 // regions.
3113
3114 // If we are in simd mode or there are no entries, we don't need to do
3115 // anything.
3116 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3117 return;
3118
3119 llvm::Module &M = CGM.getModule();
3120 llvm::LLVMContext &C = M.getContext();
3121 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3122 SourceLocation, StringRef>,
3123 16>
3124 OrderedEntries(OffloadEntriesInfoManager.size());
3125 llvm::SmallVector<StringRef, 16> ParentFunctions(
3126 OffloadEntriesInfoManager.size());
3127
3128 // Auxiliary helpers to create metadata values and strings.
3129 auto &&GetMDInt = [this](unsigned V) {
3130 return llvm::ConstantAsMetadata::get(
3131 llvm::ConstantInt::get(CGM.Int32Ty, V));
3132 };
3133
3134 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3135
3136 // Create the offloading info metadata node.
3137 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3138
3139 // Create a function that emits metadata for each target region entry.
3140 auto &&TargetRegionMetadataEmitter =
3141 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3142 &GetMDString](
3143 unsigned DeviceID, unsigned FileID, StringRef ParentName,
3144 unsigned Line,
3145 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3146 // Generate metadata for target regions. Each entry of this metadata
3147 // contains:
3148 // - Entry 0 -> Kind of this type of metadata (0).
3149 // - Entry 1 -> Device ID of the file where the entry was identified.
3150 // - Entry 2 -> File ID of the file where the entry was identified.
3151 // - Entry 3 -> Mangled name of the function where the entry was
3152 // identified.
3153 // - Entry 4 -> Line in the file where the entry was identified.
3154 // - Entry 5 -> Order the entry was created.
3155 // The first element of the metadata node is the kind.
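// For instance (illustrative values only), a region found in function
// _Z3foov at line 42 would be described by an operand such as:
//   !{i32 0, i32 <device-id>, i32 <file-id>, !"_Z3foov", i32 42, i32 <order>}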
3156 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3157 GetMDInt(FileID), GetMDString(ParentName),
3158 GetMDInt(Line), GetMDInt(E.getOrder())};
3159
3160 SourceLocation Loc;
3161 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3162 E = CGM.getContext().getSourceManager().fileinfo_end();
3163 I != E; ++I) {
3164 if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3165 I->getFirst()->getUniqueID().getFile() == FileID) {
3166 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3167 I->getFirst(), Line, 1);
3168 break;
3169 }
3170 }
3171 // Save this entry in the right position of the ordered entries array.
3172 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3173 ParentFunctions[E.getOrder()] = ParentName;
3174
3175 // Add metadata to the named metadata node.
3176 MD->addOperand(llvm::MDNode::get(C, Ops));
3177 };
3178
3179 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3180 TargetRegionMetadataEmitter);
3181
3182 // Create a function that emits metadata for each device global variable entry.
3183 auto &&DeviceGlobalVarMetadataEmitter =
3184 [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3185 MD](StringRef MangledName,
3186 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3187 &E) {
3188 // Generate metadata for global variables. Each entry of this metadata
3189 // contains:
3190 // - Entry 0 -> Kind of this type of metadata (1).
3191 // - Entry 1 -> Mangled name of the variable.
3192 // - Entry 2 -> Declare target kind.
3193 // - Entry 3 -> Order the entry was created.
3194 // The first element of the metadata node is the kind.
3195 llvm::Metadata *Ops[] = {
3196 GetMDInt(E.getKind()), GetMDString(MangledName),
3197 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3198
3199 // Save this entry in the right position of the ordered entries array.
3200 OrderedEntries[E.getOrder()] =
3201 std::make_tuple(&E, SourceLocation(), MangledName);
3202
3203 // Add metadata to the named metadata node.
3204 MD->addOperand(llvm::MDNode::get(C, Ops));
3205 };
3206
3207 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3208 DeviceGlobalVarMetadataEmitter);
3209
3210 for (const auto &E : OrderedEntries) {
3211 assert(std::get<0>(E) && "All ordered entries must exist!");
3212 if (const auto *CE =
3213 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3214 std::get<0>(E))) {
3215 if (!CE->getID() || !CE->getAddress()) {
3216 // Do not blame the entry if the parent function is not emitted.
3217 StringRef FnName = ParentFunctions[CE->getOrder()];
3218 if (!CGM.GetGlobalValue(FnName))
3219 continue;
3220 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3221 DiagnosticsEngine::Error,
3222 "Offloading entry for target region in %0 is incorrect: either the "
3223 "address or the ID is invalid.");
3224 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3225 continue;
3226 }
3227 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3228 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3229 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3230 OffloadEntryInfoDeviceGlobalVar>(
3231 std::get<0>(E))) {
3232 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3233 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3234 CE->getFlags());
3235 switch (Flags) {
3236 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3237 if (CGM.getLangOpts().OpenMPIsDevice &&
3238 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3239 continue;
3240 if (!CE->getAddress()) {
3241 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3242 DiagnosticsEngine::Error, "Offloading entry for declare target "
3243 "variable %0 is incorrect: the "
3244 "address is invalid.");
3245 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3246 continue;
3247 }
3248 // The variable has no definition - no need to add the entry.
3249 if (CE->getVarSize().isZero())
3250 continue;
3251 break;
3252 }
3253 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3254 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3255 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3256 "Declare target link address is set.");
3257 if (CGM.getLangOpts().OpenMPIsDevice)
3258 continue;
3259 if (!CE->getAddress()) {
3260 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3261 DiagnosticsEngine::Error,
3262 "Offloading entry for declare target variable is incorrect: the "
3263 "address is invalid.");
3264 CGM.getDiags().Report(DiagID);
3265 continue;
3266 }
3267 break;
3268 }
3269 createOffloadEntry(CE->getAddress(), CE->getAddress(),
3270 CE->getVarSize().getQuantity(), Flags,
3271 CE->getLinkage());
3272 } else {
3273 llvm_unreachable("Unsupported entry kind.");
3274 }
3275 }
3276 }
3277
3278 /// Loads all the offload entries information from the host IR
3279 /// metadata.
3280 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3281 // If we are in target mode, load the metadata from the host IR. This code has
3282 // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
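// (Note: the host IR file is normally supplied to the device compilation via
// the cc1 option -fopenmp-host-ir-file-path <file>, which populates the
// LangOpts.OMPHostIRFile value checked below.)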
3283
3284 if (!CGM.getLangOpts().OpenMPIsDevice)
3285 return;
3286
3287 if (CGM.getLangOpts().OMPHostIRFile.empty())
3288 return;
3289
3290 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3291 if (auto EC = Buf.getError()) {
3292 CGM.getDiags().Report(diag::err_cannot_open_file)
3293 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3294 return;
3295 }
3296
3297 llvm::LLVMContext C;
3298 auto ME = expectedToErrorOrAndEmitErrors(
3299 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3300
3301 if (auto EC = ME.getError()) {
3302 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3303 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3304 CGM.getDiags().Report(DiagID)
3305 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3306 return;
3307 }
3308
3309 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3310 if (!MD)
3311 return;
3312
3313 for (llvm::MDNode *MN : MD->operands()) {
3314 auto &&GetMDInt = [MN](unsigned Idx) {
3315 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3316 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3317 };
3318
3319 auto &&GetMDString = [MN](unsigned Idx) {
3320 auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3321 return V->getString();
3322 };
3323
3324 switch (GetMDInt(0)) {
3325 default:
3326 llvm_unreachable("Unexpected metadata!");
3327 break;
3328 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3329 OffloadingEntryInfoTargetRegion:
3330 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3331 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3332 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3333 /*Order=*/GetMDInt(5));
3334 break;
3335 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3336 OffloadingEntryInfoDeviceGlobalVar:
3337 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3338 /*MangledName=*/GetMDString(1),
3339 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3340 /*Flags=*/GetMDInt(2)),
3341 /*Order=*/GetMDInt(3));
3342 break;
3343 }
3344 }
3345 }
3346
3347 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3348 if (!KmpRoutineEntryPtrTy) {
3349 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3350 ASTContext &C = CGM.getContext();
3351 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3352 FunctionProtoType::ExtProtoInfo EPI;
3353 KmpRoutineEntryPtrQTy = C.getPointerType(
3354 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3355 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3356 }
3357 }
3358
3359 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3360 // Make sure the type of the entry is already created. This is the type we
3361 // have to create:
3362 // struct __tgt_offload_entry{
3363 // void *addr; // Pointer to the offload entry info.
3364 // // (function or global)
3365 // char *name; // Name of the function or global.
3366 // size_t size; // Size of the entry info (0 if it is a function).
3367 // int32_t flags; // Flags associated with the entry, e.g. 'link'.
3368 // int32_t reserved; // Reserved, to be used by the runtime library.
3369 // }; 3370 if (TgtOffloadEntryQTy.isNull()) { 3371 ASTContext &C = CGM.getContext(); 3372 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3373 RD->startDefinition(); 3374 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3375 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3376 addFieldToRecordDecl(C, RD, C.getSizeType()); 3377 addFieldToRecordDecl( 3378 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3379 addFieldToRecordDecl( 3380 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3381 RD->completeDefinition(); 3382 RD->addAttr(PackedAttr::CreateImplicit(C)); 3383 TgtOffloadEntryQTy = C.getRecordType(RD); 3384 } 3385 return TgtOffloadEntryQTy; 3386 } 3387 3388 namespace { 3389 struct PrivateHelpersTy { 3390 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3391 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3392 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3393 PrivateElemInit(PrivateElemInit) {} 3394 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3395 const Expr *OriginalRef = nullptr; 3396 const VarDecl *Original = nullptr; 3397 const VarDecl *PrivateCopy = nullptr; 3398 const VarDecl *PrivateElemInit = nullptr; 3399 bool isLocalPrivate() const { 3400 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3401 } 3402 }; 3403 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3404 } // anonymous namespace 3405 3406 static bool isAllocatableDecl(const VarDecl *VD) { 3407 const VarDecl *CVD = VD->getCanonicalDecl(); 3408 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3409 return false; 3410 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3411 // Use the default allocation. 3412 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 3413 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 3414 !AA->getAllocator()); 3415 } 3416 3417 static RecordDecl * 3418 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3419 if (!Privates.empty()) { 3420 ASTContext &C = CGM.getContext(); 3421 // Build struct .kmp_privates_t. { 3422 // /* private vars */ 3423 // }; 3424 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3425 RD->startDefinition(); 3426 for (const auto &Pair : Privates) { 3427 const VarDecl *VD = Pair.second.Original; 3428 QualType Type = VD->getType().getNonReferenceType(); 3429 // If the private variable is a local variable with lvalue ref type, 3430 // allocate the pointer instead of the pointee type. 
3431 if (Pair.second.isLocalPrivate()) { 3432 if (VD->getType()->isLValueReferenceType()) 3433 Type = C.getPointerType(Type); 3434 if (isAllocatableDecl(VD)) 3435 Type = C.getPointerType(Type); 3436 } 3437 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3438 if (VD->hasAttrs()) { 3439 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3440 E(VD->getAttrs().end()); 3441 I != E; ++I) 3442 FD->addAttr(*I); 3443 } 3444 } 3445 RD->completeDefinition(); 3446 return RD; 3447 } 3448 return nullptr; 3449 } 3450 3451 static RecordDecl * 3452 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3453 QualType KmpInt32Ty, 3454 QualType KmpRoutineEntryPointerQTy) { 3455 ASTContext &C = CGM.getContext(); 3456 // Build struct kmp_task_t { 3457 // void * shareds; 3458 // kmp_routine_entry_t routine; 3459 // kmp_int32 part_id; 3460 // kmp_cmplrdata_t data1; 3461 // kmp_cmplrdata_t data2; 3462 // For taskloops additional fields: 3463 // kmp_uint64 lb; 3464 // kmp_uint64 ub; 3465 // kmp_int64 st; 3466 // kmp_int32 liter; 3467 // void * reductions; 3468 // }; 3469 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3470 UD->startDefinition(); 3471 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3472 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3473 UD->completeDefinition(); 3474 QualType KmpCmplrdataTy = C.getRecordType(UD); 3475 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3476 RD->startDefinition(); 3477 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3478 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3479 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3480 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3481 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3482 if (isOpenMPTaskLoopDirective(Kind)) { 3483 QualType KmpUInt64Ty = 3484 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3485 QualType KmpInt64Ty = 3486 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3487 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3488 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3489 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3490 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3491 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3492 } 3493 RD->completeDefinition(); 3494 return RD; 3495 } 3496 3497 static RecordDecl * 3498 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3499 ArrayRef<PrivateDataTy> Privates) { 3500 ASTContext &C = CGM.getContext(); 3501 // Build struct kmp_task_t_with_privates { 3502 // kmp_task_t task_data; 3503 // .kmp_privates_t. privates; 3504 // }; 3505 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3506 RD->startDefinition(); 3507 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3508 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3509 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3510 RD->completeDefinition(); 3511 return RD; 3512 } 3513 3514 /// Emit a proxy function which accepts kmp_task_t as the second 3515 /// argument. 
3516 /// \code 3517 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3518 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3519 /// For taskloops: 3520 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3521 /// tt->reductions, tt->shareds); 3522 /// return 0; 3523 /// } 3524 /// \endcode 3525 static llvm::Function * 3526 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3527 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3528 QualType KmpTaskTWithPrivatesPtrQTy, 3529 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3530 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3531 llvm::Value *TaskPrivatesMap) { 3532 ASTContext &C = CGM.getContext(); 3533 FunctionArgList Args; 3534 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3535 ImplicitParamDecl::Other); 3536 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3537 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3538 ImplicitParamDecl::Other); 3539 Args.push_back(&GtidArg); 3540 Args.push_back(&TaskTypeArg); 3541 const auto &TaskEntryFnInfo = 3542 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3543 llvm::FunctionType *TaskEntryTy = 3544 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3545 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3546 auto *TaskEntry = llvm::Function::Create( 3547 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3548 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3549 TaskEntry->setDoesNotRecurse(); 3550 CodeGenFunction CGF(CGM); 3551 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3552 Loc, Loc); 3553 3554 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3555 // tt, 3556 // For taskloops: 3557 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3558 // tt->task_data.shareds); 3559 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3560 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3561 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3562 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3563 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3564 const auto *KmpTaskTWithPrivatesQTyRD = 3565 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3566 LValue Base = 3567 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3568 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3569 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3570 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3571 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3572 3573 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3574 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3575 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3576 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3577 CGF.ConvertTypeForMem(SharedsPtrTy)); 3578 3579 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3580 llvm::Value *PrivatesParam; 3581 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3582 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3583 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3584 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3585 } else { 3586 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 
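// No privates block was generated for this task, so pass a null pointer.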
3587 } 3588 3589 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3590 TaskPrivatesMap, 3591 CGF.Builder 3592 .CreatePointerBitCastOrAddrSpaceCast( 3593 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3594 .getPointer()}; 3595 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3596 std::end(CommonArgs)); 3597 if (isOpenMPTaskLoopDirective(Kind)) { 3598 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3599 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3600 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3601 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3602 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3603 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3604 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3605 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3606 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3607 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3608 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3609 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3610 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3611 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3612 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3613 CallArgs.push_back(LBParam); 3614 CallArgs.push_back(UBParam); 3615 CallArgs.push_back(StParam); 3616 CallArgs.push_back(LIParam); 3617 CallArgs.push_back(RParam); 3618 } 3619 CallArgs.push_back(SharedsParam); 3620 3621 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3622 CallArgs); 3623 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3624 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3625 CGF.FinishFunction(); 3626 return TaskEntry; 3627 } 3628 3629 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3630 SourceLocation Loc, 3631 QualType KmpInt32Ty, 3632 QualType KmpTaskTWithPrivatesPtrQTy, 3633 QualType KmpTaskTWithPrivatesQTy) { 3634 ASTContext &C = CGM.getContext(); 3635 FunctionArgList Args; 3636 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3637 ImplicitParamDecl::Other); 3638 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3639 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3640 ImplicitParamDecl::Other); 3641 Args.push_back(&GtidArg); 3642 Args.push_back(&TaskTypeArg); 3643 const auto &DestructorFnInfo = 3644 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3645 llvm::FunctionType *DestructorFnTy = 3646 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3647 std::string Name = 3648 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3649 auto *DestructorFn = 3650 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3651 Name, &CGM.getModule()); 3652 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3653 DestructorFnInfo); 3654 DestructorFn->setDoesNotRecurse(); 3655 CodeGenFunction CGF(CGM); 3656 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3657 Args, Loc, Loc); 3658 3659 LValue Base = CGF.EmitLoadOfPointerLValue( 3660 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3661 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3662 const auto *KmpTaskTWithPrivatesQTyRD = 3663 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3664 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3665 Base = CGF.EmitLValueForField(Base, *FI); 3666 for 
(const auto *Field : 3667 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3668 if (QualType::DestructionKind DtorKind = 3669 Field->getType().isDestructedType()) { 3670 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3671 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3672 } 3673 } 3674 CGF.FinishFunction(); 3675 return DestructorFn; 3676 } 3677 3678 /// Emit a privates mapping function for correct handling of private and 3679 /// firstprivate variables. 3680 /// \code 3681 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 3682 /// **noalias priv1,..., <tyn> **noalias privn) { 3683 /// *priv1 = &.privates.priv1; 3684 /// ...; 3685 /// *privn = &.privates.privn; 3686 /// } 3687 /// \endcode 3688 static llvm::Value * 3689 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3690 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3691 ArrayRef<PrivateDataTy> Privates) { 3692 ASTContext &C = CGM.getContext(); 3693 FunctionArgList Args; 3694 ImplicitParamDecl TaskPrivatesArg( 3695 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3696 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3697 ImplicitParamDecl::Other); 3698 Args.push_back(&TaskPrivatesArg); 3699 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3700 unsigned Counter = 1; 3701 for (const Expr *E : Data.PrivateVars) { 3702 Args.push_back(ImplicitParamDecl::Create( 3703 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3704 C.getPointerType(C.getPointerType(E->getType())) 3705 .withConst() 3706 .withRestrict(), 3707 ImplicitParamDecl::Other)); 3708 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3709 PrivateVarsPos[VD] = Counter; 3710 ++Counter; 3711 } 3712 for (const Expr *E : Data.FirstprivateVars) { 3713 Args.push_back(ImplicitParamDecl::Create( 3714 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3715 C.getPointerType(C.getPointerType(E->getType())) 3716 .withConst() 3717 .withRestrict(), 3718 ImplicitParamDecl::Other)); 3719 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3720 PrivateVarsPos[VD] = Counter; 3721 ++Counter; 3722 } 3723 for (const Expr *E : Data.LastprivateVars) { 3724 Args.push_back(ImplicitParamDecl::Create( 3725 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3726 C.getPointerType(C.getPointerType(E->getType())) 3727 .withConst() 3728 .withRestrict(), 3729 ImplicitParamDecl::Other)); 3730 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3731 PrivateVarsPos[VD] = Counter; 3732 ++Counter; 3733 } 3734 for (const VarDecl *VD : Data.PrivateLocals) { 3735 QualType Ty = VD->getType().getNonReferenceType(); 3736 if (VD->getType()->isLValueReferenceType()) 3737 Ty = C.getPointerType(Ty); 3738 if (isAllocatableDecl(VD)) 3739 Ty = C.getPointerType(Ty); 3740 Args.push_back(ImplicitParamDecl::Create( 3741 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3742 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3743 ImplicitParamDecl::Other)); 3744 PrivateVarsPos[VD] = Counter; 3745 ++Counter; 3746 } 3747 const auto &TaskPrivatesMapFnInfo = 3748 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3749 llvm::FunctionType *TaskPrivatesMapTy = 3750 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3751 std::string Name = 3752 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3753 auto *TaskPrivatesMap = llvm::Function::Create( 3754 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3755 &CGM.getModule()); 3756 
CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3757 TaskPrivatesMapFnInfo); 3758 if (CGM.getLangOpts().Optimize) { 3759 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3760 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3761 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3762 } 3763 CodeGenFunction CGF(CGM); 3764 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3765 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3766 3767 // *privi = &.privates.privi; 3768 LValue Base = CGF.EmitLoadOfPointerLValue( 3769 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3770 TaskPrivatesArg.getType()->castAs<PointerType>()); 3771 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3772 Counter = 0; 3773 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3774 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3775 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3776 LValue RefLVal = 3777 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3778 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3779 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3780 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3781 ++Counter; 3782 } 3783 CGF.FinishFunction(); 3784 return TaskPrivatesMap; 3785 } 3786 3787 /// Emit initialization for private variables in task-based directives. 3788 static void emitPrivatesInit(CodeGenFunction &CGF, 3789 const OMPExecutableDirective &D, 3790 Address KmpTaskSharedsPtr, LValue TDBase, 3791 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3792 QualType SharedsTy, QualType SharedsPtrTy, 3793 const OMPTaskDataTy &Data, 3794 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3795 ASTContext &C = CGF.getContext(); 3796 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3797 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3798 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3799 ? OMPD_taskloop 3800 : OMPD_task; 3801 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3802 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3803 LValue SrcBase; 3804 bool IsTargetTask = 3805 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3806 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3807 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3808 // PointersArray, SizesArray, and MappersArray. The original variables for 3809 // these arrays are not captured and we get their addresses explicitly. 3810 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3811 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3812 SrcBase = CGF.MakeAddrLValue( 3813 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3814 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3815 SharedsTy); 3816 } 3817 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3818 for (const PrivateDataTy &Pair : Privates) { 3819 // Do not initialize private locals. 
3820 if (Pair.second.isLocalPrivate()) { 3821 ++FI; 3822 continue; 3823 } 3824 const VarDecl *VD = Pair.second.PrivateCopy; 3825 const Expr *Init = VD->getAnyInitializer(); 3826 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3827 !CGF.isTrivialInitializer(Init)))) { 3828 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3829 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3830 const VarDecl *OriginalVD = Pair.second.Original; 3831 // Check if the variable is the target-based BasePointersArray, 3832 // PointersArray, SizesArray, or MappersArray. 3833 LValue SharedRefLValue; 3834 QualType Type = PrivateLValue.getType(); 3835 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3836 if (IsTargetTask && !SharedField) { 3837 assert(isa<ImplicitParamDecl>(OriginalVD) && 3838 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3839 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3840 ->getNumParams() == 0 && 3841 isa<TranslationUnitDecl>( 3842 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3843 ->getDeclContext()) && 3844 "Expected artificial target data variable."); 3845 SharedRefLValue = 3846 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3847 } else if (ForDup) { 3848 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3849 SharedRefLValue = CGF.MakeAddrLValue( 3850 Address(SharedRefLValue.getPointer(CGF), 3851 C.getDeclAlign(OriginalVD)), 3852 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3853 SharedRefLValue.getTBAAInfo()); 3854 } else if (CGF.LambdaCaptureFields.count( 3855 Pair.second.Original->getCanonicalDecl()) > 0 || 3856 dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { 3857 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3858 } else { 3859 // Processing for implicitly captured variables. 3860 InlinedOpenMPRegionRAII Region( 3861 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3862 /*HasCancel=*/false, /*NoInheritance=*/true); 3863 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3864 } 3865 if (Type->isArrayType()) { 3866 // Initialize firstprivate array. 3867 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3868 // Perform simple memcpy. 3869 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3870 } else { 3871 // Initialize firstprivate array using element-by-element 3872 // initialization. 3873 CGF.EmitOMPAggregateAssign( 3874 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3875 Type, 3876 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3877 Address SrcElement) { 3878 // Clean up any temporaries needed by the initialization. 3879 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3880 InitScope.addPrivate( 3881 Elem, [SrcElement]() -> Address { return SrcElement; }); 3882 (void)InitScope.Privatize(); 3883 // Emit initialization for single element. 
3884 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3885 CGF, &CapturesInfo); 3886 CGF.EmitAnyExprToMem(Init, DestElement, 3887 Init->getType().getQualifiers(), 3888 /*IsInitializer=*/false); 3889 }); 3890 } 3891 } else { 3892 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3893 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3894 return SharedRefLValue.getAddress(CGF); 3895 }); 3896 (void)InitScope.Privatize(); 3897 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3898 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3899 /*capturedByInit=*/false); 3900 } 3901 } else { 3902 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3903 } 3904 } 3905 ++FI; 3906 } 3907 } 3908 3909 /// Check if duplication function is required for taskloops. 3910 static bool checkInitIsRequired(CodeGenFunction &CGF, 3911 ArrayRef<PrivateDataTy> Privates) { 3912 bool InitRequired = false; 3913 for (const PrivateDataTy &Pair : Privates) { 3914 if (Pair.second.isLocalPrivate()) 3915 continue; 3916 const VarDecl *VD = Pair.second.PrivateCopy; 3917 const Expr *Init = VD->getAnyInitializer(); 3918 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3919 !CGF.isTrivialInitializer(Init)); 3920 if (InitRequired) 3921 break; 3922 } 3923 return InitRequired; 3924 } 3925 3926 3927 /// Emit task_dup function (for initialization of 3928 /// private/firstprivate/lastprivate vars and last_iter flag) 3929 /// \code 3930 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3931 /// lastpriv) { 3932 /// // setup lastprivate flag 3933 /// task_dst->last = lastpriv; 3934 /// // could be constructor calls here... 3935 /// } 3936 /// \endcode 3937 static llvm::Value * 3938 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3939 const OMPExecutableDirective &D, 3940 QualType KmpTaskTWithPrivatesPtrQTy, 3941 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3942 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3943 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3944 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3945 ASTContext &C = CGM.getContext(); 3946 FunctionArgList Args; 3947 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3948 KmpTaskTWithPrivatesPtrQTy, 3949 ImplicitParamDecl::Other); 3950 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3951 KmpTaskTWithPrivatesPtrQTy, 3952 ImplicitParamDecl::Other); 3953 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3954 ImplicitParamDecl::Other); 3955 Args.push_back(&DstArg); 3956 Args.push_back(&SrcArg); 3957 Args.push_back(&LastprivArg); 3958 const auto &TaskDupFnInfo = 3959 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3960 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3961 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3962 auto *TaskDup = llvm::Function::Create( 3963 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3964 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3965 TaskDup->setDoesNotRecurse(); 3966 CodeGenFunction CGF(CGM); 3967 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3968 Loc); 3969 3970 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3971 CGF.GetAddrOfLocalVar(&DstArg), 3972 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3973 // task_dst->liter = lastpriv; 3974 if (WithLastIter) { 3975 auto LIFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3976 LValue Base = CGF.EmitLValueForField( 3977 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3978 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3979 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3980 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3981 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3982 } 3983 3984 // Emit initial values for private copies (if any). 3985 assert(!Privates.empty()); 3986 Address KmpTaskSharedsPtr = Address::invalid(); 3987 if (!Data.FirstprivateVars.empty()) { 3988 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3989 CGF.GetAddrOfLocalVar(&SrcArg), 3990 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3991 LValue Base = CGF.EmitLValueForField( 3992 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3993 KmpTaskSharedsPtr = Address( 3994 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3995 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3996 KmpTaskTShareds)), 3997 Loc), 3998 CGM.getNaturalTypeAlignment(SharedsTy)); 3999 } 4000 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4001 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4002 CGF.FinishFunction(); 4003 return TaskDup; 4004 } 4005 4006 /// Checks if destructor function is required to be generated. 4007 /// \return true if cleanups are required, false otherwise. 4008 static bool 4009 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4010 ArrayRef<PrivateDataTy> Privates) { 4011 for (const PrivateDataTy &P : Privates) { 4012 if (P.second.isLocalPrivate()) 4013 continue; 4014 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4015 if (Ty.isDestructedType()) 4016 return true; 4017 } 4018 return false; 4019 } 4020 4021 namespace { 4022 /// Loop generator for OpenMP iterator expression. 
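/// For a modifier such as 'iterator(i = 0 : n)' (e.g. in
/// 'depend(iterator(i = 0 : n), in : a[i])') the constructor emits, roughly,
/// \code
/// counter = 0; cont: if (counter < n) { i = begin + counter * step; body... }
/// \endcode
/// and the destructor emits the counter increment, the branch back to 'cont',
/// and the exit block.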
4023 class OMPIteratorGeneratorScope final
4024 : public CodeGenFunction::OMPPrivateScope {
4025 CodeGenFunction &CGF;
4026 const OMPIteratorExpr *E = nullptr;
4027 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
4028 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
4029 OMPIteratorGeneratorScope() = delete;
4030 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
4031
4032 public:
4033 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
4034 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
4035 if (!E)
4036 return;
4037 SmallVector<llvm::Value *, 4> Uppers;
4038 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4039 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
4040 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
4041 addPrivate(VD, [&CGF, VD]() {
4042 return CGF.CreateMemTemp(VD->getType(), VD->getName());
4043 });
4044 const OMPIteratorHelperData &HelperData = E->getHelper(I);
4045 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
4046 return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
4047 "counter.addr");
4048 });
4049 }
4050 Privatize();
4051
4052 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4053 const OMPIteratorHelperData &HelperData = E->getHelper(I);
4054 LValue CLVal =
4055 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
4056 HelperData.CounterVD->getType());
4057 // Counter = 0;
4058 CGF.EmitStoreOfScalar(
4059 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
4060 CLVal);
4061 CodeGenFunction::JumpDest &ContDest =
4062 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
4063 CodeGenFunction::JumpDest &ExitDest =
4064 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
4065 // N = <number-of-iterations>;
4066 llvm::Value *N = Uppers[I];
4067 // cont:
4068 // if (Counter < N) goto body; else goto exit;
4069 CGF.EmitBlock(ContDest.getBlock());
4070 auto *CVal =
4071 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
4072 llvm::Value *Cmp =
4073 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
4074 ?
CGF.Builder.CreateICmpSLT(CVal, N)
4075 : CGF.Builder.CreateICmpULT(CVal, N);
4076 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4077 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4078 // body:
4079 CGF.EmitBlock(BodyBB);
4080 // Iteri = Begini + Counter * Stepi;
4081 CGF.EmitIgnoredExpr(HelperData.Update);
4082 }
4083 }
4084 ~OMPIteratorGeneratorScope() {
4085 if (!E)
4086 return;
4087 for (unsigned I = E->numOfIterators(); I > 0; --I) {
4088 // Counter = Counter + 1;
4089 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4090 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4091 // goto cont;
4092 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4093 // exit:
4094 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4095 }
4096 }
4097 };
4098 } // namespace
4099
4100 static std::pair<llvm::Value *, llvm::Value *>
4101 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4102 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4103 llvm::Value *Addr;
4104 if (OASE) {
4105 const Expr *Base = OASE->getBase();
4106 Addr = CGF.EmitScalarExpr(Base);
4107 } else {
4108 Addr = CGF.EmitLValue(E).getPointer(CGF);
4109 }
4110 llvm::Value *SizeVal;
4111 QualType Ty = E->getType();
4112 if (OASE) {
4113 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4114 for (const Expr *SE : OASE->getDimensions()) {
4115 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4116 Sz = CGF.EmitScalarConversion(
4117 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4118 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4119 }
4120 } else if (const auto *ASE =
4121 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4122 LValue UpAddrLVal =
4123 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4124 llvm::Value *UpAddr =
4125 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4126 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4127 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4128 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4129 } else {
4130 SizeVal = CGF.getTypeSize(Ty);
4131 }
4132 return std::make_pair(Addr, SizeVal);
4133 }
4134
4135 /// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags type.
4136 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4137 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4138 if (KmpTaskAffinityInfoTy.isNull()) {
4139 RecordDecl *KmpAffinityInfoRD =
4140 C.buildImplicitRecord("kmp_task_affinity_info_t");
4141 KmpAffinityInfoRD->startDefinition();
4142 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4143 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4144 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4145 KmpAffinityInfoRD->completeDefinition();
4146 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4147 }
4148 }
4149
4150 CGOpenMPRuntime::TaskResultTy
4151 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4152 const OMPExecutableDirective &D,
4153 llvm::Function *TaskFunction, QualType SharedsTy,
4154 Address Shareds, const OMPTaskDataTy &Data) {
4155 ASTContext &C = CGM.getContext();
4156 llvm::SmallVector<PrivateDataTy, 4> Privates;
4157 // Aggregate privates and sort them by alignment.
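// (Sorting by decreasing alignment helps keep the generated .kmp_privates.t
// record free of internal padding between fields.)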
4158 const auto *I = Data.PrivateCopies.begin(); 4159 for (const Expr *E : Data.PrivateVars) { 4160 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4161 Privates.emplace_back( 4162 C.getDeclAlign(VD), 4163 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4164 /*PrivateElemInit=*/nullptr)); 4165 ++I; 4166 } 4167 I = Data.FirstprivateCopies.begin(); 4168 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4169 for (const Expr *E : Data.FirstprivateVars) { 4170 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4171 Privates.emplace_back( 4172 C.getDeclAlign(VD), 4173 PrivateHelpersTy( 4174 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4175 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4176 ++I; 4177 ++IElemInitRef; 4178 } 4179 I = Data.LastprivateCopies.begin(); 4180 for (const Expr *E : Data.LastprivateVars) { 4181 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4182 Privates.emplace_back( 4183 C.getDeclAlign(VD), 4184 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4185 /*PrivateElemInit=*/nullptr)); 4186 ++I; 4187 } 4188 for (const VarDecl *VD : Data.PrivateLocals) { 4189 if (isAllocatableDecl(VD)) 4190 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); 4191 else 4192 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 4193 } 4194 llvm::stable_sort(Privates, 4195 [](const PrivateDataTy &L, const PrivateDataTy &R) { 4196 return L.first > R.first; 4197 }); 4198 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4199 // Build type kmp_routine_entry_t (if not built yet). 4200 emitKmpRoutineEntryT(KmpInt32Ty); 4201 // Build type kmp_task_t (if not built yet). 4202 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4203 if (SavedKmpTaskloopTQTy.isNull()) { 4204 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4205 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4206 } 4207 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4208 } else { 4209 assert((D.getDirectiveKind() == OMPD_task || 4210 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4211 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4212 "Expected taskloop, task or target directive"); 4213 if (SavedKmpTaskTQTy.isNull()) { 4214 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4215 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4216 } 4217 KmpTaskTQTy = SavedKmpTaskTQTy; 4218 } 4219 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4220 // Build particular struct kmp_task_t for the given task. 4221 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4222 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4223 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4224 QualType KmpTaskTWithPrivatesPtrQTy = 4225 C.getPointerType(KmpTaskTWithPrivatesQTy); 4226 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4227 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4228 KmpTaskTWithPrivatesTy->getPointerTo(); 4229 llvm::Value *KmpTaskTWithPrivatesTySize = 4230 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4231 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4232 4233 // Emit initial values for private copies (if any). 
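// (The mapping function built below only hands out the address of each
// private copy inside the privates block; the initial values themselves are
// emitted by emitPrivatesInit.)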
4234 llvm::Value *TaskPrivatesMap = nullptr; 4235 llvm::Type *TaskPrivatesMapTy = 4236 std::next(TaskFunction->arg_begin(), 3)->getType(); 4237 if (!Privates.empty()) { 4238 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4239 TaskPrivatesMap = 4240 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); 4241 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4242 TaskPrivatesMap, TaskPrivatesMapTy); 4243 } else { 4244 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4245 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4246 } 4247 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4248 // kmp_task_t *tt); 4249 llvm::Function *TaskEntry = emitProxyTaskFunction( 4250 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4251 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4252 TaskPrivatesMap); 4253 4254 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4255 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4256 // kmp_routine_entry_t *task_entry); 4257 // Task flags. Format is taken from 4258 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 4259 // description of kmp_tasking_flags struct. 4260 enum { 4261 TiedFlag = 0x1, 4262 FinalFlag = 0x2, 4263 DestructorsFlag = 0x8, 4264 PriorityFlag = 0x20, 4265 DetachableFlag = 0x40, 4266 }; 4267 unsigned Flags = Data.Tied ? TiedFlag : 0; 4268 bool NeedsCleanup = false; 4269 if (!Privates.empty()) { 4270 NeedsCleanup = 4271 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); 4272 if (NeedsCleanup) 4273 Flags = Flags | DestructorsFlag; 4274 } 4275 if (Data.Priority.getInt()) 4276 Flags = Flags | PriorityFlag; 4277 if (D.hasClausesOfKind<OMPDetachClause>()) 4278 Flags = Flags | DetachableFlag; 4279 llvm::Value *TaskFlags = 4280 Data.Final.getPointer() 4281 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 4282 CGF.Builder.getInt32(FinalFlag), 4283 CGF.Builder.getInt32(/*C=*/0)) 4284 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4285 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4286 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4287 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 4288 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 4289 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4290 TaskEntry, KmpRoutineEntryPtrTy)}; 4291 llvm::Value *NewTask; 4292 if (D.hasClausesOfKind<OMPNowaitClause>()) { 4293 // Check if we have any device clause associated with the directive. 4294 const Expr *Device = nullptr; 4295 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 4296 Device = C->getDevice(); 4297 // Emit device ID if any otherwise use default value. 4298 llvm::Value *DeviceID; 4299 if (Device) 4300 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4301 CGF.Int64Ty, /*isSigned=*/true); 4302 else 4303 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 4304 AllocArgs.push_back(DeviceID); 4305 NewTask = CGF.EmitRuntimeCall( 4306 OMPBuilder.getOrCreateRuntimeFunction( 4307 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 4308 AllocArgs); 4309 } else { 4310 NewTask = 4311 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4312 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 4313 AllocArgs); 4314 } 4315 // Emit detach clause initialization. 
4316 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 4317 // task_descriptor); 4318 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 4319 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 4320 LValue EvtLVal = CGF.EmitLValue(Evt); 4321 4322 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 4323 // int gtid, kmp_task_t *task); 4324 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 4325 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 4326 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 4327 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 4328 OMPBuilder.getOrCreateRuntimeFunction( 4329 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 4330 {Loc, Tid, NewTask}); 4331 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 4332 Evt->getExprLoc()); 4333 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 4334 } 4335 // Process affinity clauses. 4336 if (D.hasClausesOfKind<OMPAffinityClause>()) { 4337 // Process list of affinity data. 4338 ASTContext &C = CGM.getContext(); 4339 Address AffinitiesArray = Address::invalid(); 4340 // Calculate number of elements to form the array of affinity data. 4341 llvm::Value *NumOfElements = nullptr; 4342 unsigned NumAffinities = 0; 4343 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4344 if (const Expr *Modifier = C->getModifier()) { 4345 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 4346 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4347 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4348 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4349 NumOfElements = 4350 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 4351 } 4352 } else { 4353 NumAffinities += C->varlist_size(); 4354 } 4355 } 4356 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 4357 // Fields ids in kmp_task_affinity_info record. 4358 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 4359 4360 QualType KmpTaskAffinityInfoArrayTy; 4361 if (NumOfElements) { 4362 NumOfElements = CGF.Builder.CreateNUWAdd( 4363 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 4364 OpaqueValueExpr OVE( 4365 Loc, 4366 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 4367 VK_RValue); 4368 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4369 RValue::get(NumOfElements)); 4370 KmpTaskAffinityInfoArrayTy = 4371 C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal, 4372 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4373 // Properly emit variable-sized array. 
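// e.g. '#pragma omp task affinity(iterator(i=0:n): a[i])' takes this branch:
// the iterator modifier makes the number of affinity entries a runtime value,
// so the array has to be emitted as a VLA.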
4374 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 4375 ImplicitParamDecl::Other); 4376 CGF.EmitVarDecl(*PD); 4377 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 4378 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4379 /*isSigned=*/false); 4380 } else { 4381 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 4382 KmpTaskAffinityInfoTy, 4383 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 4384 ArrayType::Normal, /*IndexTypeQuals=*/0); 4385 AffinitiesArray = 4386 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 4387 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 4388 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 4389 /*isSigned=*/false); 4390 } 4391 4392 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 4393 // Fill array by elements without iterators. 4394 unsigned Pos = 0; 4395 bool HasIterator = false; 4396 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4397 if (C->getModifier()) { 4398 HasIterator = true; 4399 continue; 4400 } 4401 for (const Expr *E : C->varlists()) { 4402 llvm::Value *Addr; 4403 llvm::Value *Size; 4404 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4405 LValue Base = 4406 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 4407 KmpTaskAffinityInfoTy); 4408 // affs[i].base_addr = &<Affinities[i].second>; 4409 LValue BaseAddrLVal = CGF.EmitLValueForField( 4410 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4411 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4412 BaseAddrLVal); 4413 // affs[i].len = sizeof(<Affinities[i].second>); 4414 LValue LenLVal = CGF.EmitLValueForField( 4415 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4416 CGF.EmitStoreOfScalar(Size, LenLVal); 4417 ++Pos; 4418 } 4419 } 4420 LValue PosLVal; 4421 if (HasIterator) { 4422 PosLVal = CGF.MakeAddrLValue( 4423 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 4424 C.getSizeType()); 4425 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4426 } 4427 // Process elements with iterators. 
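// The running position is kept in a memory temporary (PosLVal) rather than in
// an unsigned, because the iterator expansion below is emitted as an IR loop
// and the index must be carried across basic blocks.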
4428 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4429 const Expr *Modifier = C->getModifier(); 4430 if (!Modifier) 4431 continue; 4432 OMPIteratorGeneratorScope IteratorScope( 4433 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4434 for (const Expr *E : C->varlists()) { 4435 llvm::Value *Addr; 4436 llvm::Value *Size; 4437 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4438 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4439 LValue Base = CGF.MakeAddrLValue( 4440 Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx), 4441 AffinitiesArray.getAlignment()), 4442 KmpTaskAffinityInfoTy); 4443 // affs[i].base_addr = &<Affinities[i].second>; 4444 LValue BaseAddrLVal = CGF.EmitLValueForField( 4445 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4446 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4447 BaseAddrLVal); 4448 // affs[i].len = sizeof(<Affinities[i].second>); 4449 LValue LenLVal = CGF.EmitLValueForField( 4450 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4451 CGF.EmitStoreOfScalar(Size, LenLVal); 4452 Idx = CGF.Builder.CreateNUWAdd( 4453 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4454 CGF.EmitStoreOfScalar(Idx, PosLVal); 4455 } 4456 } 4457 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4458 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4459 // naffins, kmp_task_affinity_info_t *affin_list); 4460 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4461 llvm::Value *GTid = getThreadID(CGF, Loc); 4462 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4463 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4464 // FIXME: Emit the function and ignore its result for now unless the 4465 // runtime function is properly implemented. 4466 (void)CGF.EmitRuntimeCall( 4467 OMPBuilder.getOrCreateRuntimeFunction( 4468 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4469 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4470 } 4471 llvm::Value *NewTaskNewTaskTTy = 4472 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4473 NewTask, KmpTaskTWithPrivatesPtrTy); 4474 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4475 KmpTaskTWithPrivatesQTy); 4476 LValue TDBase = 4477 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4478 // Fill the data in the resulting kmp_task_t record. 4479 // Copy shareds if there are any. 4480 Address KmpTaskSharedsPtr = Address::invalid(); 4481 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4482 KmpTaskSharedsPtr = 4483 Address(CGF.EmitLoadOfScalar( 4484 CGF.EmitLValueForField( 4485 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 4486 KmpTaskTShareds)), 4487 Loc), 4488 CGM.getNaturalTypeAlignment(SharedsTy)); 4489 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4490 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4491 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4492 } 4493 // Emit initial values for private copies (if any). 
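// e.g. for '#pragma omp task firstprivate(a)' this copy-initializes the 'a'
// slot in the privates part of the allocated kmp_task_t from the captured
// shareds.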
4494 TaskResultTy Result; 4495 if (!Privates.empty()) { 4496 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4497 SharedsTy, SharedsPtrTy, Data, Privates, 4498 /*ForDup=*/false); 4499 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4500 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4501 Result.TaskDupFn = emitTaskDupFunction( 4502 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4503 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4504 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4505 } 4506 } 4507 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 4508 enum { Priority = 0, Destructors = 1 }; 4509 // Provide pointer to function with destructors for privates. 4510 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4511 const RecordDecl *KmpCmplrdataUD = 4512 (*FI)->getType()->getAsUnionType()->getDecl(); 4513 if (NeedsCleanup) { 4514 llvm::Value *DestructorFn = emitDestructorsFunction( 4515 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4516 KmpTaskTWithPrivatesQTy); 4517 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4518 LValue DestructorsLV = CGF.EmitLValueForField( 4519 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4520 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4521 DestructorFn, KmpRoutineEntryPtrTy), 4522 DestructorsLV); 4523 } 4524 // Set priority. 4525 if (Data.Priority.getInt()) { 4526 LValue Data2LV = CGF.EmitLValueForField( 4527 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4528 LValue PriorityLV = CGF.EmitLValueForField( 4529 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4530 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4531 } 4532 Result.NewTask = NewTask; 4533 Result.TaskEntry = TaskEntry; 4534 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4535 Result.TDBase = TDBase; 4536 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4537 return Result; 4538 } 4539 4540 namespace { 4541 /// Dependence kind for RTL. 4542 enum RTLDependenceKindTy { 4543 DepIn = 0x01, 4544 DepInOut = 0x3, 4545 DepMutexInOutSet = 0x4 4546 }; 4547 /// Fields ids in kmp_depend_info record. 4548 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 4549 } // namespace 4550 4551 /// Translates internal dependency kind into the runtime kind. 4552 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 4553 RTLDependenceKindTy DepKind; 4554 switch (K) { 4555 case OMPC_DEPEND_in: 4556 DepKind = DepIn; 4557 break; 4558 // Out and InOut dependencies must use the same code. 4559 case OMPC_DEPEND_out: 4560 case OMPC_DEPEND_inout: 4561 DepKind = DepInOut; 4562 break; 4563 case OMPC_DEPEND_mutexinoutset: 4564 DepKind = DepMutexInOutSet; 4565 break; 4566 case OMPC_DEPEND_source: 4567 case OMPC_DEPEND_sink: 4568 case OMPC_DEPEND_depobj: 4569 case OMPC_DEPEND_unknown: 4570 llvm_unreachable("Unknown task dependence type"); 4571 } 4572 return DepKind; 4573 } 4574 4575 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 
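/// The record built below mirrors the runtime's kmp_depend_info layout:
///   struct kmp_depend_info { intptr_t base_addr; size_t len; <flags> flags; };
/// where <flags> is an unsigned integer of boolean width.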
4576 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4577 QualType &FlagsTy) { 4578 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4579 if (KmpDependInfoTy.isNull()) { 4580 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4581 KmpDependInfoRD->startDefinition(); 4582 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4583 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4584 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4585 KmpDependInfoRD->completeDefinition(); 4586 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4587 } 4588 } 4589 4590 std::pair<llvm::Value *, LValue> 4591 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4592 SourceLocation Loc) { 4593 ASTContext &C = CGM.getContext(); 4594 QualType FlagsTy; 4595 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4596 RecordDecl *KmpDependInfoRD = 4597 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4598 LValue Base = CGF.EmitLoadOfPointerLValue( 4599 DepobjLVal.getAddress(CGF), 4600 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4601 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4602 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4603 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4604 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4605 Base.getTBAAInfo()); 4606 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4607 Addr.getPointer(), 4608 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4609 LValue NumDepsBase = CGF.MakeAddrLValue( 4610 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4611 Base.getBaseInfo(), Base.getTBAAInfo()); 4612 // NumDeps = deps[i].base_addr; 4613 LValue BaseAddrLVal = CGF.EmitLValueForField( 4614 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4615 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4616 return std::make_pair(NumDeps, Base); 4617 } 4618 4619 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4620 llvm::PointerUnion<unsigned *, LValue *> Pos, 4621 const OMPTaskDataTy::DependData &Data, 4622 Address DependenciesArray) { 4623 CodeGenModule &CGM = CGF.CGM; 4624 ASTContext &C = CGM.getContext(); 4625 QualType FlagsTy; 4626 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4627 RecordDecl *KmpDependInfoRD = 4628 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4629 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4630 4631 OMPIteratorGeneratorScope IteratorScope( 4632 CGF, cast_or_null<OMPIteratorExpr>( 4633 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts()
4634 : nullptr));
4635 for (const Expr *E : Data.DepExprs) {
4636 llvm::Value *Addr;
4637 llvm::Value *Size;
4638 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4639 LValue Base;
4640 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4641 Base = CGF.MakeAddrLValue(
4642 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4643 } else {
4644 LValue &PosLVal = *Pos.get<LValue *>();
4645 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4646 Base = CGF.MakeAddrLValue(
4647 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
4648 DependenciesArray.getAlignment()),
4649 KmpDependInfoTy);
4650 }
4651 // deps[i].base_addr = &<Dependencies[i].second>;
4652 LValue BaseAddrLVal = CGF.EmitLValueForField(
4653 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4654 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4655 BaseAddrLVal);
4656 // deps[i].len = sizeof(<Dependencies[i].second>);
4657 LValue LenLVal = CGF.EmitLValueForField(
4658 Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4659 CGF.EmitStoreOfScalar(Size, LenLVal);
4660 // deps[i].flags = <Dependencies[i].first>;
4661 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4662 LValue FlagsLVal = CGF.EmitLValueForField(
4663 Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4664 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4665 FlagsLVal);
4666 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4667 ++(*P);
4668 } else {
4669 LValue &PosLVal = *Pos.get<LValue *>();
4670 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4671 Idx = CGF.Builder.CreateNUWAdd(Idx,
4672 llvm::ConstantInt::get(Idx->getType(), 1));
4673 CGF.EmitStoreOfScalar(Idx, PosLVal);
4674 }
4675 }
4676 }
4677
4678 static SmallVector<llvm::Value *, 4>
4679 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4680 const OMPTaskDataTy::DependData &Data) {
4681 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4682 "Expected depobj dependency kind.");
4683 SmallVector<llvm::Value *, 4> Sizes;
4684 SmallVector<LValue, 4> SizeLVals;
4685 ASTContext &C = CGF.getContext();
4686 QualType FlagsTy;
4687 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4688 RecordDecl *KmpDependInfoRD =
4689 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4690 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4691 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4692 {
4693 OMPIteratorGeneratorScope IteratorScope(
4694 CGF, cast_or_null<OMPIteratorExpr>(
4695 Data.IteratorExpr ?
Data.IteratorExpr->IgnoreParenImpCasts()
4696 : nullptr));
4697 for (const Expr *E : Data.DepExprs) {
4698 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4699 LValue Base = CGF.EmitLoadOfPointerLValue(
4700 DepobjLVal.getAddress(CGF),
4701 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4702 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4703 Base.getAddress(CGF), KmpDependInfoPtrT);
4704 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4705 Base.getTBAAInfo());
4706 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4707 Addr.getPointer(),
4708 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4709 LValue NumDepsBase = CGF.MakeAddrLValue(
4710 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4711 Base.getBaseInfo(), Base.getTBAAInfo());
4712 // NumDeps = deps[i].base_addr;
4713 LValue BaseAddrLVal = CGF.EmitLValueForField(
4714 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4715 llvm::Value *NumDeps =
4716 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4717 LValue NumLVal = CGF.MakeAddrLValue(
4718 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4719 C.getUIntPtrType());
4720 CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4721 llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4722 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4723 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4724 CGF.EmitStoreOfScalar(Add, NumLVal);
4725 SizeLVals.push_back(NumLVal);
4726 }
4727 }
4728 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4729 llvm::Value *Size =
4730 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4731 Sizes.push_back(Size);
4732 }
4733 return Sizes;
4734 }
4735
4736 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4737 LValue PosLVal,
4738 const OMPTaskDataTy::DependData &Data,
4739 Address DependenciesArray) {
4740 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4741 "Expected depobj dependency kind.");
4742 ASTContext &C = CGF.getContext();
4743 QualType FlagsTy;
4744 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4745 RecordDecl *KmpDependInfoRD =
4746 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4747 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4748 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4749 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4750 {
4751 OMPIteratorGeneratorScope IteratorScope(
4752 CGF, cast_or_null<OMPIteratorExpr>(
4753 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4754 : nullptr));
4755 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4756 const Expr *E = Data.DepExprs[I];
4757 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4758 LValue Base = CGF.EmitLoadOfPointerLValue(
4759 DepobjLVal.getAddress(CGF),
4760 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4761 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4762 Base.getAddress(CGF), KmpDependInfoPtrT);
4763 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4764 Base.getTBAAInfo());
4765
4766 // Get number of elements in a single depobj.
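// The element count is stored in the base_addr field of the record at index
// -1 (emitDepobjDependClause reserves that slot), so step back one record
// from the array base and load it from there.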
4767 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4768 Addr.getPointer(),
4769 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4770 LValue NumDepsBase = CGF.MakeAddrLValue(
4771 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4772 Base.getBaseInfo(), Base.getTBAAInfo());
4773 // NumDeps = deps[i].base_addr;
4774 LValue BaseAddrLVal = CGF.EmitLValueForField(
4775 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4776 llvm::Value *NumDeps =
4777 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4778
4779 // memcpy dependency data.
4780 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4781 ElSize,
4782 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4783 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4784 Address DepAddr =
4785 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4786 DependenciesArray.getAlignment());
4787 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4788
4789 // Increase pos.
4790 // pos += numDeps;
4791 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4792 CGF.EmitStoreOfScalar(Add, PosLVal);
4793 }
4794 }
4795 }
4796
4797 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4798 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4799 SourceLocation Loc) {
4800 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4801 return D.DepExprs.empty();
4802 }))
4803 return std::make_pair(nullptr, Address::invalid());
4804 // Process list of dependencies.
4805 ASTContext &C = CGM.getContext();
4806 Address DependenciesArray = Address::invalid();
4807 llvm::Value *NumOfElements = nullptr;
4808 unsigned NumDependencies = std::accumulate(
4809 Dependencies.begin(), Dependencies.end(), 0,
4810 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4811 return D.DepKind == OMPC_DEPEND_depobj
4812 ? V
4813 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4814 });
4815 QualType FlagsTy;
4816 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4817 bool HasDepobjDeps = false;
4818 bool HasRegularWithIterators = false;
4819 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4820 llvm::Value *NumOfRegularWithIterators =
4821 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4822 // Calculate number of depobj dependencies and regular deps with the iterators.
4823 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4824 if (D.DepKind == OMPC_DEPEND_depobj) {
4825 SmallVector<llvm::Value *, 4> Sizes =
4826 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4827 for (llvm::Value *Size : Sizes) {
4828 NumOfDepobjElements =
4829 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4830 }
4831 HasDepobjDeps = true;
4832 continue;
4833 }
4834 // Include number of iterations, if any.
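// e.g. 'depend(iterator(i=0:n, j=0:m), in: a[i][j])' contributes n*m
// entries; the product of the iterator upper bounds is computed below.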
4835 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4836 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4837 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4838 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4839 NumOfRegularWithIterators =
4840 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4841 }
4842 HasRegularWithIterators = true;
4843 continue;
4844 }
4845 }
4846
4847 QualType KmpDependInfoArrayTy;
4848 if (HasDepobjDeps || HasRegularWithIterators) {
4849 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4850 /*isSigned=*/false);
4851 if (HasDepobjDeps) {
4852 NumOfElements =
4853 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4854 }
4855 if (HasRegularWithIterators) {
4856 NumOfElements =
4857 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4858 }
4859 OpaqueValueExpr OVE(Loc,
4860 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4861 VK_RValue);
4862 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4863 RValue::get(NumOfElements));
4864 KmpDependInfoArrayTy =
4865 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4866 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4867 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4868 // Properly emit variable-sized array.
4869 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4870 ImplicitParamDecl::Other);
4871 CGF.EmitVarDecl(*PD);
4872 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4873 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4874 /*isSigned=*/false);
4875 } else {
4876 KmpDependInfoArrayTy = C.getConstantArrayType(
4877 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4878 ArrayType::Normal, /*IndexTypeQuals=*/0);
4879 DependenciesArray =
4880 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4881 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4882 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4883 /*isSigned=*/false);
4884 }
4885 unsigned Pos = 0;
4886 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4887 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4888 Dependencies[I].IteratorExpr)
4889 continue;
4890 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4891 DependenciesArray);
4892 }
4893 // Copy regular dependencies with iterators.
4894 LValue PosLVal = CGF.MakeAddrLValue(
4895 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4896 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4897 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4898 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4899 !Dependencies[I].IteratorExpr)
4900 continue;
4901 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4902 DependenciesArray);
4903 }
4904 // Copy final depobj arrays without iterators.
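// Each depobj expands into the whole kmp_depend_info array it wraps, so these
// entries are appended after all regular ones, advancing the shared PosLVal
// counter by the per-depobj element counts.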
4905 if (HasDepobjDeps) {
4906 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4907 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4908 continue;
4909 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4910 DependenciesArray);
4911 }
4912 }
4913 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4914 DependenciesArray, CGF.VoidPtrTy);
4915 return std::make_pair(NumOfElements, DependenciesArray);
4916 }
4917
4918 Address CGOpenMPRuntime::emitDepobjDependClause(
4919 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4920 SourceLocation Loc) {
4921 if (Dependencies.DepExprs.empty())
4922 return Address::invalid();
4923 // Process list of dependencies.
4924 ASTContext &C = CGM.getContext();
4925 Address DependenciesArray = Address::invalid();
4926 unsigned NumDependencies = Dependencies.DepExprs.size();
4927 QualType FlagsTy;
4928 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4929 RecordDecl *KmpDependInfoRD =
4930 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4931
4932 llvm::Value *Size;
4933 // Define type kmp_depend_info[<Dependencies.size()>];
4934 // For depobj reserve one extra element to store the number of elements.
4935 // This is required to handle the 'depobj(x) update(in)' construct.
4936 // kmp_depend_info[<Dependencies.size()>] deps;
4937 llvm::Value *NumDepsVal;
4938 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4939 if (const auto *IE =
4940 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4941 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4942 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4943 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4944 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4945 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4946 }
4947 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4948 NumDepsVal);
4949 CharUnits SizeInBytes =
4950 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4951 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4952 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4953 NumDepsVal =
4954 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4955 } else {
4956 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4957 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4958 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4959 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4960 Size = CGM.getSize(Sz.alignTo(Align));
4961 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4962 }
4963 // The array has to be allocated in dynamic memory.
4964 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4965 // Use default allocator.
4966 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4967 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4968
4969 llvm::Value *Addr =
4970 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4971 CGM.getModule(), OMPRTL___kmpc_alloc),
4972 Args, ".dep.arr.addr");
4973 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4974 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
4975 DependenciesArray = Address(Addr, Align);
4976 // Write the number of elements in the first element of the array for depobj.
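// Resulting layout: element 0 carries the dependency count in its base_addr
// field and the actual records start at element 1; the address returned to
// the caller points past the count element (see the CreateConstGEP(..., 1)
// at the end of this function).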
4977 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4978 // deps[i].base_addr = NumDependencies;
4979 LValue BaseAddrLVal = CGF.EmitLValueForField(
4980 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4981 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4982 llvm::PointerUnion<unsigned *, LValue *> Pos;
4983 unsigned Idx = 1;
4984 LValue PosLVal;
4985 if (Dependencies.IteratorExpr) {
4986 PosLVal = CGF.MakeAddrLValue(
4987 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4988 C.getSizeType());
4989 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4990 /*IsInit=*/true);
4991 Pos = &PosLVal;
4992 } else {
4993 Pos = &Idx;
4994 }
4995 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4996 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4997 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
4998 return DependenciesArray;
4999 }
5000
5001 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5002 SourceLocation Loc) {
5003 ASTContext &C = CGM.getContext();
5004 QualType FlagsTy;
5005 getDependTypes(C, KmpDependInfoTy, FlagsTy);
5006 LValue Base = CGF.EmitLoadOfPointerLValue(
5007 DepobjLVal.getAddress(CGF),
5008 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5009 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5010 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5011 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5012 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5013 Addr.getPointer(),
5014 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5015 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5016 CGF.VoidPtrTy);
5017 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5018 // Use default allocator.
5019 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5020 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5021
5022 // __kmpc_free(gtid, addr, nullptr);
5023 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5024 CGM.getModule(), OMPRTL___kmpc_free),
5025 Args);
5026 }
5027
5028 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5029 OpenMPDependClauseKind NewDepKind,
5030 SourceLocation Loc) {
5031 ASTContext &C = CGM.getContext();
5032 QualType FlagsTy;
5033 getDependTypes(C, KmpDependInfoTy, FlagsTy);
5034 RecordDecl *KmpDependInfoRD =
5035 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5036 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5037 llvm::Value *NumDeps;
5038 LValue Base;
5039 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5040
5041 Address Begin = Base.getAddress(CGF);
5042 // Cast from pointer to array type to pointer to single element.
5043 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
5044 // The basic structure here is a do-while loop, as the depobj array is never empty.
5045 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5046 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5047 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5048 CGF.EmitBlock(BodyBB); 5049 llvm::PHINode *ElementPHI = 5050 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5051 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5052 Begin = Address(ElementPHI, Begin.getAlignment()); 5053 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5054 Base.getTBAAInfo()); 5055 // deps[i].flags = NewDepKind; 5056 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5057 LValue FlagsLVal = CGF.EmitLValueForField( 5058 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5059 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5060 FlagsLVal); 5061 5062 // Shift the address forward by one element. 5063 Address ElementNext = 5064 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5065 ElementPHI->addIncoming(ElementNext.getPointer(), 5066 CGF.Builder.GetInsertBlock()); 5067 llvm::Value *IsEmpty = 5068 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5069 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5070 // Done. 5071 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5072 } 5073 5074 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5075 const OMPExecutableDirective &D, 5076 llvm::Function *TaskFunction, 5077 QualType SharedsTy, Address Shareds, 5078 const Expr *IfCond, 5079 const OMPTaskDataTy &Data) { 5080 if (!CGF.HaveInsertPoint()) 5081 return; 5082 5083 TaskResultTy Result = 5084 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5085 llvm::Value *NewTask = Result.NewTask; 5086 llvm::Function *TaskEntry = Result.TaskEntry; 5087 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5088 LValue TDBase = Result.TDBase; 5089 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5090 // Process list of dependences. 5091 Address DependenciesArray = Address::invalid(); 5092 llvm::Value *NumOfElements; 5093 std::tie(NumOfElements, DependenciesArray) = 5094 emitDependClause(CGF, Data.Dependences, Loc); 5095 5096 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5097 // libcall. 
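// With an 'if' clause the task may be undeferred: ThenCodeGen below enqueues
// the task via __kmpc_omp_task (or __kmpc_omp_task_with_deps), while
// ElseCodeGen executes it immediately, bracketed by
// __kmpc_omp_task_begin_if0 and __kmpc_omp_task_complete_if0.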
5098 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5099 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5100 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5101 // list is not empty.
5102 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5103 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5104 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5105 llvm::Value *DepTaskArgs[7];
5106 if (!Data.Dependences.empty()) {
5107 DepTaskArgs[0] = UpLoc;
5108 DepTaskArgs[1] = ThreadID;
5109 DepTaskArgs[2] = NewTask;
5110 DepTaskArgs[3] = NumOfElements;
5111 DepTaskArgs[4] = DependenciesArray.getPointer();
5112 DepTaskArgs[5] = CGF.Builder.getInt32(0);
5113 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5114 }
5115 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5116 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5117 if (!Data.Tied) {
5118 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5119 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5120 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5121 }
5122 if (!Data.Dependences.empty()) {
5123 CGF.EmitRuntimeCall(
5124 OMPBuilder.getOrCreateRuntimeFunction(
5125 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5126 DepTaskArgs);
5127 } else {
5128 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5129 CGM.getModule(), OMPRTL___kmpc_omp_task),
5130 TaskArgs);
5131 }
5132 // Check if the parent region is untied and build the return for the untied task.
5133 if (auto *Region =
5134 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5135 Region->emitUntiedSwitch(CGF);
5136 };
5137
5138 llvm::Value *DepWaitTaskArgs[6];
5139 if (!Data.Dependences.empty()) {
5140 DepWaitTaskArgs[0] = UpLoc;
5141 DepWaitTaskArgs[1] = ThreadID;
5142 DepWaitTaskArgs[2] = NumOfElements;
5143 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5144 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5145 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5146 }
5147 auto &M = CGM.getModule();
5148 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5149 TaskEntry, &Data, &DepWaitTaskArgs,
5150 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5151 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5152 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5153 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5154 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5155 // is specified.
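// Even in the undeferred case the encountering thread must honor the task's
// dependencies, hence the explicit __kmpc_omp_wait_deps call before invoking
// the task entry directly.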
5156 if (!Data.Dependences.empty()) 5157 CGF.EmitRuntimeCall( 5158 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5159 DepWaitTaskArgs); 5160 // Call proxy_task_entry(gtid, new_task); 5161 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5162 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5163 Action.Enter(CGF); 5164 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5165 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5166 OutlinedFnArgs); 5167 }; 5168 5169 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5170 // kmp_task_t *new_task); 5171 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5172 // kmp_task_t *new_task); 5173 RegionCodeGenTy RCG(CodeGen); 5174 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5175 M, OMPRTL___kmpc_omp_task_begin_if0), 5176 TaskArgs, 5177 OMPBuilder.getOrCreateRuntimeFunction( 5178 M, OMPRTL___kmpc_omp_task_complete_if0), 5179 TaskArgs); 5180 RCG.setAction(Action); 5181 RCG(CGF); 5182 }; 5183 5184 if (IfCond) { 5185 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5186 } else { 5187 RegionCodeGenTy ThenRCG(ThenCodeGen); 5188 ThenRCG(CGF); 5189 } 5190 } 5191 5192 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5193 const OMPLoopDirective &D, 5194 llvm::Function *TaskFunction, 5195 QualType SharedsTy, Address Shareds, 5196 const Expr *IfCond, 5197 const OMPTaskDataTy &Data) { 5198 if (!CGF.HaveInsertPoint()) 5199 return; 5200 TaskResultTy Result = 5201 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5202 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5203 // libcall. 5204 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5205 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5206 // sched, kmp_uint64 grainsize, void *task_dup); 5207 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5208 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5209 llvm::Value *IfVal; 5210 if (IfCond) { 5211 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5212 /*isSigned=*/true); 5213 } else { 5214 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5215 } 5216 5217 LValue LBLVal = CGF.EmitLValueForField( 5218 Result.TDBase, 5219 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5220 const auto *LBVar = 5221 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5222 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5223 LBLVal.getQuals(), 5224 /*IsInitializer=*/true); 5225 LValue UBLVal = CGF.EmitLValueForField( 5226 Result.TDBase, 5227 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5228 const auto *UBVar = 5229 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5230 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5231 UBLVal.getQuals(), 5232 /*IsInitializer=*/true); 5233 LValue StLVal = CGF.EmitLValueForField( 5234 Result.TDBase, 5235 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5236 const auto *StVar = 5237 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5238 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5239 StLVal.getQuals(), 5240 /*IsInitializer=*/true); 5241 // Store reductions address. 
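// The reductions pointer typically comes from an enclosing taskgroup with
// task_reduction clauses; storing null signals to the runtime that no
// reduction data is associated with this taskloop.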
5242 LValue RedLVal = CGF.EmitLValueForField(
5243 Result.TDBase,
5244 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5245 if (Data.Reductions) {
5246 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5247 } else {
5248 CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5249 CGF.getContext().VoidPtrTy);
5250 }
5251 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5252 llvm::Value *TaskArgs[] = {
5253 UpLoc,
5254 ThreadID,
5255 Result.NewTask,
5256 IfVal,
5257 LBLVal.getPointer(CGF),
5258 UBLVal.getPointer(CGF),
5259 CGF.EmitLoadOfScalar(StLVal, Loc),
5260 llvm::ConstantInt::getSigned(
5261 CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler.
5262 llvm::ConstantInt::getSigned(
5263 CGF.IntTy, Data.Schedule.getPointer()
5264 ? Data.Schedule.getInt() ? NumTasks : Grainsize
5265 : NoSchedule),
5266 Data.Schedule.getPointer()
5267 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5268 /*isSigned=*/false)
5269 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5270 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5271 Result.TaskDupFn, CGF.VoidPtrTy)
5272 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5273 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5274 CGM.getModule(), OMPRTL___kmpc_taskloop),
5275 TaskArgs);
5276 }
5277
5278 /// Emit the reduction operation 'LHS op = RHS' for each element of the array
5279 /// (required for array sections).
5280 /// \param Type Type of array.
5281 /// \param LHSVar Variable on the left side of the reduction operation
5282 /// (references element of array in original variable).
5283 /// \param RHSVar Variable on the right side of the reduction operation
5284 /// (references element of array in original variable).
5285 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5286 /// RHSVar.
5287 static void EmitOMPAggregateReduction(
5288 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5289 const VarDecl *RHSVar,
5290 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5291 const Expr *, const Expr *)> &RedOpGen,
5292 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5293 const Expr *UpExpr = nullptr) {
5294 // Perform element-by-element initialization.
5295 QualType ElementTy;
5296 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5297 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5298
5299 // Drill down to the base element type on both arrays.
5300 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5301 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5302
5303 llvm::Value *RHSBegin = RHSAddr.getPointer();
5304 llvm::Value *LHSBegin = LHSAddr.getPointer();
5305 // Cast from pointer to array type to pointer to single element.
5306 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5307 // The basic structure here is a while-do loop.
5308 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5309 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5310 llvm::Value *IsEmpty =
5311 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5312 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5313
5314 // Enter the loop body, making that address the current address.
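// Two PHI nodes (created below) track the source and destination element
// cursors; both start at their array bases and advance in lockstep until
// LHSEnd is reached.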
5315 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5316 CGF.EmitBlock(BodyBB); 5317 5318 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5319 5320 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5321 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5322 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5323 Address RHSElementCurrent = 5324 Address(RHSElementPHI, 5325 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5326 5327 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5328 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5329 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5330 Address LHSElementCurrent = 5331 Address(LHSElementPHI, 5332 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5333 5334 // Emit copy. 5335 CodeGenFunction::OMPPrivateScope Scope(CGF); 5336 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5337 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5338 Scope.Privatize(); 5339 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5340 Scope.ForceCleanup(); 5341 5342 // Shift the address forward by one element. 5343 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5344 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5345 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5346 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5347 // Check whether we've reached the end. 5348 llvm::Value *Done = 5349 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5350 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5351 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5352 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5353 5354 // Done. 5355 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5356 } 5357 5358 /// Emit reduction combiner. If the combiner is a simple expression emit it as 5359 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5360 /// UDR combiner function. 
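/// For example, given a UDR such as
///   #pragma omp declare reduction(merge : T : omp_out = combine(omp_out, omp_in))
/// the combiner is emitted as a call to the UDR's combiner function with
/// omp_out/omp_in bound to the LHS/RHS elements (a sketch; 'combine' is
/// illustrative).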
5361 static void emitReductionCombiner(CodeGenFunction &CGF, 5362 const Expr *ReductionOp) { 5363 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5364 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5365 if (const auto *DRE = 5366 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5367 if (const auto *DRD = 5368 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5369 std::pair<llvm::Function *, llvm::Function *> Reduction = 5370 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5371 RValue Func = RValue::get(Reduction.first); 5372 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5373 CGF.EmitIgnoredExpr(ReductionOp); 5374 return; 5375 } 5376 CGF.EmitIgnoredExpr(ReductionOp); 5377 } 5378 5379 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5380 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5381 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5382 ArrayRef<const Expr *> ReductionOps) { 5383 ASTContext &C = CGM.getContext(); 5384 5385 // void reduction_func(void *LHSArg, void *RHSArg); 5386 FunctionArgList Args; 5387 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5388 ImplicitParamDecl::Other); 5389 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5390 ImplicitParamDecl::Other); 5391 Args.push_back(&LHSArg); 5392 Args.push_back(&RHSArg); 5393 const auto &CGFI = 5394 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5395 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5396 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5397 llvm::GlobalValue::InternalLinkage, Name, 5398 &CGM.getModule()); 5399 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5400 Fn->setDoesNotRecurse(); 5401 CodeGenFunction CGF(CGM); 5402 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5403 5404 // Dst = (void*[n])(LHSArg); 5405 // Src = (void*[n])(RHSArg); 5406 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5407 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5408 ArgsType), CGF.getPointerAlign()); 5409 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5410 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5411 ArgsType), CGF.getPointerAlign()); 5412 5413 // ... 5414 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5415 // ... 5416 CodeGenFunction::OMPPrivateScope Scope(CGF); 5417 auto IPriv = Privates.begin(); 5418 unsigned Idx = 0; 5419 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5420 const auto *RHSVar = 5421 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5422 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5423 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5424 }); 5425 const auto *LHSVar = 5426 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5427 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5428 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5429 }); 5430 QualType PrivTy = (*IPriv)->getType(); 5431 if (PrivTy->isVariablyModifiedType()) { 5432 // Get array size and emit VLA type. 
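// For a VLA the reduction list carries an extra slot holding the element
// count as a pointer-sized integer; decode it and bind it to the VLA's size
// expression so the privatized type below gets a concrete size.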
5433 ++Idx; 5434 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5435 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5436 const VariableArrayType *VLA = 5437 CGF.getContext().getAsVariableArrayType(PrivTy); 5438 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5439 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5440 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5441 CGF.EmitVariablyModifiedType(PrivTy); 5442 } 5443 } 5444 Scope.Privatize(); 5445 IPriv = Privates.begin(); 5446 auto ILHS = LHSExprs.begin(); 5447 auto IRHS = RHSExprs.begin(); 5448 for (const Expr *E : ReductionOps) { 5449 if ((*IPriv)->getType()->isArrayType()) { 5450 // Emit reduction for array section. 5451 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5452 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5453 EmitOMPAggregateReduction( 5454 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5455 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5456 emitReductionCombiner(CGF, E); 5457 }); 5458 } else { 5459 // Emit reduction for array subscript or single variable. 5460 emitReductionCombiner(CGF, E); 5461 } 5462 ++IPriv; 5463 ++ILHS; 5464 ++IRHS; 5465 } 5466 Scope.ForceCleanup(); 5467 CGF.FinishFunction(); 5468 return Fn; 5469 } 5470 5471 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5472 const Expr *ReductionOp, 5473 const Expr *PrivateRef, 5474 const DeclRefExpr *LHS, 5475 const DeclRefExpr *RHS) { 5476 if (PrivateRef->getType()->isArrayType()) { 5477 // Emit reduction for array section. 5478 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5479 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5480 EmitOMPAggregateReduction( 5481 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5482 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5483 emitReductionCombiner(CGF, ReductionOp); 5484 }); 5485 } else { 5486 // Emit reduction for array subscript or single variable. 5487 emitReductionCombiner(CGF, ReductionOp); 5488 } 5489 } 5490 5491 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5492 ArrayRef<const Expr *> Privates, 5493 ArrayRef<const Expr *> LHSExprs, 5494 ArrayRef<const Expr *> RHSExprs, 5495 ArrayRef<const Expr *> ReductionOps, 5496 ReductionOptionsTy Options) { 5497 if (!CGF.HaveInsertPoint()) 5498 return; 5499 5500 bool WithNowait = Options.WithNowait; 5501 bool SimpleReduction = Options.SimpleReduction; 5502 5503 // Next code should be emitted for reduction: 5504 // 5505 // static kmp_critical_name lock = { 0 }; 5506 // 5507 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5508 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5509 // ... 5510 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5511 // *(Type<n>-1*)rhs[<n>-1]); 5512 // } 5513 // 5514 // ... 5515 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5516 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5517 // RedList, reduce_func, &<lock>)) { 5518 // case 1: 5519 // ... 5520 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5521 // ... 5522 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5523 // break; 5524 // case 2: 5525 // ... 5526 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5527 // ... 
5528 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5529 // break; 5530 // default:; 5531 // } 5532 // 5533 // if SimpleReduction is true, only the next code is generated: 5534 // ... 5535 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5536 // ... 5537 5538 ASTContext &C = CGM.getContext(); 5539 5540 if (SimpleReduction) { 5541 CodeGenFunction::RunCleanupsScope Scope(CGF); 5542 auto IPriv = Privates.begin(); 5543 auto ILHS = LHSExprs.begin(); 5544 auto IRHS = RHSExprs.begin(); 5545 for (const Expr *E : ReductionOps) { 5546 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5547 cast<DeclRefExpr>(*IRHS)); 5548 ++IPriv; 5549 ++ILHS; 5550 ++IRHS; 5551 } 5552 return; 5553 } 5554 5555 // 1. Build a list of reduction variables. 5556 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5557 auto Size = RHSExprs.size(); 5558 for (const Expr *E : Privates) { 5559 if (E->getType()->isVariablyModifiedType()) 5560 // Reserve place for array size. 5561 ++Size; 5562 } 5563 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5564 QualType ReductionArrayTy = 5565 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5566 /*IndexTypeQuals=*/0); 5567 Address ReductionList = 5568 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5569 auto IPriv = Privates.begin(); 5570 unsigned Idx = 0; 5571 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5572 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5573 CGF.Builder.CreateStore( 5574 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5575 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5576 Elem); 5577 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5578 // Store array size. 5579 ++Idx; 5580 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5581 llvm::Value *Size = CGF.Builder.CreateIntCast( 5582 CGF.getVLASize( 5583 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5584 .NumElts, 5585 CGF.SizeTy, /*isSigned=*/false); 5586 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5587 Elem); 5588 } 5589 } 5590 5591 // 2. Emit reduce_func(). 5592 llvm::Function *ReductionFn = emitReductionFunction( 5593 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5594 LHSExprs, RHSExprs, ReductionOps); 5595 5596 // 3. Create static kmp_critical_name lock = { 0 }; 5597 std::string Name = getName({"reduction"}); 5598 llvm::Value *Lock = getCriticalRegionLock(Name); 5599 5600 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5601 // RedList, reduce_func, &<lock>); 5602 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5603 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5604 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5605 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5606 ReductionList.getPointer(), CGF.VoidPtrTy); 5607 llvm::Value *Args[] = { 5608 IdentTLoc, // ident_t *<loc> 5609 ThreadId, // i32 <gtid> 5610 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5611 ReductionArrayTySize, // size_type sizeof(RedList) 5612 RL, // void *RedList 5613 ReductionFn, // void (*) (void *, void *) <reduce_func> 5614 Lock // kmp_critical_name *&<lock> 5615 }; 5616 llvm::Value *Res = CGF.EmitRuntimeCall( 5617 OMPBuilder.getOrCreateRuntimeFunction( 5618 CGM.getModule(), 5619 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5620 Args); 5621 5622 // 5. 
Build switch(res) 5623 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5624 llvm::SwitchInst *SwInst = 5625 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5626 5627 // 6. Build case 1: 5628 // ... 5629 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5630 // ... 5631 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5632 // break; 5633 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5634 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5635 CGF.EmitBlock(Case1BB); 5636 5637 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5638 llvm::Value *EndArgs[] = { 5639 IdentTLoc, // ident_t *<loc> 5640 ThreadId, // i32 <gtid> 5641 Lock // kmp_critical_name *&<lock> 5642 }; 5643 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5644 CodeGenFunction &CGF, PrePostActionTy &Action) { 5645 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5646 auto IPriv = Privates.begin(); 5647 auto ILHS = LHSExprs.begin(); 5648 auto IRHS = RHSExprs.begin(); 5649 for (const Expr *E : ReductionOps) { 5650 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5651 cast<DeclRefExpr>(*IRHS)); 5652 ++IPriv; 5653 ++ILHS; 5654 ++IRHS; 5655 } 5656 }; 5657 RegionCodeGenTy RCG(CodeGen); 5658 CommonActionTy Action( 5659 nullptr, llvm::None, 5660 OMPBuilder.getOrCreateRuntimeFunction( 5661 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5662 : OMPRTL___kmpc_end_reduce), 5663 EndArgs); 5664 RCG.setAction(Action); 5665 RCG(CGF); 5666 5667 CGF.EmitBranch(DefaultBB); 5668 5669 // 7. Build case 2: 5670 // ... 5671 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5672 // ... 5673 // break; 5674 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5675 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5676 CGF.EmitBlock(Case2BB); 5677 5678 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5679 CodeGenFunction &CGF, PrePostActionTy &Action) { 5680 auto ILHS = LHSExprs.begin(); 5681 auto IRHS = RHSExprs.begin(); 5682 auto IPriv = Privates.begin(); 5683 for (const Expr *E : ReductionOps) { 5684 const Expr *XExpr = nullptr; 5685 const Expr *EExpr = nullptr; 5686 const Expr *UpExpr = nullptr; 5687 BinaryOperatorKind BO = BO_Comma; 5688 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5689 if (BO->getOpcode() == BO_Assign) { 5690 XExpr = BO->getLHS(); 5691 UpExpr = BO->getRHS(); 5692 } 5693 } 5694 // Try to emit update expression as a simple atomic. 5695 const Expr *RHSExpr = UpExpr; 5696 if (RHSExpr) { 5697 // Analyze RHS part of the whole expression. 5698 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5699 RHSExpr->IgnoreParenImpCasts())) { 5700 // If this is a conditional operator, analyze its condition for 5701 // min/max reduction operator. 
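// e.g. a min/max reduction is modeled as 'x = x < e ? x : e'; the
// comparison's opcode and RHS extracted below drive the atomic update.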
5702 RHSExpr = ACO->getCond(); 5703 } 5704 if (const auto *BORHS = 5705 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5706 EExpr = BORHS->getRHS(); 5707 BO = BORHS->getOpcode(); 5708 } 5709 } 5710 if (XExpr) { 5711 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5712 auto &&AtomicRedGen = [BO, VD, 5713 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5714 const Expr *EExpr, const Expr *UpExpr) { 5715 LValue X = CGF.EmitLValue(XExpr); 5716 RValue E; 5717 if (EExpr) 5718 E = CGF.EmitAnyExpr(EExpr); 5719 CGF.EmitOMPAtomicSimpleUpdateExpr( 5720 X, E, BO, /*IsXLHSInRHSPart=*/true, 5721 llvm::AtomicOrdering::Monotonic, Loc, 5722 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5723 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5724 PrivateScope.addPrivate( 5725 VD, [&CGF, VD, XRValue, Loc]() { 5726 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5727 CGF.emitOMPSimpleStore( 5728 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5729 VD->getType().getNonReferenceType(), Loc); 5730 return LHSTemp; 5731 }); 5732 (void)PrivateScope.Privatize(); 5733 return CGF.EmitAnyExpr(UpExpr); 5734 }); 5735 }; 5736 if ((*IPriv)->getType()->isArrayType()) { 5737 // Emit atomic reduction for array section. 5738 const auto *RHSVar = 5739 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5740 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5741 AtomicRedGen, XExpr, EExpr, UpExpr); 5742 } else { 5743 // Emit atomic reduction for array subscript or single variable. 5744 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5745 } 5746 } else { 5747 // Emit as a critical region. 5748 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5749 const Expr *, const Expr *) { 5750 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5751 std::string Name = RT.getName({"atomic_reduction"}); 5752 RT.emitCriticalRegion( 5753 CGF, Name, 5754 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5755 Action.Enter(CGF); 5756 emitReductionCombiner(CGF, E); 5757 }, 5758 Loc); 5759 }; 5760 if ((*IPriv)->getType()->isArrayType()) { 5761 const auto *LHSVar = 5762 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5763 const auto *RHSVar = 5764 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5765 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5766 CritRedGen); 5767 } else { 5768 CritRedGen(CGF, nullptr, nullptr, nullptr); 5769 } 5770 } 5771 ++ILHS; 5772 ++IRHS; 5773 ++IPriv; 5774 } 5775 }; 5776 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5777 if (!WithNowait) { 5778 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5779 llvm::Value *EndArgs[] = { 5780 IdentTLoc, // ident_t *<loc> 5781 ThreadId, // i32 <gtid> 5782 Lock // kmp_critical_name *&<lock> 5783 }; 5784 CommonActionTy Action(nullptr, llvm::None, 5785 OMPBuilder.getOrCreateRuntimeFunction( 5786 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5787 EndArgs); 5788 AtomicRCG.setAction(Action); 5789 AtomicRCG(CGF); 5790 } else { 5791 AtomicRCG(CGF); 5792 } 5793 5794 CGF.EmitBranch(DefaultBB); 5795 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5796 } 5797 5798 /// Generates unique name for artificial threadprivate variables. 5799 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5800 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5801 const Expr *Ref) { 5802 SmallString<256> Buffer; 5803 llvm::raw_svector_ostream Out(Buffer); 5804 const clang::DeclRefExpr *DE; 5805 const VarDecl *D = ::getBaseDecl(Ref, DE); 5806 if (!D) 5807 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5808 D = D->getCanonicalDecl(); 5809 std::string Name = CGM.getOpenMPRuntime().getName( 5810 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5811 Out << Prefix << Name << "_" 5812 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5813 return std::string(Out.str()); 5814 } 5815 5816 /// Emits reduction initializer function: 5817 /// \code 5818 /// void @.red_init(void* %arg, void* %orig) { 5819 /// %0 = bitcast void* %arg to <type>* 5820 /// store <type> <init>, <type>* %0 5821 /// ret void 5822 /// } 5823 /// \endcode 5824 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5825 SourceLocation Loc, 5826 ReductionCodeGen &RCG, unsigned N) { 5827 ASTContext &C = CGM.getContext(); 5828 QualType VoidPtrTy = C.VoidPtrTy; 5829 VoidPtrTy.addRestrict(); 5830 FunctionArgList Args; 5831 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5832 ImplicitParamDecl::Other); 5833 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5834 ImplicitParamDecl::Other); 5835 Args.emplace_back(&Param); 5836 Args.emplace_back(&ParamOrig); 5837 const auto &FnInfo = 5838 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5839 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5840 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5841 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5842 Name, &CGM.getModule()); 5843 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5844 Fn->setDoesNotRecurse(); 5845 CodeGenFunction CGF(CGM); 5846 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5847 Address PrivateAddr = CGF.EmitLoadOfPointer( 5848 CGF.GetAddrOfLocalVar(&Param), 5849 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5850 llvm::Value *Size = nullptr; 5851 // If the size of the reduction item is non-constant, load it from global 5852 // threadprivate variable. 
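  // (The size was stored earlier by emitTaskReductionFixups under the same
  // generateUniqueName("reduction_size", ...) key, so each of the generated
  // init/comb/fini functions can reload it here.)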
5853   if (RCG.getSizes(N).second) {
5854     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5855         CGF, CGM.getContext().getSizeType(),
5856         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5857     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5858                                 CGM.getContext().getSizeType(), Loc);
5859   }
5860   RCG.emitAggregateType(CGF, N, Size);
5861   LValue OrigLVal;
5862   // If the initializer uses the initializer from the declare reduction
5863   // construct, emit a pointer to the address of the original reduction item
5864   // (required by the reduction initializer).
5865   if (RCG.usesReductionInitializer(N)) {
5866     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5867     SharedAddr = CGF.EmitLoadOfPointer(
5868         SharedAddr,
5869         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5870     OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5871   } else {
5872     OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5873         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5874         CGM.getContext().VoidPtrTy);
5875   }
5876   // Emit the initializer:
5877   // %0 = bitcast void* %arg to <type>*
5878   // store <type> <init>, <type>* %0
5879   RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5880                          [](CodeGenFunction &) { return false; });
5881   CGF.FinishFunction();
5882   return Fn;
5883 }
5884 
5885 /// Emits reduction combiner function:
5886 /// \code
5887 /// void @.red_comb(void* %arg0, void* %arg1) {
5888 /// %lhs = bitcast void* %arg0 to <type>*
5889 /// %rhs = bitcast void* %arg1 to <type>*
5890 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5891 /// store <type> %2, <type>* %lhs
5892 /// ret void
5893 /// }
5894 /// \endcode
5895 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5896                                            SourceLocation Loc,
5897                                            ReductionCodeGen &RCG, unsigned N,
5898                                            const Expr *ReductionOp,
5899                                            const Expr *LHS, const Expr *RHS,
5900                                            const Expr *PrivateRef) {
5901   ASTContext &C = CGM.getContext();
5902   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5903   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5904   FunctionArgList Args;
5905   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5906                                C.VoidPtrTy, ImplicitParamDecl::Other);
5907   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5908                             ImplicitParamDecl::Other);
5909   Args.emplace_back(&ParamInOut);
5910   Args.emplace_back(&ParamIn);
5911   const auto &FnInfo =
5912       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5913   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5914   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5915   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5916                                     Name, &CGM.getModule());
5917   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5918   Fn->setDoesNotRecurse();
5919   CodeGenFunction CGF(CGM);
5920   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5921   llvm::Value *Size = nullptr;
5922   // If the size of the reduction item is non-constant, load it from global
5923   // threadprivate variable.
5924 if (RCG.getSizes(N).second) { 5925 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5926 CGF, CGM.getContext().getSizeType(), 5927 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5928 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5929 CGM.getContext().getSizeType(), Loc); 5930 } 5931 RCG.emitAggregateType(CGF, N, Size); 5932 // Remap lhs and rhs variables to the addresses of the function arguments. 5933 // %lhs = bitcast void* %arg0 to <type>* 5934 // %rhs = bitcast void* %arg1 to <type>* 5935 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5936 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 5937 // Pull out the pointer to the variable. 5938 Address PtrAddr = CGF.EmitLoadOfPointer( 5939 CGF.GetAddrOfLocalVar(&ParamInOut), 5940 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5941 return CGF.Builder.CreateElementBitCast( 5942 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 5943 }); 5944 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 5945 // Pull out the pointer to the variable. 5946 Address PtrAddr = CGF.EmitLoadOfPointer( 5947 CGF.GetAddrOfLocalVar(&ParamIn), 5948 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5949 return CGF.Builder.CreateElementBitCast( 5950 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 5951 }); 5952 PrivateScope.Privatize(); 5953 // Emit the combiner body: 5954 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5955 // store <type> %2, <type>* %lhs 5956 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5957 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5958 cast<DeclRefExpr>(RHS)); 5959 CGF.FinishFunction(); 5960 return Fn; 5961 } 5962 5963 /// Emits reduction finalizer function: 5964 /// \code 5965 /// void @.red_fini(void* %arg) { 5966 /// %0 = bitcast void* %arg to <type>* 5967 /// <destroy>(<type>* %0) 5968 /// ret void 5969 /// } 5970 /// \endcode 5971 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5972 SourceLocation Loc, 5973 ReductionCodeGen &RCG, unsigned N) { 5974 if (!RCG.needCleanups(N)) 5975 return nullptr; 5976 ASTContext &C = CGM.getContext(); 5977 FunctionArgList Args; 5978 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5979 ImplicitParamDecl::Other); 5980 Args.emplace_back(&Param); 5981 const auto &FnInfo = 5982 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5983 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5984 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 5985 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5986 Name, &CGM.getModule()); 5987 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5988 Fn->setDoesNotRecurse(); 5989 CodeGenFunction CGF(CGM); 5990 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5991 Address PrivateAddr = CGF.EmitLoadOfPointer( 5992 CGF.GetAddrOfLocalVar(&Param), 5993 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5994 llvm::Value *Size = nullptr; 5995 // If the size of the reduction item is non-constant, load it from global 5996 // threadprivate variable. 
5997 if (RCG.getSizes(N).second) { 5998 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5999 CGF, CGM.getContext().getSizeType(), 6000 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6001 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6002 CGM.getContext().getSizeType(), Loc); 6003 } 6004 RCG.emitAggregateType(CGF, N, Size); 6005 // Emit the finalizer body: 6006 // <destroy>(<type>* %0) 6007 RCG.emitCleanups(CGF, N, PrivateAddr); 6008 CGF.FinishFunction(Loc); 6009 return Fn; 6010 } 6011 6012 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6013 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6014 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6015 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6016 return nullptr; 6017 6018 // Build typedef struct: 6019 // kmp_taskred_input { 6020 // void *reduce_shar; // shared reduction item 6021 // void *reduce_orig; // original reduction item used for initialization 6022 // size_t reduce_size; // size of data item 6023 // void *reduce_init; // data initialization routine 6024 // void *reduce_fini; // data finalization routine 6025 // void *reduce_comb; // data combiner routine 6026 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6027 // } kmp_taskred_input_t; 6028 ASTContext &C = CGM.getContext(); 6029 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6030 RD->startDefinition(); 6031 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6032 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6033 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6034 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6035 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6036 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6037 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6038 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6039 RD->completeDefinition(); 6040 QualType RDType = C.getRecordType(RD); 6041 unsigned Size = Data.ReductionVars.size(); 6042 llvm::APInt ArraySize(/*numBits=*/64, Size); 6043 QualType ArrayRDType = C.getConstantArrayType( 6044 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6045 // kmp_task_red_input_t .rd_input.[Size]; 6046 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6047 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6048 Data.ReductionCopies, Data.ReductionOps); 6049 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6050 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6051 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6052 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6053 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6054 TaskRedInput.getPointer(), Idxs, 6055 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6056 ".rd_input.gep."); 6057 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6058 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6059 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6060 RCG.emitSharedOrigLValue(CGF, Cnt); 6061 llvm::Value *CastedShared = 6062 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6063 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6064 // ElemLVal.reduce_orig = &Origs[Cnt]; 6065 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6066 llvm::Value *CastedOrig = 6067 
CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6068     CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6069     RCG.emitAggregateType(CGF, Cnt);
6070     llvm::Value *SizeValInChars;
6071     llvm::Value *SizeVal;
6072     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6073     // We use delayed creation/initialization for VLAs and array sections. It
6074     // is required because the runtime does not provide a way to pass the
6075     // sizes of VLAs/array sections to the initializer/combiner/finalizer
6076     // functions. Instead, threadprivate global variables are used to store
6077     // these values, which are then used in those functions.
6078     bool DelayedCreation = !!SizeVal;
6079     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6080                                                /*isSigned=*/false);
6081     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6082     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6083     // ElemLVal.reduce_init = init;
6084     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6085     llvm::Value *InitAddr =
6086         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6087     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6088     // ElemLVal.reduce_fini = fini;
6089     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6090     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6091     llvm::Value *FiniAddr = Fini
6092                                 ? CGF.EmitCastToVoidPtr(Fini)
6093                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6094     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6095     // ElemLVal.reduce_comb = comb;
6096     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6097     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6098         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6099         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6100     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6101     // ElemLVal.flags = 0;
6102     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6103     if (DelayedCreation) {
6104       CGF.EmitStoreOfScalar(
6105           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6106           FlagsLVal);
6107     } else
6108       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6109                                  FlagsLVal.getType());
6110   }
6111   if (Data.IsReductionWithTaskMod) {
6112     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
6113     // int is_ws, int num, void *data);
6114     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6115     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6116                                                   CGM.IntTy, /*isSigned=*/true);
6117     llvm::Value *Args[] = {
6118         IdentTLoc, GTid,
6119         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ?
1 : 0,
6120                                /*isSigned=*/true),
6121         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6122         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6123             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6124     return CGF.EmitRuntimeCall(
6125         OMPBuilder.getOrCreateRuntimeFunction(
6126             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6127         Args);
6128   }
6129   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6130   llvm::Value *Args[] = {
6131       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6132                                 /*isSigned=*/true),
6133       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6134       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6135                                                       CGM.VoidPtrTy)};
6136   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6137                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
6138                              Args);
6139 }
6140 
6141 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6142                                             SourceLocation Loc,
6143                                             bool IsWorksharingReduction) {
6144   // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
6145   // int gtid, int is_ws);
6146   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6147   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6148                                                 CGM.IntTy, /*isSigned=*/true);
6149   llvm::Value *Args[] = {IdentTLoc, GTid,
6150                          llvm::ConstantInt::get(CGM.IntTy,
6151                                                 IsWorksharingReduction ? 1 : 0,
6152                                                 /*isSigned=*/true)};
6153   (void)CGF.EmitRuntimeCall(
6154       OMPBuilder.getOrCreateRuntimeFunction(
6155           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6156       Args);
6157 }
6158 
6159 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6160                                               SourceLocation Loc,
6161                                               ReductionCodeGen &RCG,
6162                                               unsigned N) {
6163   auto Sizes = RCG.getSizes(N);
6164   // Emit a threadprivate global variable if the size is non-constant
6165   // (Sizes.second != nullptr).
6166   if (Sizes.second) {
6167     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6168                                                      /*isSigned=*/false);
6169     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6170         CGF, CGM.getContext().getSizeType(),
6171         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6172     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6173   }
6174 }
6175 
6176 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6177                                               SourceLocation Loc,
6178                                               llvm::Value *ReductionsPtr,
6179                                               LValue SharedLVal) {
6180   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
6181   // void *d);
6182   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6183                                                    CGM.IntTy,
6184                                                    /*isSigned=*/true),
6185                          ReductionsPtr,
6186                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6187                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6188   return Address(
6189       CGF.EmitRuntimeCall(
6190           OMPBuilder.getOrCreateRuntimeFunction(
6191               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6192           Args),
6193       SharedLVal.getAlignment());
6194 }
6195 
6196 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6197                                        SourceLocation Loc) {
6198   if (!CGF.HaveInsertPoint())
6199     return;
6200 
6201   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6202     OMPBuilder.createTaskwait(CGF.Builder);
6203   } else {
6204     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6205     // global_tid);
6206     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6207     // Ignore return result until untied tasks are supported.
6208 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6209 CGM.getModule(), OMPRTL___kmpc_omp_taskwait), 6210 Args); 6211 } 6212 6213 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6214 Region->emitUntiedSwitch(CGF); 6215 } 6216 6217 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6218 OpenMPDirectiveKind InnerKind, 6219 const RegionCodeGenTy &CodeGen, 6220 bool HasCancel) { 6221 if (!CGF.HaveInsertPoint()) 6222 return; 6223 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 6224 InnerKind != OMPD_critical && 6225 InnerKind != OMPD_master); 6226 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6227 } 6228 6229 namespace { 6230 enum RTCancelKind { 6231 CancelNoreq = 0, 6232 CancelParallel = 1, 6233 CancelLoop = 2, 6234 CancelSections = 3, 6235 CancelTaskgroup = 4 6236 }; 6237 } // anonymous namespace 6238 6239 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6240 RTCancelKind CancelKind = CancelNoreq; 6241 if (CancelRegion == OMPD_parallel) 6242 CancelKind = CancelParallel; 6243 else if (CancelRegion == OMPD_for) 6244 CancelKind = CancelLoop; 6245 else if (CancelRegion == OMPD_sections) 6246 CancelKind = CancelSections; 6247 else { 6248 assert(CancelRegion == OMPD_taskgroup); 6249 CancelKind = CancelTaskgroup; 6250 } 6251 return CancelKind; 6252 } 6253 6254 void CGOpenMPRuntime::emitCancellationPointCall( 6255 CodeGenFunction &CGF, SourceLocation Loc, 6256 OpenMPDirectiveKind CancelRegion) { 6257 if (!CGF.HaveInsertPoint()) 6258 return; 6259 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6260 // global_tid, kmp_int32 cncl_kind); 6261 if (auto *OMPRegionInfo = 6262 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6263 // For 'cancellation point taskgroup', the task region info may not have a 6264 // cancel. This may instead happen in another adjacent task. 6265 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6266 llvm::Value *Args[] = { 6267 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6268 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6269 // Ignore return result until untied tasks are supported. 
6270 llvm::Value *Result = CGF.EmitRuntimeCall( 6271 OMPBuilder.getOrCreateRuntimeFunction( 6272 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6273 Args); 6274 // if (__kmpc_cancellationpoint()) { 6275 // exit from construct; 6276 // } 6277 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6278 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6279 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6280 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6281 CGF.EmitBlock(ExitBB); 6282 // exit from construct; 6283 CodeGenFunction::JumpDest CancelDest = 6284 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6285 CGF.EmitBranchThroughCleanup(CancelDest); 6286 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6287 } 6288 } 6289 } 6290 6291 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6292 const Expr *IfCond, 6293 OpenMPDirectiveKind CancelRegion) { 6294 if (!CGF.HaveInsertPoint()) 6295 return; 6296 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6297 // kmp_int32 cncl_kind); 6298 auto &M = CGM.getModule(); 6299 if (auto *OMPRegionInfo = 6300 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6301 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6302 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6303 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6304 llvm::Value *Args[] = { 6305 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6306 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6307 // Ignore return result until untied tasks are supported. 6308 llvm::Value *Result = CGF.EmitRuntimeCall( 6309 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6310 // if (__kmpc_cancel()) { 6311 // exit from construct; 6312 // } 6313 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6314 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6315 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6316 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6317 CGF.EmitBlock(ExitBB); 6318 // exit from construct; 6319 CodeGenFunction::JumpDest CancelDest = 6320 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6321 CGF.EmitBranchThroughCleanup(CancelDest); 6322 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6323 }; 6324 if (IfCond) { 6325 emitIfClause(CGF, IfCond, ThenGen, 6326 [](CodeGenFunction &, PrePostActionTy &) {}); 6327 } else { 6328 RegionCodeGenTy ThenRCG(ThenGen); 6329 ThenRCG(CGF); 6330 } 6331 } 6332 } 6333 6334 namespace { 6335 /// Cleanup action for uses_allocators support. 
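/// For illustration (allocator and traits names are made up): for a directive
/// like
///   #pragma omp target uses_allocators(my_alloc(my_traits))
/// Enter() initializes 'my_alloc' via __kmpc_init_allocator before the target
/// region body runs, and Exit() releases it via __kmpc_destroy_allocator
/// afterwards.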
6336 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6337 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6338 6339 public: 6340 OMPUsesAllocatorsActionTy( 6341 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6342 : Allocators(Allocators) {} 6343 void Enter(CodeGenFunction &CGF) override { 6344 if (!CGF.HaveInsertPoint()) 6345 return; 6346 for (const auto &AllocatorData : Allocators) { 6347 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6348 CGF, AllocatorData.first, AllocatorData.second); 6349 } 6350 } 6351 void Exit(CodeGenFunction &CGF) override { 6352 if (!CGF.HaveInsertPoint()) 6353 return; 6354 for (const auto &AllocatorData : Allocators) { 6355 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6356 AllocatorData.first); 6357 } 6358 } 6359 }; 6360 } // namespace 6361 6362 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6363 const OMPExecutableDirective &D, StringRef ParentName, 6364 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6365 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6366 assert(!ParentName.empty() && "Invalid target region parent name!"); 6367 HasEmittedTargetRegion = true; 6368 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6369 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6370 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6371 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6372 if (!D.AllocatorTraits) 6373 continue; 6374 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6375 } 6376 } 6377 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6378 CodeGen.setAction(UsesAllocatorAction); 6379 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6380 IsOffloadEntry, CodeGen); 6381 } 6382 6383 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6384 const Expr *Allocator, 6385 const Expr *AllocatorTraits) { 6386 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6387 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6388 // Use default memspace handle. 6389 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6390 llvm::Value *NumTraits = llvm::ConstantInt::get( 6391 CGF.IntTy, cast<ConstantArrayType>( 6392 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6393 ->getSize() 6394 .getLimitedValue()); 6395 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6396 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6397 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6398 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6399 AllocatorTraitsLVal.getBaseInfo(), 6400 AllocatorTraitsLVal.getTBAAInfo()); 6401 llvm::Value *Traits = 6402 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6403 6404 llvm::Value *AllocatorVal = 6405 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6406 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6407 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6408 // Store to allocator. 
6409   CGF.EmitVarDecl(*cast<VarDecl>(
6410       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6411   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6412   AllocatorVal =
6413       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6414                                Allocator->getType(), Allocator->getExprLoc());
6415   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6416 }
6417 
6418 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6419                                              const Expr *Allocator) {
6420   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6421   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6422   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6423   llvm::Value *AllocatorVal =
6424       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6425   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6426                                           CGF.getContext().VoidPtrTy,
6427                                           Allocator->getExprLoc());
6428   (void)CGF.EmitRuntimeCall(
6429       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6430                                             OMPRTL___kmpc_destroy_allocator),
6431       {ThreadId, AllocatorVal});
6432 }
6433 
6434 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6435     const OMPExecutableDirective &D, StringRef ParentName,
6436     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6437     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6438   // Create a unique name for the entry function using the source location
6439   // information of the current target region. The name will be something like:
6440   //
6441   // __omp_offloading_DD_FFFF_PP_lBB
6442   //
6443   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6444   // mangled name of the function that encloses the target region and BB is the
6445   // line number of the target region.
6446 
6447   unsigned DeviceID;
6448   unsigned FileID;
6449   unsigned Line;
6450   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6451                            Line);
6452   SmallString<64> EntryFnName;
6453   {
6454     llvm::raw_svector_ostream OS(EntryFnName);
6455     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6456        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6457   }
6458 
6459   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6460 
6461   CodeGenFunction CGF(CGM, true);
6462   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6463   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6464 
6465   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6466 
6467   // If this target outlined function is not an offload entry, we don't need to
6468   // register it.
6469   if (!IsOffloadEntry)
6470     return;
6471 
6472   // The target region ID is used by the runtime library to identify the
6473   // current target region, so it only has to be unique and not necessarily
6474   // point to anything. It could be the pointer to the outlined function that
6475   // implements the target region, but we aren't using that so that the
6476   // compiler doesn't need to keep it around, and can therefore inline the host
6477   // function if proven worthwhile during optimization. On the other hand, if
6478   // emitting code for the device, the ID has to be the function address so
6479   // that it can be retrieved from the offloading entry and launched by the
6480   // runtime library. We also mark the outlined function to have external
6481   // linkage when emitting code for the device, because these functions will be entry points to the device.
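  // For example (name shown schematically), on the host the ID below becomes
  // a global roughly like
  //   @.<entry_name>.region_id = weak constant i8 0
  // while on the device the bitcast address of the outlined function itself
  // is used as the ID.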
6482 6483 if (CGM.getLangOpts().OpenMPIsDevice) { 6484 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6485 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6486 OutlinedFn->setDSOLocal(false); 6487 if (CGM.getTriple().isAMDGCN()) 6488 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 6489 } else { 6490 std::string Name = getName({EntryFnName, "region_id"}); 6491 OutlinedFnID = new llvm::GlobalVariable( 6492 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6493 llvm::GlobalValue::WeakAnyLinkage, 6494 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6495 } 6496 6497 // Register the information for the entry associated with this target region. 6498 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6499 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6500 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6501 } 6502 6503 /// Checks if the expression is constant or does not have non-trivial function 6504 /// calls. 6505 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6506 // We can skip constant expressions. 6507 // We can skip expressions with trivial calls or simple expressions. 6508 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6509 !E->hasNonTrivialCall(Ctx)) && 6510 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6511 } 6512 6513 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6514 const Stmt *Body) { 6515 const Stmt *Child = Body->IgnoreContainers(); 6516 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6517 Child = nullptr; 6518 for (const Stmt *S : C->body()) { 6519 if (const auto *E = dyn_cast<Expr>(S)) { 6520 if (isTrivial(Ctx, E)) 6521 continue; 6522 } 6523 // Some of the statements can be ignored. 6524 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6525 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6526 continue; 6527 // Analyze declarations. 6528 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6529 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6530 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6531 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6532 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6533 isa<UsingDirectiveDecl>(D) || 6534 isa<OMPDeclareReductionDecl>(D) || 6535 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6536 return true; 6537 const auto *VD = dyn_cast<VarDecl>(D); 6538 if (!VD) 6539 return false; 6540 return VD->isConstexpr() || 6541 ((VD->getType().isTrivialType(Ctx) || 6542 VD->getType()->isReferenceType()) && 6543 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6544 })) 6545 continue; 6546 } 6547 // Found multiple children - cannot get the one child only. 6548 if (Child) 6549 return nullptr; 6550 Child = S; 6551 } 6552 if (Child) 6553 Child = Child->IgnoreContainers(); 6554 } 6555 return Child; 6556 } 6557 6558 /// Emit the number of teams for a target directive. Inspect the num_teams 6559 /// clause associated with a teams construct combined or closely nested 6560 /// with the target directive. 6561 /// 6562 /// Emit a team of size one for directives such as 'target parallel' that 6563 /// have no associated teams construct. 6564 /// 6565 /// Otherwise, return nullptr. 
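/// For example (illustrative): '#pragma omp target teams num_teams(8)' emits
/// the value 8, '#pragma omp target parallel' emits 1, and '#pragma omp
/// target teams' without a num_teams clause emits 0, leaving the choice to
/// the runtime.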
6566 static llvm::Value * 6567 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6568 const OMPExecutableDirective &D) { 6569 assert(!CGF.getLangOpts().OpenMPIsDevice && 6570 "Clauses associated with the teams directive expected to be emitted " 6571 "only for the host!"); 6572 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6573 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6574 "Expected target-based executable directive."); 6575 CGBuilderTy &Bld = CGF.Builder; 6576 switch (DirectiveKind) { 6577 case OMPD_target: { 6578 const auto *CS = D.getInnermostCapturedStmt(); 6579 const auto *Body = 6580 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6581 const Stmt *ChildStmt = 6582 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6583 if (const auto *NestedDir = 6584 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6585 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6586 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6587 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6588 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6589 const Expr *NumTeams = 6590 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6591 llvm::Value *NumTeamsVal = 6592 CGF.EmitScalarExpr(NumTeams, 6593 /*IgnoreResultAssign*/ true); 6594 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6595 /*isSigned=*/true); 6596 } 6597 return Bld.getInt32(0); 6598 } 6599 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6600 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6601 return Bld.getInt32(1); 6602 return Bld.getInt32(0); 6603 } 6604 return nullptr; 6605 } 6606 case OMPD_target_teams: 6607 case OMPD_target_teams_distribute: 6608 case OMPD_target_teams_distribute_simd: 6609 case OMPD_target_teams_distribute_parallel_for: 6610 case OMPD_target_teams_distribute_parallel_for_simd: { 6611 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6612 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6613 const Expr *NumTeams = 6614 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6615 llvm::Value *NumTeamsVal = 6616 CGF.EmitScalarExpr(NumTeams, 6617 /*IgnoreResultAssign*/ true); 6618 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6619 /*isSigned=*/true); 6620 } 6621 return Bld.getInt32(0); 6622 } 6623 case OMPD_target_parallel: 6624 case OMPD_target_parallel_for: 6625 case OMPD_target_parallel_for_simd: 6626 case OMPD_target_simd: 6627 return Bld.getInt32(1); 6628 case OMPD_parallel: 6629 case OMPD_for: 6630 case OMPD_parallel_for: 6631 case OMPD_parallel_master: 6632 case OMPD_parallel_sections: 6633 case OMPD_for_simd: 6634 case OMPD_parallel_for_simd: 6635 case OMPD_cancel: 6636 case OMPD_cancellation_point: 6637 case OMPD_ordered: 6638 case OMPD_threadprivate: 6639 case OMPD_allocate: 6640 case OMPD_task: 6641 case OMPD_simd: 6642 case OMPD_sections: 6643 case OMPD_section: 6644 case OMPD_single: 6645 case OMPD_master: 6646 case OMPD_critical: 6647 case OMPD_taskyield: 6648 case OMPD_barrier: 6649 case OMPD_taskwait: 6650 case OMPD_taskgroup: 6651 case OMPD_atomic: 6652 case OMPD_flush: 6653 case OMPD_depobj: 6654 case OMPD_scan: 6655 case OMPD_teams: 6656 case OMPD_target_data: 6657 case OMPD_target_exit_data: 6658 case OMPD_target_enter_data: 6659 case OMPD_distribute: 6660 case OMPD_distribute_simd: 6661 case OMPD_distribute_parallel_for: 6662 case OMPD_distribute_parallel_for_simd: 6663 case OMPD_teams_distribute: 6664 case OMPD_teams_distribute_simd: 6665 case OMPD_teams_distribute_parallel_for: 
case OMPD_teams_distribute_parallel_for_simd:
6667   case OMPD_target_update:
6668   case OMPD_declare_simd:
6669   case OMPD_declare_variant:
6670   case OMPD_begin_declare_variant:
6671   case OMPD_end_declare_variant:
6672   case OMPD_declare_target:
6673   case OMPD_end_declare_target:
6674   case OMPD_declare_reduction:
6675   case OMPD_declare_mapper:
6676   case OMPD_taskloop:
6677   case OMPD_taskloop_simd:
6678   case OMPD_master_taskloop:
6679   case OMPD_master_taskloop_simd:
6680   case OMPD_parallel_master_taskloop:
6681   case OMPD_parallel_master_taskloop_simd:
6682   case OMPD_requires:
6683   case OMPD_unknown:
6684     break;
6685   default:
6686     break;
6687   }
6688   llvm_unreachable("Unexpected directive kind.");
6689 }
6690 
6691 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6692                                   llvm::Value *DefaultThreadLimitVal) {
6693   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6694       CGF.getContext(), CS->getCapturedStmt());
6695   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6696     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6697       llvm::Value *NumThreads = nullptr;
6698       llvm::Value *CondVal = nullptr;
6699       // Handle the if clause. If the if clause is present, the number of
6700       // threads is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6701       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6702         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6703         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6704         const OMPIfClause *IfClause = nullptr;
6705         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6706           if (C->getNameModifier() == OMPD_unknown ||
6707               C->getNameModifier() == OMPD_parallel) {
6708             IfClause = C;
6709             break;
6710           }
6711         }
6712         if (IfClause) {
6713           const Expr *Cond = IfClause->getCondition();
6714           bool Result;
6715           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6716             if (!Result)
6717               return CGF.Builder.getInt32(1);
6718           } else {
6719             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6720             if (const auto *PreInit =
6721                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6722               for (const auto *I : PreInit->decls()) {
6723                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6724                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6725                 } else {
6726                   CodeGenFunction::AutoVarEmission Emission =
6727                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6728                   CGF.EmitAutoVarCleanups(Emission);
6729                 }
6730               }
6731             }
6732             CondVal = CGF.EvaluateExprAsBool(Cond);
6733           }
6734         }
6735       }
6736       // Check the value of the num_threads clause only if the if clause was
6737       // not specified or does not evaluate to false.
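      // E.g. (illustrative) for a nested '#pragma omp parallel if(c)
      // num_threads(n)' this computes c ? n : 1, with n additionally clamped
      // to the default thread limit when one is known.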
6738 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6739 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6740 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6741 const auto *NumThreadsClause = 6742 Dir->getSingleClause<OMPNumThreadsClause>(); 6743 CodeGenFunction::LexicalScope Scope( 6744 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6745 if (const auto *PreInit = 6746 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6747 for (const auto *I : PreInit->decls()) { 6748 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6749 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6750 } else { 6751 CodeGenFunction::AutoVarEmission Emission = 6752 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6753 CGF.EmitAutoVarCleanups(Emission); 6754 } 6755 } 6756 } 6757 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6758 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6759 /*isSigned=*/false); 6760 if (DefaultThreadLimitVal) 6761 NumThreads = CGF.Builder.CreateSelect( 6762 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6763 DefaultThreadLimitVal, NumThreads); 6764 } else { 6765 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6766 : CGF.Builder.getInt32(0); 6767 } 6768 // Process condition of the if clause. 6769 if (CondVal) { 6770 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6771 CGF.Builder.getInt32(1)); 6772 } 6773 return NumThreads; 6774 } 6775 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6776 return CGF.Builder.getInt32(1); 6777 return DefaultThreadLimitVal; 6778 } 6779 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6780 : CGF.Builder.getInt32(0); 6781 } 6782 6783 /// Emit the number of threads for a target directive. Inspect the 6784 /// thread_limit clause associated with a teams construct combined or closely 6785 /// nested with the target directive. 6786 /// 6787 /// Emit the num_threads clause for directives such as 'target parallel' that 6788 /// have no associated teams construct. 6789 /// 6790 /// Otherwise, return nullptr. 
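/// For example (illustrative): '#pragma omp target teams thread_limit(64)'
/// emits 64 (possibly lowered further by a nested num_threads clause), while
/// '#pragma omp target simd' emits 1.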
6791 static llvm::Value * 6792 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6793 const OMPExecutableDirective &D) { 6794 assert(!CGF.getLangOpts().OpenMPIsDevice && 6795 "Clauses associated with the teams directive expected to be emitted " 6796 "only for the host!"); 6797 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6798 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6799 "Expected target-based executable directive."); 6800 CGBuilderTy &Bld = CGF.Builder; 6801 llvm::Value *ThreadLimitVal = nullptr; 6802 llvm::Value *NumThreadsVal = nullptr; 6803 switch (DirectiveKind) { 6804 case OMPD_target: { 6805 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6806 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6807 return NumThreads; 6808 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6809 CGF.getContext(), CS->getCapturedStmt()); 6810 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6811 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6812 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6813 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6814 const auto *ThreadLimitClause = 6815 Dir->getSingleClause<OMPThreadLimitClause>(); 6816 CodeGenFunction::LexicalScope Scope( 6817 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6818 if (const auto *PreInit = 6819 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6820 for (const auto *I : PreInit->decls()) { 6821 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6822 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6823 } else { 6824 CodeGenFunction::AutoVarEmission Emission = 6825 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6826 CGF.EmitAutoVarCleanups(Emission); 6827 } 6828 } 6829 } 6830 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6831 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6832 ThreadLimitVal = 6833 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6834 } 6835 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6836 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6837 CS = Dir->getInnermostCapturedStmt(); 6838 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6839 CGF.getContext(), CS->getCapturedStmt()); 6840 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6841 } 6842 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6843 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6844 CS = Dir->getInnermostCapturedStmt(); 6845 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6846 return NumThreads; 6847 } 6848 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6849 return Bld.getInt32(1); 6850 } 6851 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0);
6852   }
6853   case OMPD_target_teams: {
6854     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6855       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6856       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6857       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6858           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6859       ThreadLimitVal =
6860           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6861     }
6862     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6863     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6864       return NumThreads;
6865     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6866         CGF.getContext(), CS->getCapturedStmt());
6867     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6868       if (Dir->getDirectiveKind() == OMPD_distribute) {
6869         CS = Dir->getInnermostCapturedStmt();
6870         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6871           return NumThreads;
6872       }
6873     }
6874     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6875   }
6876   case OMPD_target_teams_distribute:
6877     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6878       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6879       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6880       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6881           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6882       ThreadLimitVal =
6883           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6884     }
6885     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6886   case OMPD_target_parallel:
6887   case OMPD_target_parallel_for:
6888   case OMPD_target_parallel_for_simd:
6889   case OMPD_target_teams_distribute_parallel_for:
6890   case OMPD_target_teams_distribute_parallel_for_simd: {
6891     llvm::Value *CondVal = nullptr;
6892     // Handle the if clause. If the if clause is present, the number of threads
6893     // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
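    // E.g. (illustrative) '#pragma omp target teams distribute parallel for
    // if(c) num_threads(n) thread_limit(t)' emits c ? min(n, t) : 1, using an
    // unsigned comparison for the minimum.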
6894 if (D.hasClausesOfKind<OMPIfClause>()) { 6895 const OMPIfClause *IfClause = nullptr; 6896 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6897 if (C->getNameModifier() == OMPD_unknown || 6898 C->getNameModifier() == OMPD_parallel) { 6899 IfClause = C; 6900 break; 6901 } 6902 } 6903 if (IfClause) { 6904 const Expr *Cond = IfClause->getCondition(); 6905 bool Result; 6906 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6907 if (!Result) 6908 return Bld.getInt32(1); 6909 } else { 6910 CodeGenFunction::RunCleanupsScope Scope(CGF); 6911 CondVal = CGF.EvaluateExprAsBool(Cond); 6912 } 6913 } 6914 } 6915 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6916 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6917 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6918 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6919 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6920 ThreadLimitVal = 6921 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6922 } 6923 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6924 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6925 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6926 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6927 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6928 NumThreadsVal = 6929 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 6930 ThreadLimitVal = ThreadLimitVal 6931 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6932 ThreadLimitVal), 6933 NumThreadsVal, ThreadLimitVal) 6934 : NumThreadsVal; 6935 } 6936 if (!ThreadLimitVal) 6937 ThreadLimitVal = Bld.getInt32(0); 6938 if (CondVal) 6939 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6940 return ThreadLimitVal; 6941 } 6942 case OMPD_target_teams_distribute_simd: 6943 case OMPD_target_simd: 6944 return Bld.getInt32(1); 6945 case OMPD_parallel: 6946 case OMPD_for: 6947 case OMPD_parallel_for: 6948 case OMPD_parallel_master: 6949 case OMPD_parallel_sections: 6950 case OMPD_for_simd: 6951 case OMPD_parallel_for_simd: 6952 case OMPD_cancel: 6953 case OMPD_cancellation_point: 6954 case OMPD_ordered: 6955 case OMPD_threadprivate: 6956 case OMPD_allocate: 6957 case OMPD_task: 6958 case OMPD_simd: 6959 case OMPD_sections: 6960 case OMPD_section: 6961 case OMPD_single: 6962 case OMPD_master: 6963 case OMPD_critical: 6964 case OMPD_taskyield: 6965 case OMPD_barrier: 6966 case OMPD_taskwait: 6967 case OMPD_taskgroup: 6968 case OMPD_atomic: 6969 case OMPD_flush: 6970 case OMPD_depobj: 6971 case OMPD_scan: 6972 case OMPD_teams: 6973 case OMPD_target_data: 6974 case OMPD_target_exit_data: 6975 case OMPD_target_enter_data: 6976 case OMPD_distribute: 6977 case OMPD_distribute_simd: 6978 case OMPD_distribute_parallel_for: 6979 case OMPD_distribute_parallel_for_simd: 6980 case OMPD_teams_distribute: 6981 case OMPD_teams_distribute_simd: 6982 case OMPD_teams_distribute_parallel_for: 6983 case OMPD_teams_distribute_parallel_for_simd: 6984 case OMPD_target_update: 6985 case OMPD_declare_simd: 6986 case OMPD_declare_variant: 6987 case OMPD_begin_declare_variant: 6988 case OMPD_end_declare_variant: 6989 case OMPD_declare_target: 6990 case OMPD_end_declare_target: 6991 case OMPD_declare_reduction: 6992 case OMPD_declare_mapper: 6993 case OMPD_taskloop: 6994 case OMPD_taskloop_simd: 6995 case OMPD_master_taskloop: 6996 case OMPD_master_taskloop_simd: 6997 case OMPD_parallel_master_taskloop: 6998 case OMPD_parallel_master_taskloop_simd: 
6999   case OMPD_requires:
7000   case OMPD_unknown:
7001     break;
7002   default:
7003     break;
7004   }
7005   llvm_unreachable("Unsupported directive kind.");
7006 }
7007 
7008 namespace {
7009 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7010 
7011 // Utility to handle information from clauses associated with a given
7012 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7013 // It provides a convenient interface to obtain the information and generate
7014 // code for that information.
7015 class MappableExprsHandler {
7016 public:
7017   /// Values for bit flags used to specify the mapping type for
7018   /// offloading.
7019   enum OpenMPOffloadMappingFlags : uint64_t {
7020     /// No flags.
7021     OMP_MAP_NONE = 0x0,
7022     /// Allocate memory on the device and move data from host to device.
7023     OMP_MAP_TO = 0x01,
7024     /// Allocate memory on the device and move data from device to host.
7025     OMP_MAP_FROM = 0x02,
7026     /// Always perform the requested mapping action on the element, even
7027     /// if it was already mapped before.
7028     OMP_MAP_ALWAYS = 0x04,
7029     /// Delete the element from the device environment, ignoring the
7030     /// current reference count associated with the element.
7031     OMP_MAP_DELETE = 0x08,
7032     /// The element being mapped is a pointer-pointee pair; both the
7033     /// pointer and the pointee should be mapped.
7034     OMP_MAP_PTR_AND_OBJ = 0x10,
7035     /// This flag signals that the base address of an entry should be
7036     /// passed to the target kernel as an argument.
7037     OMP_MAP_TARGET_PARAM = 0x20,
7038     /// Signal that the runtime library has to return the device pointer
7039     /// in the current position for the data being mapped. Used when we have
7040     /// the use_device_ptr or use_device_addr clause.
7041     OMP_MAP_RETURN_PARAM = 0x40,
7042     /// This flag signals that the reference being passed is a pointer to
7043     /// private data.
7044     OMP_MAP_PRIVATE = 0x80,
7045     /// Pass the element to the device by value.
7046     OMP_MAP_LITERAL = 0x100,
7047     /// Implicit map.
7048     OMP_MAP_IMPLICIT = 0x200,
7049     /// Close is a hint to the runtime to allocate memory close to
7050     /// the target device.
7051     OMP_MAP_CLOSE = 0x400,
7052     /// 0x800 is reserved for compatibility with XLC.
7053     /// Produce a runtime error if the data is not already allocated.
7054     OMP_MAP_PRESENT = 0x1000,
7055     /// Signal that the runtime library should use args as an array of
7056     /// descriptor_dim pointers and use args_size as dims. Used when we have
7057     /// non-contiguous list items in the target update directive.
7058     OMP_MAP_NON_CONTIG = 0x100000000000,
7059     /// The 16 MSBs of the flags indicate whether the entry is a member of
7060     /// some struct/class.
7061     OMP_MAP_MEMBER_OF = 0xffff000000000000,
7062     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7063   };
7064 
7065   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7066   static unsigned getFlagMemberOffset() {
7067     unsigned Offset = 0;
7068     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7069          Remain = Remain >> 1)
7070       Offset++;
7071     return Offset;
7072   }
7073 
7074   /// Class that holds debugging information for a data mapping to be passed to
7075   /// the runtime library.
7076   class MappingExprInfo {
7077     /// The variable declaration used for the data mapping.
7078     const ValueDecl *MapDecl = nullptr;
7079     /// The original expression used in the map clause, or null if there is
7080     /// none.
7081     const Expr *MapExpr = nullptr;
7082 
7083   public:
7084     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7085         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7086 
7087     const ValueDecl *getMapDecl() const { return MapDecl; }
7088     const Expr *getMapExpr() const { return MapExpr; }
7089   };
7090 
7091   /// Class that associates information with a base pointer to be passed to the
7092   /// runtime library.
7093   class BasePointerInfo {
7094     /// The base pointer.
7095     llvm::Value *Ptr = nullptr;
7096     /// The base declaration that refers to this device pointer, or null if
7097     /// there is none.
7098     const ValueDecl *DevPtrDecl = nullptr;
7099 
7100   public:
7101     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7102         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7103     llvm::Value *operator*() const { return Ptr; }
7104     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7105     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7106   };
7107 
7108   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7109   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7110   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7111   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7112   using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7113   using MapDimArrayTy = SmallVector<uint64_t, 4>;
7114   using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7115 
7116   /// This structure contains combined information generated for mappable
7117   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7118   /// mappers, and non-contiguous information.
7119   struct MapCombinedInfoTy {
7120     struct StructNonContiguousInfo {
7121       bool IsNonContiguous = false;
7122       MapDimArrayTy Dims;
7123       MapNonContiguousArrayTy Offsets;
7124       MapNonContiguousArrayTy Counts;
7125       MapNonContiguousArrayTy Strides;
7126     };
7127     MapExprsArrayTy Exprs;
7128     MapBaseValuesArrayTy BasePointers;
7129     MapValuesArrayTy Pointers;
7130     MapValuesArrayTy Sizes;
7131     MapFlagsArrayTy Types;
7132     MapMappersArrayTy Mappers;
7133     StructNonContiguousInfo NonContigInfo;
7134 
7135     /// Append arrays in \a CurInfo.
7136     void append(MapCombinedInfoTy &CurInfo) {
7137       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7138       BasePointers.append(CurInfo.BasePointers.begin(),
7139                           CurInfo.BasePointers.end());
7140       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7141       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7142       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7143       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7144       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7145                                 CurInfo.NonContigInfo.Dims.end());
7146       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7147                                    CurInfo.NonContigInfo.Offsets.end());
7148       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7149                                   CurInfo.NonContigInfo.Counts.end());
7150       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7151                                    CurInfo.NonContigInfo.Strides.end());
7152     }
7153   };
7154 
7155   /// Map between a struct and its lowest & highest elements which have been
7156   /// mapped.
7157 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7158 /// HE(FieldIndex, Pointer)} 7159 struct StructRangeInfoTy { 7160 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7161 0, Address::invalid()}; 7162 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7163 0, Address::invalid()}; 7164 Address Base = Address::invalid(); 7165 bool IsArraySection = false; 7166 }; 7167 7168 private: 7169 /// Information about a mappable expression gathered from a map, to, or from clause, including whether a device pointer has to be returned for it. 7170 struct MapInfo { 7171 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7172 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7173 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7174 ArrayRef<OpenMPMotionModifierKind> MotionModifiers; 7175 bool ReturnDevicePointer = false; 7176 bool IsImplicit = false; 7177 const ValueDecl *Mapper = nullptr; 7178 const Expr *VarRef = nullptr; 7179 bool ForDeviceAddr = false; 7180 7181 MapInfo() = default; 7182 MapInfo( 7183 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7184 OpenMPMapClauseKind MapType, 7185 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7186 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7187 bool ReturnDevicePointer, bool IsImplicit, 7188 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr, 7189 bool ForDeviceAddr = false) 7190 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7191 MotionModifiers(MotionModifiers), 7192 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), 7193 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {} 7194 }; 7195 7196 /// If use_device_ptr or use_device_addr is used on a decl which is a struct 7197 /// member and there is no map information about it, then emission of that 7198 /// entry is deferred until the whole struct has been processed. 7199 struct DeferredDevicePtrEntryTy { 7200 const Expr *IE = nullptr; 7201 const ValueDecl *VD = nullptr; 7202 bool ForDeviceAddr = false; 7203 7204 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 7205 bool ForDeviceAddr) 7206 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 7207 }; 7208 7209 /// The target directive from where the mappable clauses were extracted. It 7210 /// is either an executable directive or a user-defined mapper directive. 7211 llvm::PointerUnion<const OMPExecutableDirective *, 7212 const OMPDeclareMapperDecl *> 7213 CurDir; 7214 7215 /// Function the directive is being generated for. 7216 CodeGenFunction &CGF; 7217 7218 /// Set of all firstprivate variables in the current directive. 7219 /// The bool data is set to true if the variable is implicitly marked as 7220 /// firstprivate, false otherwise. 7221 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7222 7223 /// Map between device pointer declarations and their expression components. 7224 /// The key value for declarations in 'this' is null. 7225 llvm::DenseMap< 7226 const ValueDecl *, 7227 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7228 DevPointersMap; 7229 7230 llvm::Value *getExprTypeSize(const Expr *E) const { 7231 QualType ExprTy = E->getType().getCanonicalType(); 7232 7233 // Calculate the size for an array shaping expression.
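// E.g. (illustrative): for the array shaping expression '([3][4])p' with 'int *p', the size works out to 3 * 4 * sizeof(int), built up with the NUW multiplications below.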
7234 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7235 llvm::Value *Size = 7236 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7237 for (const Expr *SE : OAE->getDimensions()) { 7238 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7239 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7240 CGF.getContext().getSizeType(), 7241 SE->getExprLoc()); 7242 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7243 } 7244 return Size; 7245 } 7246 7247 // Reference types are ignored for mapping purposes. 7248 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7249 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7250 7251 // Given that an array section is considered a built-in type, we need to 7252 // do the calculation based on the length of the section instead of relying 7253 // on CGF.getTypeSize(E->getType()). 7254 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7255 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7256 OAE->getBase()->IgnoreParenImpCasts()) 7257 .getCanonicalType(); 7258 7259 // If there is no length associated with the expression and the lower 7260 // bound is not specified either, that means we are using the whole 7261 // length of the base. 7262 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7263 !OAE->getLowerBound()) 7264 return CGF.getTypeSize(BaseTy); 7265 7266 llvm::Value *ElemSize; 7267 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7268 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7269 } else { 7270 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7271 assert(ATy && "Expecting array type if not a pointer type."); 7272 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7273 } 7274 7275 // If we don't have a length at this point, that is because we have an 7276 // array section with a single element. 7277 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 7278 return ElemSize; 7279 7280 if (const Expr *LenExpr = OAE->getLength()) { 7281 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7282 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7283 CGF.getContext().getSizeType(), 7284 LenExpr->getExprLoc()); 7285 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7286 } 7287 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7288 OAE->getLowerBound() && "expected array_section[lb:]."); 7289 // Size = sizeof(base type) - lb * sizeof(element type); 7290 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7291 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7292 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7293 CGF.getContext().getSizeType(), 7294 OAE->getLowerBound()->getExprLoc()); 7295 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7296 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7297 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7298 LengthVal = CGF.Builder.CreateSelect( 7299 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7300 return LengthVal; 7301 } 7302 return CGF.getTypeSize(ExprTy); 7303 } 7304 7305 /// Return the corresponding bits for a given map clause modifier. Add 7306 /// a flag marking the map as a pointer if requested. Add a flag marking the 7307 /// map as the first one of a series of maps that relate to the same map 7308 /// expression.
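/// For example (illustrative): 'map(always, close, tofrom: x)' with AddIsTargetParamFlag set yields OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE | OMP_MAP_TARGET_PARAM.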
7309 OpenMPOffloadMappingFlags getMapTypeBits( 7310 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7311 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7312 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7313 OpenMPOffloadMappingFlags Bits = 7314 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7315 switch (MapType) { 7316 case OMPC_MAP_alloc: 7317 case OMPC_MAP_release: 7318 // alloc and release are the default behavior in the runtime library, i.e. 7319 // if we don't pass any bits, alloc/release is what the runtime is 7320 // going to do. Therefore, we don't need to signal anything for these two 7321 // type modifiers. 7322 break; 7323 case OMPC_MAP_to: 7324 Bits |= OMP_MAP_TO; 7325 break; 7326 case OMPC_MAP_from: 7327 Bits |= OMP_MAP_FROM; 7328 break; 7329 case OMPC_MAP_tofrom: 7330 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7331 break; 7332 case OMPC_MAP_delete: 7333 Bits |= OMP_MAP_DELETE; 7334 break; 7335 case OMPC_MAP_unknown: 7336 llvm_unreachable("Unexpected map type!"); 7337 } 7338 if (AddPtrFlag) 7339 Bits |= OMP_MAP_PTR_AND_OBJ; 7340 if (AddIsTargetParamFlag) 7341 Bits |= OMP_MAP_TARGET_PARAM; 7342 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7343 != MapModifiers.end()) 7344 Bits |= OMP_MAP_ALWAYS; 7345 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7346 != MapModifiers.end()) 7347 Bits |= OMP_MAP_CLOSE; 7348 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) 7349 != MapModifiers.end()) 7350 Bits |= OMP_MAP_PRESENT; 7351 if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) 7352 != MotionModifiers.end()) 7353 Bits |= OMP_MAP_PRESENT; 7354 if (IsNonContiguous) 7355 Bits |= OMP_MAP_NON_CONTIG; 7356 return Bits; 7357 } 7358 7359 /// Return true if the provided expression is a final array section. A 7360 /// final array section is one whose length can't be proven to be one. 7361 bool isFinalArraySectionExpression(const Expr *E) const { 7362 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7363 7364 // It is not an array section and therefore not a unity-size one. 7365 if (!OASE) 7366 return false; 7367 7368 // An array section with no colon always refers to a single element. 7369 if (OASE->getColonLocFirst().isInvalid()) 7370 return false; 7371 7372 const Expr *Length = OASE->getLength(); 7373 7374 // If we don't have a length we have to check if the array has size 1 7375 // for this dimension. Also, we should always expect a length if the 7376 // base type is a pointer. 7377 if (!Length) { 7378 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7379 OASE->getBase()->IgnoreParenImpCasts()) 7380 .getCanonicalType(); 7381 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7382 return ATy->getSize().getSExtValue() != 1; 7383 // If we don't have a constant dimension length, we have to consider 7384 // the current section as having any size, so it is not necessarily 7385 // unitary. If it happens to be unity size, that's the user's fault. 7386 return true; 7387 } 7388 7389 // Check if the length evaluates to 1. 7390 Expr::EvalResult Result; 7391 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7392 return true; // Can have more than size 1.
7393 7394 llvm::APSInt ConstLength = Result.Val.getInt(); 7395 return ConstLength.getSExtValue() != 1; 7396 } 7397 7398 /// Generate the base pointers, section pointers, sizes, map type bits, and 7399 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7400 /// map type, map or motion modifiers, and expression components. 7401 /// \a IsFirstComponent should be set to true if the provided set of 7402 /// components is the first associated with a capture. 7403 void generateInfoForComponentList( 7404 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7405 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7406 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7407 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7408 bool IsFirstComponentList, bool IsImplicit, 7409 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7410 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7411 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7412 OverlappedElements = llvm::None) const { 7413 // The following summarizes what has to be generated for each map and the 7414 // types below. The generated information is expressed in this order: 7415 // base pointer, section pointer, size, flags 7416 // (to add to the ones that come from the map type and modifier). 7417 // 7418 // double d; 7419 // int i[100]; 7420 // float *p; 7421 // 7422 // struct S1 { 7423 // int i; 7424 // float f[50]; 7425 // } 7426 // struct S2 { 7427 // int i; 7428 // float f[50]; 7429 // S1 s; 7430 // double *p; 7431 // struct S2 *ps; 7432 // } 7433 // S2 s; 7434 // S2 *ps; 7435 // 7436 // map(d) 7437 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7438 // 7439 // map(i) 7440 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7441 // 7442 // map(i[1:23]) 7443 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7444 // 7445 // map(p) 7446 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7447 // 7448 // map(p[1:24]) 7449 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7450 // in unified shared memory mode or for local pointers 7451 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7452 // 7453 // map(s) 7454 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7455 // 7456 // map(s.i) 7457 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7458 // 7459 // map(s.s.f) 7460 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7461 // 7462 // map(s.p) 7463 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7464 // 7465 // map(to: s.p[:22]) 7466 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7467 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7468 // &(s.p), &(s.p[0]), 22*sizeof(double), 7469 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7470 // (*) alloc space for struct members, only this is a target parameter 7471 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7472 // optimizes this entry out, same in the examples below) 7473 // (***) map the pointee (map: to) 7474 // 7475 // map(s.ps) 7476 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7477 // 7478 // map(from: s.ps->s.i) 7479 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7480 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7481 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7482 // 7483 // map(to: s.ps->ps) 7484 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7485 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7486 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ | TO 7487 // 7488 // map(s.ps->ps->ps) 7489 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7490 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7491 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7492 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7493 // 7494 // map(to: s.ps->ps->s.f[:22]) 7495 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7496 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7497 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7498 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7499 // 7500 // map(ps) 7501 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7502 // 7503 // map(ps->i) 7504 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7505 // 7506 // map(ps->s.f) 7507 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7508 // 7509 // map(from: ps->p) 7510 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7511 // 7512 // map(to: ps->p[:22]) 7513 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7514 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7515 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7516 // 7517 // map(ps->ps) 7518 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7519 // 7520 // map(from: ps->ps->s.i) 7521 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7522 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7523 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7524 // 7525 // map(from: ps->ps->ps) 7526 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7527 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7528 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7529 // 7530 // map(ps->ps->ps->ps) 7531 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7532 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7533 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7534 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7535 // 7536 // map(to: ps->ps->ps->s.f[:22]) 7537 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7538 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7539 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7540 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7541 // 7542 // map(to: s.f[:22]) map(from: s.p[:33]) 7543 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7544 // sizeof(double*) (*), TARGET_PARAM 7545 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7546 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7547 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7548 // (*) allocate contiguous space needed to fit all mapped members even if 7549 // that means also allocating space for members that are not mapped (in 7550 // this example, s.f[22..49] and s.s are not mapped, yet we must allocate 7551 // space for them as well because they fall between &s.f[0] and &s.p) 7552 // 7553 // map(from: s.f[:22]) map(to: ps->p[:33]) 7554 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7555 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7556 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7557 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7558 // (*) the struct this entry pertains to is the 2nd element in the list of 7559 // arguments, hence MEMBER_OF(2) 7560 // 7561 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7562 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7563 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7564 // &s, &(s.s),
sizeof(struct S1), MEMBER_OF(1) | FROM 7565 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7566 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7567 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7568 // (*) the struct this entry pertains to is the 4th element in the list 7569 // of arguments, hence MEMBER_OF(4) 7570 7571 // Track if the map information being generated is the first for a capture. 7572 bool IsCaptureFirstInfo = IsFirstComponentList; 7573 // When the variable is declared with 'declare target link', or appears in 7574 // a 'declare target to' clause under unified shared memory, a reference is 7575 // needed to hold the host/device address of the variable. 7576 bool RequiresReference = false; 7577 7578 // Scan the components from the base to the complete expression. 7579 auto CI = Components.rbegin(); 7580 auto CE = Components.rend(); 7581 auto I = CI; 7582 7583 // Track if the map information being generated is the first for a list of 7584 // components. 7585 bool IsExpressionFirstInfo = true; 7586 bool FirstPointerInComplexData = false; 7587 Address BP = Address::invalid(); 7588 const Expr *AssocExpr = I->getAssociatedExpression(); 7589 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7590 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7591 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7592 7593 if (isa<MemberExpr>(AssocExpr)) { 7594 // The base is the 'this' pointer. The content of the pointer is going 7595 // to be the base of the field being mapped. 7596 BP = CGF.LoadCXXThisAddress(); 7597 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7598 (OASE && 7599 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7600 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7601 } else if (OAShE && 7602 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7603 BP = Address( 7604 CGF.EmitScalarExpr(OAShE->getBase()), 7605 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7606 } else { 7607 // The base is the reference to the variable. 7608 // BP = &Var. 7609 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7610 if (const auto *VD = 7611 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7612 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7613 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7614 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7615 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7616 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7617 RequiresReference = true; 7618 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7619 } 7620 } 7621 } 7622 7623 // If the variable is a pointer and is being dereferenced (i.e. is not 7624 // the last component), the base has to be the pointer itself, not its 7625 // reference. References are ignored for mapping purposes. 7626 QualType Ty = 7627 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7628 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7629 // No need to generate individual map information for the pointer; it 7630 // can be associated with the combined storage if shared memory mode is 7631 // active or the base declaration is not a global variable.
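// E.g. (illustrative): for a function-local 'int *p' and 'map(p[0:10])', 'p' has local storage, so it is dereferenced here and the pointee becomes the base of the mapping; for a global 'p' without unified shared memory, the dereference is deferred via FirstPointerInComplexData.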
7632 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7633 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7634 !VD || VD->hasLocalStorage()) 7635 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7636 else 7637 FirstPointerInComplexData = true; 7638 ++I; 7639 } 7640 } 7641 7642 // Track whether a component of the list should be marked as MEMBER_OF some 7643 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7644 // in a component list should be marked as MEMBER_OF; all subsequent entries 7645 // do not belong to the base struct. E.g. 7646 // struct S2 s; 7647 // s.ps->ps->ps->f[:] 7648 // (1) (2) (3) (4) 7649 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7650 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7651 // is the pointee of ps(2) which is not a member of struct s, so it should 7652 // not be marked as such (it is still PTR_AND_OBJ). 7653 // The variable is initialized to false so that PTR_AND_OBJ entries which 7654 // are not struct members are not considered (e.g. array of pointers to 7655 // data). 7656 bool ShouldBeMemberOf = false; 7657 7658 // Variable keeping track of whether or not we have encountered a component 7659 // in the component list which is a member expression. Useful when we have a 7660 // pointer or a final array section, in which case it is the previous 7661 // component in the list which tells us whether we have a member expression. 7662 // E.g. X.f[:] 7663 // While processing the final array section "[:]" it is "f" which tells us 7664 // whether we are dealing with a member of a declared struct. 7665 const MemberExpr *EncounteredME = nullptr; 7666 7667 // Track the total number of dimensions. Start from one for the dummy 7668 // dimension. 7669 uint64_t DimSize = 1; 7670 7671 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7672 7673 for (; I != CE; ++I) { 7674 // If the current component is a member of a struct (parent struct), mark it. 7675 if (!EncounteredME) { 7676 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7677 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7678 // as MEMBER_OF the parent struct. 7679 if (EncounteredME) { 7680 ShouldBeMemberOf = true; 7681 // Do not emit this as a complex pointer if it is actually not an 7682 // array-like expression. 7683 if (FirstPointerInComplexData) { 7684 QualType Ty = std::prev(I) 7685 ->getAssociatedDeclaration() 7686 ->getType() 7687 .getNonReferenceType(); 7688 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7689 FirstPointerInComplexData = false; 7690 } 7691 } 7692 } 7693 7694 auto Next = std::next(I); 7695 7696 // We need to generate the addresses and sizes if this is the last 7697 // component, if the component is a pointer, or if it is an array section 7698 // whose length can't be proven to be one. If this is a pointer, it 7699 // becomes the base address for the following components. 7700 7701 // A final array section is one whose length can't be proven to be one. 7702 // If the map item is non-contiguous then we don't treat any array section 7703 // as a final array section. 7704 bool IsFinalArraySection = 7705 !IsNonContiguous && 7706 isFinalArraySectionExpression(I->getAssociatedExpression()); 7707 7708 // If we have a declaration for the mapping, use that; otherwise use 7709 // the base declaration of the map clause. 7710 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7711 ? 
I->getAssociatedDeclaration() 7712 : BaseDecl; 7713 7714 // Get information on whether the element is a pointer. Have to do a 7715 // special treatment for array sections given that they are built-in 7716 // types. 7717 const auto *OASE = 7718 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7719 const auto *OAShE = 7720 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7721 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7722 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7723 bool IsPointer = 7724 OAShE || 7725 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7726 .getCanonicalType() 7727 ->isAnyPointerType()) || 7728 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7729 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 7730 7731 if (OASE) 7732 ++DimSize; 7733 7734 if (Next == CE || IsNonDerefPointer || IsFinalArraySection) { 7735 // If this is not the last component, we expect the pointer to be 7736 // associated with an array expression or member expression. 7737 assert((Next == CE || 7738 isa<MemberExpr>(Next->getAssociatedExpression()) || 7739 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7740 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7741 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 7742 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7743 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7744 "Unexpected expression"); 7745 7746 Address LB = Address::invalid(); 7747 if (OAShE) { 7748 LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 7749 CGF.getContext().getTypeAlignInChars( 7750 OAShE->getBase()->getType())); 7751 } else { 7752 LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7753 .getAddress(CGF); 7754 } 7755 7756 // If this component is a pointer inside the base struct then we don't 7757 // need to create any entry for it - it will be combined with the object 7758 // it is pointing to into a single PTR_AND_OBJ entry. 7759 bool IsMemberPointerOrAddr = 7760 (IsPointer || ForDeviceAddr) && EncounteredME && 7761 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7762 EncounteredME); 7763 if (!OverlappedElements.empty()) { 7764 // Handle base element with the info for overlapped elements. 7765 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7766 assert(Next == CE && 7767 "Expected last element for the overlapped elements."); 7768 assert(!IsPointer && 7769 "Unexpected base element with the pointer type."); 7770 // Mark the whole struct as the struct that requires allocation on the 7771 // device. 7772 PartialStruct.LowestElem = {0, LB}; 7773 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7774 I->getAssociatedExpression()->getType()); 7775 Address HB = CGF.Builder.CreateConstGEP( 7776 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7777 CGF.VoidPtrTy), 7778 TypeSize.getQuantity() - 1); 7779 PartialStruct.HighestElem = { 7780 std::numeric_limits<decltype( 7781 PartialStruct.HighestElem.first)>::max(), 7782 HB}; 7783 PartialStruct.Base = BP; 7784 // Emit data for non-overlapped data. 7785 OpenMPOffloadMappingFlags Flags = 7786 OMP_MAP_MEMBER_OF | 7787 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7788 /*AddPtrFlag=*/false, 7789 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 7790 LB = BP; 7791 llvm::Value *Size = nullptr; 7792 // Do bitcopy of all non-overlapped structure elements. 
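// E.g. (illustrative): when mapping 's' whose member 's.p' is overlapped by its own map, the loop below emits [&s, &s.p) and the tail code emits [one past &s.p, one past the end of s), so every byte except the overlapped pointer is bit-copied.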
7793 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7794 Component : OverlappedElements) { 7795 Address ComponentLB = Address::invalid(); 7796 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7797 Component) { 7798 if (MC.getAssociatedDeclaration()) { 7799 ComponentLB = 7800 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7801 .getAddress(CGF); 7802 Size = CGF.Builder.CreatePtrDiff( 7803 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7804 CGF.EmitCastToVoidPtr(LB.getPointer())); 7805 break; 7806 } 7807 } 7808 assert(Size && "Failed to determine structure size"); 7809 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7810 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7811 CombinedInfo.Pointers.push_back(LB.getPointer()); 7812 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7813 Size, CGF.Int64Ty, /*isSigned=*/true)); 7814 CombinedInfo.Types.push_back(Flags); 7815 CombinedInfo.Mappers.push_back(nullptr); 7816 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7817 : 1); 7818 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7819 } 7820 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7821 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7822 CombinedInfo.Pointers.push_back(LB.getPointer()); 7823 Size = CGF.Builder.CreatePtrDiff( 7824 CGF.EmitCastToVoidPtr( 7825 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7826 CGF.EmitCastToVoidPtr(LB.getPointer())); 7827 CombinedInfo.Sizes.push_back( 7828 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7829 CombinedInfo.Types.push_back(Flags); 7830 CombinedInfo.Mappers.push_back(nullptr); 7831 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7832 : 1); 7833 break; 7834 } 7835 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7836 if (!IsMemberPointerOrAddr || 7837 (Next == CE && MapType != OMPC_MAP_unknown)) { 7838 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7839 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7840 CombinedInfo.Pointers.push_back(LB.getPointer()); 7841 CombinedInfo.Sizes.push_back( 7842 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7843 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7844 : 1); 7845 7846 // If Mapper is valid, the last component inherits the mapper. 7847 bool HasMapper = Mapper && Next == CE; 7848 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 7849 7850 // We need to add a pointer flag for each map that comes from the 7851 // same expression except for the first one. We also need to signal 7852 // this map is the first one that relates with the current capture 7853 // (there is a set of entries for each capture). 7854 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7855 MapType, MapModifiers, MotionModifiers, IsImplicit, 7856 !IsExpressionFirstInfo || RequiresReference || 7857 FirstPointerInComplexData, 7858 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 7859 7860 if (!IsExpressionFirstInfo) { 7861 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7862 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7863 if (IsPointer) 7864 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7865 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7866 7867 if (ShouldBeMemberOf) { 7868 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7869 // should be later updated with the correct value of MEMBER_OF. 
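// (The MEMBER_OF field occupies the 16 MSBs; setCorrectMemberOfFlag later replaces the 0xFFFF placeholder with getMemberOfFlag(Position), i.e. the combined entry's 1-based position shifted left by getFlagMemberOffset(), which is 48 here.)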
7870 Flags |= OMP_MAP_MEMBER_OF; 7871 // From now on, all subsequent PTR_AND_OBJ entries should not be 7872 // marked as MEMBER_OF. 7873 ShouldBeMemberOf = false; 7874 } 7875 } 7876 7877 CombinedInfo.Types.push_back(Flags); 7878 } 7879 7880 // If we have encountered a member expression so far, keep track of the 7881 // mapped member. If the parent is "*this", then the value declaration 7882 // is nullptr. 7883 if (EncounteredME) { 7884 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 7885 unsigned FieldIndex = FD->getFieldIndex(); 7886 7887 // Update info about the lowest and highest elements for this struct. 7888 if (!PartialStruct.Base.isValid()) { 7889 PartialStruct.LowestElem = {FieldIndex, LB}; 7890 if (IsFinalArraySection) { 7891 Address HB = 7892 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 7893 .getAddress(CGF); 7894 PartialStruct.HighestElem = {FieldIndex, HB}; 7895 } else { 7896 PartialStruct.HighestElem = {FieldIndex, LB}; 7897 } 7898 PartialStruct.Base = BP; 7899 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7900 PartialStruct.LowestElem = {FieldIndex, LB}; 7901 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7902 PartialStruct.HighestElem = {FieldIndex, LB}; 7903 } 7904 } 7905 7906 // Need to emit combined struct for array sections. 7907 if (IsFinalArraySection || IsNonContiguous) 7908 PartialStruct.IsArraySection = true; 7909 7910 // If we have a final array section, we are done with this expression. 7911 if (IsFinalArraySection) 7912 break; 7913 7914 // The pointer becomes the base for the next element. 7915 if (Next != CE) 7916 BP = LB; 7917 7918 IsExpressionFirstInfo = false; 7919 IsCaptureFirstInfo = false; 7920 FirstPointerInComplexData = false; 7921 } else if (FirstPointerInComplexData) { 7922 QualType Ty = Components.rbegin() 7923 ->getAssociatedDeclaration() 7924 ->getType() 7925 .getNonReferenceType(); 7926 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7927 FirstPointerInComplexData = false; 7928 } 7929 } 7930 7931 if (!IsNonContiguous) 7932 return; 7933 7934 const ASTContext &Context = CGF.getContext(); 7935 7936 // To support strides in array sections, we need to initialize the first 7937 // dimension size as 1, the first offset as 0, and the first count as 1. 7938 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 7939 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 7940 MapValuesArrayTy CurStrides; 7941 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 7942 uint64_t ElementTypeSize; 7943 7944 // Collect size information for each dimension and get the element size as 7945 // the first stride. For example, for `int arr[10][10]`, the DimSizes 7946 // should be [10, 10] and the first stride is 4 bytes. 7947 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 7948 Components) { 7949 const Expr *AssocExpr = Component.getAssociatedExpression(); 7950 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7951 7952 if (!OASE) 7953 continue; 7954 7955 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 7956 auto *CAT = Context.getAsConstantArrayType(Ty); 7957 auto *VAT = Context.getAsVariableArrayType(Ty); 7958 7959 // We need all the dimension sizes except for the last dimension. 7960 assert((VAT || CAT || &Component == &*Components.begin()) && 7961 "Should be either ConstantArray or VariableArray if not the " 7962 "first Component"); 7963 7964 // Get element size if CurStrides is empty.
7965 if (CurStrides.empty()) { 7966 const Type *ElementType = nullptr; 7967 if (CAT) 7968 ElementType = CAT->getElementType().getTypePtr(); 7969 else if (VAT) 7970 ElementType = VAT->getElementType().getTypePtr(); 7971 else 7972 assert(&Component == &*Components.begin() && 7973 "Only expect pointer (non CAT or VAT) when this is the " 7974 "first Component"); 7975 // If ElementType is null, then it means the base is a pointer 7976 // (neither CAT nor VAT) and we'll attempt to get ElementType again 7977 // for the next iteration. 7978 if (ElementType) { 7979 // If the base is a pointer, we need to remove one level of 7980 // indirection. 7981 if (&Component != &*Components.begin()) 7982 ElementType = ElementType->getPointeeOrArrayElementType(); 7983 ElementTypeSize = 7984 Context.getTypeSizeInChars(ElementType).getQuantity(); 7985 CurStrides.push_back( 7986 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 7987 } 7988 } 7989 // Get the dimension value except for the last dimension, since we don't 7990 // need it. 7991 if (DimSizes.size() < Components.size() - 1) { 7992 if (CAT) 7993 DimSizes.push_back(llvm::ConstantInt::get( 7994 CGF.Int64Ty, CAT->getSize().getZExtValue())); 7995 else if (VAT) 7996 DimSizes.push_back(CGF.Builder.CreateIntCast( 7997 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 7998 /*IsSigned=*/false)); 7999 } 8000 } 8001 8002 // Skip the dummy dimension since we already have its information. 8003 auto DI = DimSizes.begin() + 1; 8004 // Product of dimension sizes. 8005 llvm::Value *DimProd = 8006 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8007 8008 // Collect info for non-contiguous data. Note that offset, count, and 8009 // stride are only meaningful for array sections, so we insert a null for 8010 // anything other than an array section. 8011 // Also, the sizes of offset, count, and stride are not the same as those 8012 // of pointers, base_pointers, sizes, or dims. Instead, they match the 8013 // number of non-contiguous declarations in the target update to/from 8014 // clause. 8015 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8016 Components) { 8017 const Expr *AssocExpr = Component.getAssociatedExpression(); 8018 8019 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8020 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8021 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8022 /*isSigned=*/false); 8023 CurOffsets.push_back(Offset); 8024 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8025 CurStrides.push_back(CurStrides.back()); 8026 continue; 8027 } 8028 8029 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8030 8031 if (!OASE) 8032 continue; 8033 8034 // Offset 8035 const Expr *OffsetExpr = OASE->getLowerBound(); 8036 llvm::Value *Offset = nullptr; 8037 if (!OffsetExpr) { 8038 // If the offset is absent, then we just set it to zero.
8039 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8040 } else { 8041 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8042 CGF.Int64Ty, 8043 /*isSigned=*/false); 8044 } 8045 CurOffsets.push_back(Offset); 8046 8047 // Count 8048 const Expr *CountExpr = OASE->getLength(); 8049 llvm::Value *Count = nullptr; 8050 if (!CountExpr) { 8051 // In Clang, once a higher dimension is an array section, all the lower 8052 // dimensions are constructed as array sections too; however, for a case 8053 // like arr[0:2][2], Clang constructs the inner dimension as an array 8054 // section even though it is actually not in array-section form according to the spec. 8055 if (!OASE->getColonLocFirst().isValid() && 8056 !OASE->getColonLocSecond().isValid()) { 8057 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8058 } else { 8059 // OpenMP 5.0, 2.1.5 Array Sections, Description. 8060 // When the length is absent it defaults to ⌈(size − 8061 // lower-bound)/stride⌉, where size is the size of the array 8062 // dimension. 8063 const Expr *StrideExpr = OASE->getStride(); 8064 llvm::Value *Stride = 8065 StrideExpr 8066 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8067 CGF.Int64Ty, /*isSigned=*/false) 8068 : nullptr; 8069 if (Stride) 8070 Count = CGF.Builder.CreateUDiv( 8071 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8072 else 8073 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8074 } 8075 } else { 8076 Count = CGF.EmitScalarExpr(CountExpr); 8077 } 8078 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8079 CurCounts.push_back(Count); 8080 8081 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8082 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8083 // Offset Count Stride 8084 // D0 0 1 4 (int) <- dummy dimension 8085 // D1 0 2 8 (2 * (1) * 4) 8086 // D2 1 2 20 (1 * (1 * 5) * 4) 8087 // D3 0 2 200 (2 * (1 * 5 * 5) * 4) 8088 const Expr *StrideExpr = OASE->getStride(); 8089 llvm::Value *Stride = 8090 StrideExpr 8091 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8092 CGF.Int64Ty, /*isSigned=*/false) 8093 : nullptr; 8094 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8095 if (Stride) 8096 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8097 else 8098 CurStrides.push_back(DimProd); 8099 if (DI != DimSizes.end()) 8100 ++DI; 8101 } 8102 8103 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8104 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8105 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8106 } 8107 8108 /// Return the adjusted map modifiers if the declaration a capture refers to 8109 /// appears in a firstprivate clause. This is expected to be used only with 8110 /// directives that start with 'target'. 8111 MappableExprsHandler::OpenMPOffloadMappingFlags 8112 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 8113 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 8114 8115 // A firstprivate variable captured by reference will use only the 8116 // 'private ptr' and 'map to' flag. Return the right flags if the captured 8117 // declaration is known as firstprivate in this handler.
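// E.g. (illustrative): a non-pointer, non-const 'firstprivate(x)' capture yields PRIVATE | TO; a const 'x' captured by reference yields ALWAYS | TO; a captured pointer yields TO | PTR_AND_OBJ; a capture that is not firstprivate falls through to TO | FROM below.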
8118 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 8119 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 8120 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 8121 return MappableExprsHandler::OMP_MAP_ALWAYS | 8122 MappableExprsHandler::OMP_MAP_TO; 8123 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 8124 return MappableExprsHandler::OMP_MAP_TO | 8125 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 8126 return MappableExprsHandler::OMP_MAP_PRIVATE | 8127 MappableExprsHandler::OMP_MAP_TO; 8128 } 8129 return MappableExprsHandler::OMP_MAP_TO | 8130 MappableExprsHandler::OMP_MAP_FROM; 8131 } 8132 8133 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 8134 // Rotate by getFlagMemberOffset() bits. 8135 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8136 << getFlagMemberOffset()); 8137 } 8138 8139 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8140 OpenMPOffloadMappingFlags MemberOfFlag) { 8141 // If the entry is PTR_AND_OBJ but has not been marked with the special 8142 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8143 // marked as MEMBER_OF. 8144 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8145 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8146 return; 8147 8148 // Reset the placeholder value to prepare the flag for the assignment of the 8149 // proper MEMBER_OF value. 8150 Flags &= ~OMP_MAP_MEMBER_OF; 8151 Flags |= MemberOfFlag; 8152 } 8153 8154 void getPlainLayout(const CXXRecordDecl *RD, 8155 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8156 bool AsBase) const { 8157 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8158 8159 llvm::StructType *St = 8160 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8161 8162 unsigned NumElements = St->getNumElements(); 8163 llvm::SmallVector< 8164 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8165 RecordLayout(NumElements); 8166 8167 // Fill bases. 8168 for (const auto &I : RD->bases()) { 8169 if (I.isVirtual()) 8170 continue; 8171 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8172 // Ignore empty bases. 8173 if (Base->isEmpty() || CGF.getContext() 8174 .getASTRecordLayout(Base) 8175 .getNonVirtualSize() 8176 .isZero()) 8177 continue; 8178 8179 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8180 RecordLayout[FieldIndex] = Base; 8181 } 8182 // Fill in virtual bases. 8183 for (const auto &I : RD->vbases()) { 8184 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8185 // Ignore empty bases. 8186 if (Base->isEmpty()) 8187 continue; 8188 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8189 if (RecordLayout[FieldIndex]) 8190 continue; 8191 RecordLayout[FieldIndex] = Base; 8192 } 8193 // Fill in all the fields. 8194 assert(!RD->isUnion() && "Unexpected union."); 8195 for (const auto *Field : RD->fields()) { 8196 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8197 // will fill in later.) 
8198 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8199 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8200 RecordLayout[FieldIndex] = Field; 8201 } 8202 } 8203 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8204 &Data : RecordLayout) { 8205 if (Data.isNull()) 8206 continue; 8207 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8208 getPlainLayout(Base, Layout, /*AsBase=*/true); 8209 else 8210 Layout.push_back(Data.get<const FieldDecl *>()); 8211 } 8212 } 8213 8214 public: 8215 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8216 : CurDir(&Dir), CGF(CGF) { 8217 // Extract firstprivate clause information. 8218 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8219 for (const auto *D : C->varlists()) 8220 FirstPrivateDecls.try_emplace( 8221 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8222 // Extract implicit firstprivates from uses_allocators clauses. 8223 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8224 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8225 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8226 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8227 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8228 /*Implicit=*/true); 8229 else if (const auto *VD = dyn_cast<VarDecl>( 8230 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8231 ->getDecl())) 8232 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8233 } 8234 } 8235 // Extract device pointer clause information. 8236 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8237 for (auto L : C->component_lists()) 8238 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8239 } 8240 8241 /// Constructor for the declare mapper directive. 8242 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8243 : CurDir(&Dir), CGF(CGF) {} 8244 8245 /// Generate code for the combined entry if we have a partially mapped struct 8246 /// and take care of the mapping flags of the arguments corresponding to 8247 /// individual struct members. 8248 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, 8249 MapFlagsArrayTy &CurTypes, 8250 const StructRangeInfoTy &PartialStruct, 8251 const ValueDecl *VD = nullptr, 8252 bool NotTargetParams = true) const { 8253 if (CurTypes.size() == 1 && 8254 ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) && 8255 !PartialStruct.IsArraySection) 8256 return; 8257 CombinedInfo.Exprs.push_back(VD); 8258 // Base is the base of the struct 8259 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); 8260 // Pointer is the address of the lowest element 8261 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 8262 CombinedInfo.Pointers.push_back(LB); 8263 // There should not be a mapper for a combined entry. 
8264 CombinedInfo.Mappers.push_back(nullptr); 8265 // Size is (addr of {highest+1} element) - (addr of lowest element) 8266 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 8267 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 8268 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8269 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8270 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 8271 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 8272 /*isSigned=*/false); 8273 CombinedInfo.Sizes.push_back(Size); 8274 // The map type is always TARGET_PARAM when generating info for captures. 8275 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE 8276 : OMP_MAP_TARGET_PARAM); 8277 // If any element has the present modifier, then make sure the runtime 8278 // doesn't attempt to allocate the struct. 8279 if (CurTypes.end() != 8280 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 8281 return Type & OMP_MAP_PRESENT; 8282 })) 8283 CombinedInfo.Types.back() |= OMP_MAP_PRESENT; 8284 // Remove TARGET_PARAM flag from the first element if any. 8285 if (!CurTypes.empty()) 8286 CurTypes.front() &= ~OMP_MAP_TARGET_PARAM; 8287 8288 // All other current entries will be MEMBER_OF the combined entry 8289 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8290 // 0xFFFF in the MEMBER_OF field). 8291 OpenMPOffloadMappingFlags MemberOfFlag = 8292 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); 8293 for (auto &M : CurTypes) 8294 setCorrectMemberOfFlag(M, MemberOfFlag); 8295 } 8296 8297 /// Generate all the base pointers, section pointers, sizes, map types, and 8298 /// mappers for the extracted mappable expressions (all included in \a 8299 /// CombinedInfo). Also, for each item that relates to a device pointer, a 8300 /// pair of the relevant declaration and index where it occurs is appended to 8301 /// the device pointers info array. 8302 void generateAllInfo( 8303 MapCombinedInfoTy &CombinedInfo, 8304 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8305 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8306 // We have to process the component lists that relate to the same 8307 // declaration in a single chunk so that we can generate the map flags 8308 // correctly. Therefore, we organize all lists in a map. 8309 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8310 8311 // Helper function to fill the information map for the different supported 8312 // clauses. 8313 auto &&InfoGen = 8314 [&Info, &SkipVarSet]( 8315 const ValueDecl *D, 8316 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8317 OpenMPMapClauseKind MapType, 8318 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8319 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 8320 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 8321 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { 8322 const ValueDecl *VD = 8323 D ? 
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8324 if (SkipVarSet.count(VD)) 8325 return; 8326 Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers, 8327 ReturnDevicePointer, IsImplicit, Mapper, VarRef, 8328 ForDeviceAddr); 8329 }; 8330 8331 assert(CurDir.is<const OMPExecutableDirective *>() && 8332 "Expect an executable directive"); 8333 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8334 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8335 const auto *EI = C->getVarRefs().begin(); 8336 for (const auto L : C->component_lists()) { 8337 // The expression is not correct if the mapping is implicit. 8338 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr; 8339 InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(), 8340 C->getMapTypeModifiers(), llvm::None, 8341 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), 8342 E); 8343 ++EI; 8344 } 8345 } 8346 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) { 8347 const auto *EI = C->getVarRefs().begin(); 8348 for (const auto L : C->component_lists()) { 8349 InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None, 8350 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8351 C->isImplicit(), std::get<2>(L), *EI); 8352 ++EI; 8353 } 8354 } 8355 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) { 8356 const auto *EI = C->getVarRefs().begin(); 8357 for (const auto L : C->component_lists()) { 8358 InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None, 8359 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8360 C->isImplicit(), std::get<2>(L), *EI); 8361 ++EI; 8362 } 8363 } 8364 8365 // Look at the use_device_ptr clause information and mark the existing map 8366 // entries as such. If there is no map information for an entry in the 8367 // use_device_ptr list, we create one with map type 'alloc' and zero size 8368 // section. It is the user's fault if it was not mapped before. If there is 8369 // no map information and the pointer is a struct member, then we defer the 8370 // emission of that entry until the whole struct has been processed. 8371 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> 8372 DeferredInfo; 8373 MapCombinedInfoTy UseDevicePtrCombinedInfo; 8374 8375 for (const auto *C : 8376 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { 8377 for (const auto L : C->component_lists()) { 8378 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8379 std::get<1>(L); 8380 assert(!Components.empty() && 8381 "Not expecting empty list of components!"); 8382 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8383 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8384 const Expr *IE = Components.back().getAssociatedExpression(); 8385 // If the first component is a member expression, we have to look into 8386 // 'this', which maps to null in the map of map information. Otherwise 8387 // look directly for the information. 8388 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8389 8390 // We potentially have map information for this declaration already. 8391 // Look for the first set of components that refer to it. 8392 if (It != Info.end()) { 8393 auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) { 8394 return MI.Components.back().getAssociatedDeclaration() == VD; 8395 }); 8396 // If we found a map entry, signal that the pointer has to be returned 8397 // and move on to the next declaration.
8398 // Exclude cases where the base pointer is mapped as an array subscript, 8399 // an array section, or array shaping. The base address is passed as a 8400 // pointer to base in this case and cannot be used as a base for a 8401 // use_device_ptr list item. 8402 if (CI != It->second.end()) { 8403 auto PrevCI = std::next(CI->Components.rbegin()); 8404 const auto *VarD = dyn_cast<VarDecl>(VD); 8405 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8406 isa<MemberExpr>(IE) || 8407 !VD->getType().getNonReferenceType()->isPointerType() || 8408 PrevCI == CI->Components.rend() || 8409 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 8410 VarD->hasLocalStorage()) { 8411 CI->ReturnDevicePointer = true; 8412 continue; 8413 } 8414 } 8415 } 8416 8417 // We didn't find any match in our map information - generate a zero 8418 // size array section - if the pointer is a struct member we defer this 8419 // action until the whole struct has been processed. 8420 if (isa<MemberExpr>(IE)) { 8421 // Insert the pointer into Info to be processed by 8422 // generateInfoForComponentList. Because it is a member pointer 8423 // without a pointee, no entry will be generated for it, therefore 8424 // we need to generate one after the whole struct has been processed. 8425 // Nonetheless, generateInfoForComponentList must be called to take 8426 // the pointer into account for the calculation of the range of the 8427 // partial struct. 8428 InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None, 8429 /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr); 8430 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); 8431 } else { 8432 llvm::Value *Ptr = 8433 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8434 UseDevicePtrCombinedInfo.Exprs.push_back(VD); 8435 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD); 8436 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr); 8437 UseDevicePtrCombinedInfo.Sizes.push_back( 8438 llvm::Constant::getNullValue(CGF.Int64Ty)); 8439 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8440 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr); 8441 } 8442 } 8443 } 8444 8445 // Look at the use_device_addr clause information and mark the existing map 8446 // entries as such. If there is no map information for an entry in the 8447 // use_device_addr list, we create one with map type 'alloc' and zero size 8448 // section. It is the user's fault if it was not mapped before. If there is 8449 // no map information and the pointer is a struct member, then we defer the 8450 // emission of that entry until the whole struct has been processed. 8451 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 8452 for (const auto *C : 8453 CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) { 8454 for (const auto L : C->component_lists()) { 8455 assert(!std::get<1>(L).empty() && 8456 "Not expecting empty list of components!"); 8457 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 8458 if (!Processed.insert(VD).second) 8459 continue; 8460 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8461 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 8462 // If the first component is a member expression, we have to look into 8463 // 'this', which maps to null in the map of map information. Otherwise 8464 // look directly for the information. 8465 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8466 8467 // We potentially have map information for this declaration already.
8468 // Look for the first set of components that refer to it. 8469 if (It != Info.end()) { 8470 auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) { 8471 return MI.Components.back().getAssociatedDeclaration() == VD; 8472 }); 8473 // If we found a map entry, signal that the pointer has to be returned 8474 // and move on to the next declaration. 8475 if (CI != It->second.end()) { 8476 CI->ReturnDevicePointer = true; 8477 continue; 8478 } 8479 } 8480 8481 // We didn't find any match in our map information - generate a zero 8482 // size array section - if the pointer is a struct member we defer this 8483 // action until the whole struct has been processed. 8484 if (isa<MemberExpr>(IE)) { 8485 // Insert the pointer into Info to be processed by 8486 // generateInfoForComponentList. Because it is a member pointer 8487 // without a pointee, no entry will be generated for it, therefore 8488 // we need to generate one after the whole struct has been processed. 8489 // Nonetheless, generateInfoForComponentList must be called to take 8490 // the pointer into account for the calculation of the range of the 8491 // partial struct. 8492 InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8493 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8494 nullptr, nullptr, /*ForDeviceAddr=*/true); 8495 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8496 } else { 8497 llvm::Value *Ptr; 8498 if (IE->isGLValue()) 8499 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8500 else 8501 Ptr = CGF.EmitScalarExpr(IE); 8502 CombinedInfo.Exprs.push_back(VD); 8503 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8504 CombinedInfo.Pointers.push_back(Ptr); 8505 CombinedInfo.Sizes.push_back( 8506 llvm::Constant::getNullValue(CGF.Int64Ty)); 8507 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8508 CombinedInfo.Mappers.push_back(nullptr); 8509 } 8510 } 8511 } 8512 8513 for (const auto &M : Info) { 8514 // Underlying variable declaration used in the map clause. 8515 const ValueDecl *VD = std::get<0>(M); 8516 8517 // Temporary generated information. 8518 MapCombinedInfoTy CurInfo; 8519 StructRangeInfoTy PartialStruct; 8520 8521 for (const MapInfo &L : M.second) { 8522 assert(!L.Components.empty() && 8523 "Not expecting declaration with no component lists."); 8524 8525 // Remember the current base pointer index. 8526 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8527 CurInfo.NonContigInfo.IsNonContiguous = 8528 L.Components.back().isNonContiguous(); 8529 generateInfoForComponentList( 8530 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo, 8531 PartialStruct, /*IsFirstComponentList=*/false, L.IsImplicit, 8532 L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8533 8534 // If this entry relates with a device pointer, set the relevant 8535 // declaration and add the 'return pointer' flag. 8536 if (L.ReturnDevicePointer) { 8537 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8538 "Unexpected number of mapped base pointers."); 8539 8540 const ValueDecl *RelevantVD = 8541 L.Components.back().getAssociatedDeclaration(); 8542 assert(RelevantVD && 8543 "No relevant declaration related with device pointer??"); 8544 8545 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8546 RelevantVD); 8547 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8548 } 8549 } 8550 8551 // Append any pending zero-length pointers which are struct members and 8552 // used with use_device_ptr or use_device_addr. 
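// E.g. (illustrative): 'use_device_ptr(s.p)' with no matching map of 's.p[...]' was deferred above and is emitted here as a zero-size PTR_AND_OBJ | RETURN_PARAM | MEMBER_OF entry once the rest of 's' has been processed.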
8553 auto CI = DeferredInfo.find(M.first); 8554 if (CI != DeferredInfo.end()) { 8555 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8556 llvm::Value *BasePtr; 8557 llvm::Value *Ptr; 8558 if (L.ForDeviceAddr) { 8559 if (L.IE->isGLValue()) 8560 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8561 else 8562 Ptr = this->CGF.EmitScalarExpr(L.IE); 8563 BasePtr = Ptr; 8564 // Entry is RETURN_PARAM. Also, set the placeholder value 8565 // MEMBER_OF=FFFF so that the entry is later updated with the 8566 // correct value of MEMBER_OF. 8567 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8568 } else { 8569 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8570 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8571 L.IE->getExprLoc()); 8572 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 8573 // value MEMBER_OF=FFFF so that the entry is later updated with the 8574 // correct value of MEMBER_OF. 8575 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8576 OMP_MAP_MEMBER_OF); 8577 } 8578 CurInfo.Exprs.push_back(L.VD); 8579 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8580 CurInfo.Pointers.push_back(Ptr); 8581 CurInfo.Sizes.push_back( 8582 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8583 CurInfo.Mappers.push_back(nullptr); 8584 } 8585 } 8586 8587 // If there is an entry in PartialStruct it means we have a struct with 8588 // individual members mapped. Emit an extra combined entry. 8589 if (PartialStruct.Base.isValid()) 8590 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); 8591 8592 // We need to append the results of this capture to what we already have. 8593 CombinedInfo.append(CurInfo); 8594 } 8595 // Append data for use_device_ptr clauses. 8596 CombinedInfo.append(UseDevicePtrCombinedInfo); 8597 } 8598 8599 /// Generate all the base pointers, section pointers, sizes, map types, and 8600 /// mappers for the extracted map clauses of user-defined mapper (all included 8601 /// in \a CombinedInfo). 8602 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { 8603 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8604 "Expect a declare mapper directive"); 8605 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8606 // We have to process the component lists that relate with the same 8607 // declaration in a single chunk so that we can generate the map flags 8608 // correctly. Therefore, we organize all lists in a map. 8609 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8610 8611 // Fill the information map for map clauses. 8612 for (const auto *C : CurMapperDir->clauselists()) { 8613 const auto *MC = cast<OMPMapClause>(C); 8614 const auto *EI = MC->getVarRefs().begin(); 8615 for (const auto L : MC->component_lists()) { 8616 // The Expression is not correct if the mapping is implicit 8617 const Expr *E = (MC->getMapLoc().isValid()) ? *EI : nullptr; 8618 const ValueDecl *VD = 8619 std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl()) 8620 : nullptr; 8621 // Get the corresponding user-defined mapper. 8622 Info[VD].emplace_back(std::get<1>(L), MC->getMapType(), 8623 MC->getMapTypeModifiers(), llvm::None, 8624 /*ReturnDevicePointer=*/false, MC->isImplicit(), 8625 std::get<2>(L), E); 8626 ++EI; 8627 } 8628 } 8629 8630 for (const auto &M : Info) { 8631 // We need to know when we generate information for the first component 8632 // associated with a capture, because the mapping flags depend on it. 
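      // Hypothetical input for this loop, for illustration only:
      //   struct S { int len; double *data; };
      //   #pragma omp declare mapper(S s) map(s, s.data[0:s.len])
      // Both component lists are rooted at 's', so they were bucketed into a
      // single Info entry above and are emitted here as one chunk.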
8633 bool IsFirstComponentList = true; 8634 8635 // Underlying variable declaration used in the map clause. 8636 const ValueDecl *VD = std::get<0>(M); 8637 8638 // Temporary generated information. 8639 MapCombinedInfoTy CurInfo; 8640 StructRangeInfoTy PartialStruct; 8641 8642 for (const MapInfo &L : M.second) { 8643 assert(!L.Components.empty() && 8644 "Not expecting declaration with no component lists."); 8645 generateInfoForComponentList( 8646 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo, 8647 PartialStruct, IsFirstComponentList, L.IsImplicit, L.Mapper, 8648 L.ForDeviceAddr, VD, L.VarRef); 8649 IsFirstComponentList = false; 8650 } 8651 8652 // If there is an entry in PartialStruct it means we have a struct with 8653 // individual members mapped. Emit an extra combined entry. 8654 if (PartialStruct.Base.isValid()) { 8655 CurInfo.NonContigInfo.Dims.push_back(0); 8656 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); 8657 } 8658 8659 // We need to append the results of this capture to what we already have. 8660 CombinedInfo.append(CurInfo); 8661 } 8662 } 8663 8664 /// Emit capture info for lambdas for variables captured by reference. 8665 void generateInfoForLambdaCaptures( 8666 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 8667 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8668 const auto *RD = VD->getType() 8669 .getCanonicalType() 8670 .getNonReferenceType() 8671 ->getAsCXXRecordDecl(); 8672 if (!RD || !RD->isLambda()) 8673 return; 8674 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8675 LValue VDLVal = CGF.MakeAddrLValue( 8676 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8677 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8678 FieldDecl *ThisCapture = nullptr; 8679 RD->getCaptureFields(Captures, ThisCapture); 8680 if (ThisCapture) { 8681 LValue ThisLVal = 8682 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8683 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8684 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 8685 VDLVal.getPointer(CGF)); 8686 CombinedInfo.Exprs.push_back(VD); 8687 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF)); 8688 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF)); 8689 CombinedInfo.Sizes.push_back( 8690 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8691 CGF.Int64Ty, /*isSigned=*/true)); 8692 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8693 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8694 CombinedInfo.Mappers.push_back(nullptr); 8695 } 8696 for (const LambdaCapture &LC : RD->captures()) { 8697 if (!LC.capturesVariable()) 8698 continue; 8699 const VarDecl *VD = LC.getCapturedVar(); 8700 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8701 continue; 8702 auto It = Captures.find(VD); 8703 assert(It != Captures.end() && "Found lambda capture without field."); 8704 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8705 if (LC.getCaptureKind() == LCK_ByRef) { 8706 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8707 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8708 VDLVal.getPointer(CGF)); 8709 CombinedInfo.Exprs.push_back(VD); 8710 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8711 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF)); 8712 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8713 CGF.getTypeSize( 8714 
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambdas captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ?
               OMP_MAP_TO : OMP_MAP_LITERAL) | OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // We found an overlap if, for at least one of the two lists, we
        // reached the head of the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
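    // Illustrative example (hypothetical user code): given
    //   #pragma omp target map(tofrom: s) map(to: s.x)
    // the component lists {s} and {s, x} share their head component 's', so
    // {s} was recorded above as the base entry and {s, x} as one of its
    // overlapped sub-lists. The sort below orders sub-lists by field layout.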
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements is ordered before a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Go through all of the elements that have overlapped elements first; the
    // mapping flags associated with a capture depend on them.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime that the captures passed by value
        // are not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(Addr);
        CombinedInfo.Pointers.push_back(Addr);
      } else {
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
        } else {
          CombinedInfo.Pointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target
    // parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //   uint64_t offset;
  //   uint64_t count;
  //   uint64_t stride;
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variables here since the size of "Dims" is the same as
  // the size of Components, whereas the size of offset, count, and stride is
  // equal to the number of base declarations that are non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting IR if the dimension size is 1 since it cannot be
    // non-contiguous.
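    // (Illustrative, not from the original comments: an OpenMP 5.0 strided
    // section such as '#pragma omp target update to(arr[0:4:2])' is what
    // makes a dimension non-contiguous; each dimension then gets one
    // descriptor_dim entry recording its offset, count and stride.)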
9079 if (NonContigInfo.Dims[I] == 1) 9080 continue; 9081 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]); 9082 QualType ArrayTy = 9083 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); 9084 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 9085 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { 9086 unsigned RevIdx = EE - II - 1; 9087 LValue DimsLVal = CGF.MakeAddrLValue( 9088 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); 9089 // Offset 9090 LValue OffsetLVal = CGF.EmitLValueForField( 9091 DimsLVal, *std::next(RD->field_begin(), OffsetFD)); 9092 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal); 9093 // Count 9094 LValue CountLVal = CGF.EmitLValueForField( 9095 DimsLVal, *std::next(RD->field_begin(), CountFD)); 9096 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal); 9097 // Stride 9098 LValue StrideLVal = CGF.EmitLValueForField( 9099 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 9100 CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); 9101 } 9102 // args[I] = &dims 9103 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9104 DimsAddr, CGM.Int8PtrTy); 9105 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9106 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9107 Info.PointersArray, 0, I); 9108 Address PAddr(P, CGF.getPointerAlign()); 9109 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); 9110 ++L; 9111 } 9112 } 9113 9114 /// Emit a string constant containing the names of the values mapped to the 9115 /// offloading runtime library. 9116 llvm::Constant * 9117 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9118 MappableExprsHandler::MappingExprInfo &MapExprs) { 9119 llvm::Constant *SrcLocStr; 9120 if (!MapExprs.getMapDecl()) { 9121 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 9122 } else { 9123 std::string ExprName = ""; 9124 if (MapExprs.getMapExpr()) { 9125 PrintingPolicy P(CGF.getContext().getLangOpts()); 9126 llvm::raw_string_ostream OS(ExprName); 9127 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9128 OS.flush(); 9129 } else { 9130 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9131 } 9132 9133 SourceLocation Loc = MapExprs.getMapDecl()->getLocation(); 9134 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9135 const char *FileName = PLoc.getFilename(); 9136 unsigned Line = PLoc.getLine(); 9137 unsigned Column = PLoc.getColumn(); 9138 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(), 9139 Line, Column); 9140 } 9141 9142 return SrcLocStr; 9143 } 9144 9145 /// Emit the arrays used to pass the captures and map information to the 9146 /// offloading runtime library. If there is no map or capture information, 9147 /// return nullptr by reference. 9148 static void emitOffloadingArrays( 9149 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9150 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 9151 bool IsNonContiguous = false) { 9152 CodeGenModule &CGM = CGF.CGM; 9153 ASTContext &Ctx = CGF.getContext(); 9154 9155 // Reset the array information. 9156 Info.clearArrayInfo(); 9157 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 9158 9159 if (Info.NumberOfPtrs) { 9160 // Detect if we have any capture size requiring runtime evaluation of the 9161 // size so that a constant array could be eventually used. 
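    // (For example, capturing a VLA 'int a[n]' produces a size that is only
    // computable at run time, whereas 'int a[4]' folds to an llvm::Constant
    // and keeps the constant-array path below viable.)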
9162 bool hasRuntimeEvaluationCaptureSize = false; 9163 for (llvm::Value *S : CombinedInfo.Sizes) 9164 if (!isa<llvm::Constant>(S)) { 9165 hasRuntimeEvaluationCaptureSize = true; 9166 break; 9167 } 9168 9169 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9170 QualType PointerArrayType = Ctx.getConstantArrayType( 9171 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9172 /*IndexTypeQuals=*/0); 9173 9174 Info.BasePointersArray = 9175 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9176 Info.PointersArray = 9177 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9178 Address MappersArray = 9179 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 9180 Info.MappersArray = MappersArray.getPointer(); 9181 9182 // If we don't have any VLA types or other types that require runtime 9183 // evaluation, we can use a constant array for the map sizes, otherwise we 9184 // need to fill up the arrays as we do for the pointers. 9185 QualType Int64Ty = 9186 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9187 if (hasRuntimeEvaluationCaptureSize) { 9188 QualType SizeArrayType = Ctx.getConstantArrayType( 9189 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9190 /*IndexTypeQuals=*/0); 9191 Info.SizesArray = 9192 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9193 } else { 9194 // We expect all the sizes to be constant, so we collect them to create 9195 // a constant array. 9196 SmallVector<llvm::Constant *, 16> ConstSizes; 9197 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { 9198 if (IsNonContiguous && 9199 (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) { 9200 ConstSizes.push_back(llvm::ConstantInt::get( 9201 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I])); 9202 } else { 9203 ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I])); 9204 } 9205 } 9206 9207 auto *SizesArrayInit = llvm::ConstantArray::get( 9208 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9209 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9210 auto *SizesArrayGbl = new llvm::GlobalVariable( 9211 CGM.getModule(), SizesArrayInit->getType(), 9212 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9213 SizesArrayInit, Name); 9214 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9215 Info.SizesArray = SizesArrayGbl; 9216 } 9217 9218 // The map types are always constant so we don't need to generate code to 9219 // fill arrays. Instead, we create an array constant. 9220 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 9221 llvm::copy(CombinedInfo.Types, Mapping.begin()); 9222 llvm::Constant *MapTypesArrayInit = 9223 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 9224 std::string MaptypesName = 9225 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9226 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 9227 CGM.getModule(), MapTypesArrayInit->getType(), 9228 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9229 MapTypesArrayInit, MaptypesName); 9230 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9231 Info.MapTypesArray = MapTypesArrayGbl; 9232 9233 // The information types are only built if there is debug information 9234 // requested. 
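    // (That is, when compiling with -g the runtime also receives a parallel
    // .offload_mapnames array of human-readable expression names; otherwise a
    // null placeholder is stored instead.)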
9235 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { 9236 Info.MapNamesArray = llvm::Constant::getNullValue( 9237 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); 9238 } else { 9239 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 9240 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 9241 }; 9242 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); 9243 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); 9244 9245 llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get( 9246 llvm::ArrayType::get( 9247 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(), 9248 CombinedInfo.Exprs.size()), 9249 InfoMap); 9250 auto *MapNamesArrayGbl = new llvm::GlobalVariable( 9251 CGM.getModule(), MapNamesArrayInit->getType(), 9252 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9253 MapNamesArrayInit, 9254 CGM.getOpenMPRuntime().getName({"offload_mapnames"})); 9255 Info.MapNamesArray = MapNamesArrayGbl; 9256 } 9257 9258 // If there's a present map type modifier, it must not be applied to the end 9259 // of a region, so generate a separate map type array in that case. 9260 if (Info.separateBeginEndCalls()) { 9261 bool EndMapTypesDiffer = false; 9262 for (uint64_t &Type : Mapping) { 9263 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { 9264 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; 9265 EndMapTypesDiffer = true; 9266 } 9267 } 9268 if (EndMapTypesDiffer) { 9269 MapTypesArrayInit = 9270 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 9271 MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9272 MapTypesArrayGbl = new llvm::GlobalVariable( 9273 CGM.getModule(), MapTypesArrayInit->getType(), 9274 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9275 MapTypesArrayInit, MaptypesName); 9276 MapTypesArrayGbl->setUnnamedAddr( 9277 llvm::GlobalValue::UnnamedAddr::Global); 9278 Info.MapTypesArrayEnd = MapTypesArrayGbl; 9279 } 9280 } 9281 9282 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 9283 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 9284 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 9285 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9286 Info.BasePointersArray, 0, I); 9287 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9288 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9289 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9290 CGF.Builder.CreateStore(BPVal, BPAddr); 9291 9292 if (Info.requiresDevicePointerInfo()) 9293 if (const ValueDecl *DevVD = 9294 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 9295 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 9296 9297 llvm::Value *PVal = CombinedInfo.Pointers[I]; 9298 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9299 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9300 Info.PointersArray, 0, I); 9301 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9302 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9303 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9304 CGF.Builder.CreateStore(PVal, PAddr); 9305 9306 if (hasRuntimeEvaluationCaptureSize) { 9307 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 9308 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9309 Info.SizesArray, 9310 /*Idx0=*/0, 9311 /*Idx1=*/I); 9312 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); 9313 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 9314 CGM.Int64Ty, 9315 /*isSigned=*/true), 
9316 SAddr); 9317 } 9318 9319 // Fill up the mapper array. 9320 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9321 if (CombinedInfo.Mappers[I]) { 9322 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 9323 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 9324 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 9325 Info.HasMapper = true; 9326 } 9327 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 9328 CGF.Builder.CreateStore(MFunc, MAddr); 9329 } 9330 } 9331 9332 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || 9333 Info.NumberOfPtrs == 0) 9334 return; 9335 9336 emitNonContiguousDescriptor(CGF, CombinedInfo, Info); 9337 } 9338 9339 namespace { 9340 /// Additional arguments for emitOffloadingArraysArgument function. 9341 struct ArgumentsOptions { 9342 bool ForEndCall = false; 9343 ArgumentsOptions() = default; 9344 ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {} 9345 }; 9346 } // namespace 9347 9348 /// Emit the arguments to be passed to the runtime library based on the 9349 /// arrays of base pointers, pointers, sizes, map types, and mappers. If 9350 /// ForEndCall, emit map types to be passed for the end of the region instead of 9351 /// the beginning. 9352 static void emitOffloadingArraysArgument( 9353 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9354 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9355 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9356 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9357 const ArgumentsOptions &Options = ArgumentsOptions()) { 9358 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9359 "expected region end call to runtime only when end call is separate"); 9360 CodeGenModule &CGM = CGF.CGM; 9361 if (Info.NumberOfPtrs) { 9362 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9363 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9364 Info.BasePointersArray, 9365 /*Idx0=*/0, /*Idx1=*/0); 9366 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9367 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9368 Info.PointersArray, 9369 /*Idx0=*/0, 9370 /*Idx1=*/0); 9371 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9372 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9373 /*Idx0=*/0, /*Idx1=*/0); 9374 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9375 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9376 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9377 : Info.MapTypesArray, 9378 /*Idx0=*/0, 9379 /*Idx1=*/0); 9380 9381 // Only emit the mapper information arrays if debug information is 9382 // requested. 
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization.
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}

/// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case
OMPD_target_data: 9481 case OMPD_target_exit_data: 9482 case OMPD_target_enter_data: 9483 case OMPD_distribute: 9484 case OMPD_distribute_simd: 9485 case OMPD_distribute_parallel_for: 9486 case OMPD_distribute_parallel_for_simd: 9487 case OMPD_teams_distribute: 9488 case OMPD_teams_distribute_simd: 9489 case OMPD_teams_distribute_parallel_for: 9490 case OMPD_teams_distribute_parallel_for_simd: 9491 case OMPD_target_update: 9492 case OMPD_declare_simd: 9493 case OMPD_declare_variant: 9494 case OMPD_begin_declare_variant: 9495 case OMPD_end_declare_variant: 9496 case OMPD_declare_target: 9497 case OMPD_end_declare_target: 9498 case OMPD_declare_reduction: 9499 case OMPD_declare_mapper: 9500 case OMPD_taskloop: 9501 case OMPD_taskloop_simd: 9502 case OMPD_master_taskloop: 9503 case OMPD_master_taskloop_simd: 9504 case OMPD_parallel_master_taskloop: 9505 case OMPD_parallel_master_taskloop_simd: 9506 case OMPD_requires: 9507 case OMPD_unknown: 9508 default: 9509 llvm_unreachable("Unexpected directive."); 9510 } 9511 } 9512 9513 return nullptr; 9514 } 9515 9516 /// Emit the user-defined mapper function. The code generation follows the 9517 /// pattern in the example below. 9518 /// \code 9519 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9520 /// void *base, void *begin, 9521 /// int64_t size, int64_t type, 9522 /// void *name = nullptr) { 9523 /// // Allocate space for an array section first. 9524 /// if (size > 1 && !maptype.IsDelete) 9525 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9526 /// size*sizeof(Ty), clearToFrom(type)); 9527 /// // Map members. 9528 /// for (unsigned i = 0; i < size; i++) { 9529 /// // For each component specified by this mapper: 9530 /// for (auto c : all_components) { 9531 /// if (c.hasMapper()) 9532 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 9533 /// c.arg_type, c.arg_name); 9534 /// else 9535 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 9536 /// c.arg_begin, c.arg_size, c.arg_type, 9537 /// c.arg_name); 9538 /// } 9539 /// } 9540 /// // Delete the array section. 9541 /// if (size > 1 && maptype.IsDelete) 9542 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9543 /// size*sizeof(Ty), clearToFrom(type)); 9544 /// } 9545 /// \endcode 9546 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 9547 CodeGenFunction *CGF) { 9548 if (UDMMap.count(D) > 0) 9549 return; 9550 ASTContext &C = CGM.getContext(); 9551 QualType Ty = D->getType(); 9552 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 9553 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 9554 auto *MapperVarDecl = 9555 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 9556 SourceLocation Loc = D->getLocation(); 9557 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 9558 9559 // Prepare mapper function arguments and attributes. 
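  // For a hypothetical 'struct vec' with an unnamed 'declare mapper', the
  // function emitted below would be named along the lines of
  //   .omp_mapper._ZTS3vec.default
  // and takes the (rt_mapper_handle, base, begin, size, type, name) arguments
  // shown in the \code pattern above.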
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop that iterates through SizeArg elements and maps each of
  // them.

  // Emit the loop header block.
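  // Sketch of the control flow emitted below (block names as created here):
  //   omp.arraymap.head: br (begin == end) ? omp.done : omp.arraymap.body
  //   omp.arraymap.body: map one element; advance the element pointer;
  //                      br (ptr == end) ? omp.arraymap.exit : body
  //   omp.done:          function exit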
9627 MapperCGF.EmitBlock(HeadBB); 9628 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 9629 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 9630 // Evaluate whether the initial condition is satisfied. 9631 llvm::Value *IsEmpty = 9632 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 9633 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 9634 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 9635 9636 // Emit the loop body block. 9637 MapperCGF.EmitBlock(BodyBB); 9638 llvm::BasicBlock *LastBB = BodyBB; 9639 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 9640 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 9641 PtrPHI->addIncoming(PtrBegin, EntryBB); 9642 Address PtrCurrent = 9643 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 9644 .getAlignment() 9645 .alignmentOfArrayElement(ElementSize)); 9646 // Privatize the declared variable of mapper to be the current array element. 9647 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 9648 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 9649 return MapperCGF 9650 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 9651 .getAddress(MapperCGF); 9652 }); 9653 (void)Scope.Privatize(); 9654 9655 // Get map clause information. Fill up the arrays with all mapped variables. 9656 MappableExprsHandler::MapCombinedInfoTy Info; 9657 MappableExprsHandler MEHandler(*D, MapperCGF); 9658 MEHandler.generateAllInfoForMapper(Info); 9659 9660 // Call the runtime API __tgt_mapper_num_components to get the number of 9661 // pre-existing components. 9662 llvm::Value *OffloadingArgs[] = {Handle}; 9663 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 9664 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9665 OMPRTL___tgt_mapper_num_components), 9666 OffloadingArgs); 9667 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 9668 PreviousSize, 9669 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 9670 9671 // Fill up the runtime mapper handle for all components. 9672 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) { 9673 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 9674 *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9675 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 9676 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9677 llvm::Value *CurSizeArg = Info.Sizes[I]; 9678 llvm::Value *CurNameArg = 9679 (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9680 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy) 9681 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]); 9682 9683 // Extract the MEMBER_OF field from the map type. 9684 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 9685 MapperCGF.EmitBlock(MemberBB); 9686 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]); 9687 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 9688 OriMapType, 9689 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 9690 llvm::BasicBlock *MemberCombineBB = 9691 MapperCGF.createBasicBlock("omp.member.combine"); 9692 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 9693 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 9694 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 9695 // Add the number of pre-existing components to the MEMBER_OF field if it 9696 // is valid. 
9697 MapperCGF.EmitBlock(MemberCombineBB); 9698 llvm::Value *CombinedMember = 9699 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 9700 // Do nothing if it is not a member of previous components. 9701 MapperCGF.EmitBlock(TypeBB); 9702 llvm::PHINode *MemberMapType = 9703 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 9704 MemberMapType->addIncoming(OriMapType, MemberBB); 9705 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 9706 9707 // Combine the map type inherited from user-defined mapper with that 9708 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 9709 // bits of the \a MapType, which is the input argument of the mapper 9710 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9711 // bits of MemberMapType. 9712 // [OpenMP 5.0], 1.2.6. map-type decay. 9713 // | alloc | to | from | tofrom | release | delete 9714 // ---------------------------------------------------------- 9715 // alloc | alloc | alloc | alloc | alloc | release | delete 9716 // to | alloc | to | alloc | to | release | delete 9717 // from | alloc | alloc | from | from | release | delete 9718 // tofrom | alloc | to | from | tofrom | release | delete 9719 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9720 MapType, 9721 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 9722 MappableExprsHandler::OMP_MAP_FROM)); 9723 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9724 llvm::BasicBlock *AllocElseBB = 9725 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9726 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9727 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9728 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9729 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9730 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9731 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9732 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9733 MapperCGF.EmitBlock(AllocBB); 9734 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9735 MemberMapType, 9736 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9737 MappableExprsHandler::OMP_MAP_FROM))); 9738 MapperCGF.Builder.CreateBr(EndBB); 9739 MapperCGF.EmitBlock(AllocElseBB); 9740 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9741 LeftToFrom, 9742 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9743 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9744 // In case of to, clear OMP_MAP_FROM. 9745 MapperCGF.EmitBlock(ToBB); 9746 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9747 MemberMapType, 9748 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9749 MapperCGF.Builder.CreateBr(EndBB); 9750 MapperCGF.EmitBlock(ToElseBB); 9751 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9752 LeftToFrom, 9753 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9754 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9755 // In case of from, clear OMP_MAP_TO. 9756 MapperCGF.EmitBlock(FromBB); 9757 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9758 MemberMapType, 9759 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9760 // In case of tofrom, do nothing. 
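    // (Illustrative: if the mapper is invoked with 'to' for the whole object
    // while a member is declared 'tofrom' inside the mapper, the member
    // decays to 'to', matching the row/column intersection in the table
    // above.)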
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section.
9836 MapperCGF.EmitBlock(IsDeleteBB); 9837 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9838 MapType, 9839 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9840 llvm::Value *DeleteCond; 9841 if (IsInit) { 9842 DeleteCond = MapperCGF.Builder.CreateIsNull( 9843 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9844 } else { 9845 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9846 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9847 } 9848 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9849 9850 MapperCGF.EmitBlock(BodyBB); 9851 // Get the array size by multiplying element size and element number (i.e., \p 9852 // Size). 9853 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9854 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9855 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9856 // memory allocation/deletion purpose only. 9857 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9858 MapType, 9859 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9860 MappableExprsHandler::OMP_MAP_FROM))); 9861 llvm::Value *MapNameArg = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9862 9863 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9864 // data structure. 9865 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, 9866 ArraySize, MapTypeArg, MapNameArg}; 9867 MapperCGF.EmitRuntimeCall( 9868 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9869 OMPRTL___tgt_push_mapper_component), 9870 OffloadingArgs); 9871 } 9872 9873 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( 9874 const OMPDeclareMapperDecl *D) { 9875 auto I = UDMMap.find(D); 9876 if (I != UDMMap.end()) 9877 return I->second; 9878 emitUserDefinedMapper(D); 9879 return UDMMap.lookup(D); 9880 } 9881 9882 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9883 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9884 llvm::Value *DeviceID, 9885 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9886 const OMPLoopDirective &D)> 9887 SizeEmitter) { 9888 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9889 const OMPExecutableDirective *TD = &D; 9890 // Get nested teams distribute kind directive, if any. 
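  // (For example, with '#pragma omp target' whose body is
  // '#pragma omp teams distribute parallel for', the nested distribute
  // directive found here supplies the loop used to compute the trip count.)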
9891 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9892 TD = getNestedDistributeDirective(CGM.getContext(), D);
9893 if (!TD)
9894 return;
9895 const auto *LD = cast<OMPLoopDirective>(TD);
9896 auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
9897 PrePostActionTy &) {
9898 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9899 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
9900 llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
9901 CGF.EmitRuntimeCall(
9902 OMPBuilder.getOrCreateRuntimeFunction(
9903 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
9904 Args);
9905 }
9906 };
9907 emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9908 }
9909
9910 void CGOpenMPRuntime::emitTargetCall(
9911 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9912 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9913 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9914 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9915 const OMPLoopDirective &D)>
9916 SizeEmitter) {
9917 if (!CGF.HaveInsertPoint())
9918 return;
9919
9920 assert(OutlinedFn && "Invalid outlined function!");
9921
9922 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
9923 D.hasClausesOfKind<OMPNowaitClause>();
9924 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9925 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9926 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9927 PrePostActionTy &) {
9928 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9929 };
9930 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9931
9932 CodeGenFunction::OMPTargetDataInfo InputInfo;
9933 llvm::Value *MapTypesArray = nullptr;
9934 llvm::Value *MapNamesArray = nullptr;
9935 // Fill up the pointer arrays and transfer execution to the device.
9936 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9937 &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
9938 &CapturedVars,
9939 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9940 if (Device.getInt() == OMPC_DEVICE_ancestor) {
9941 // Reverse offloading is not supported, so just execute on the host.
9942 if (RequiresOuterTask) {
9943 CapturedVars.clear();
9944 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9945 }
9946 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9947 return;
9948 }
9949
9950 // On top of the arrays that were filled up, the target offloading call
9951 // takes as arguments the device id as well as the host pointer. The host
9952 // pointer is used by the runtime library to identify the current target
9953 // region, so it only has to be unique and not necessarily point to
9954 // anything. It could be the pointer to the outlined function that
9955 // implements the target region, but we aren't using that, so that the
9956 // compiler doesn't need to keep it around and can therefore inline the
9957 // host function if proven worthwhile during optimization.
9958
9959 // From this point on, we need to have an ID of the target region defined.
9960 assert(OutlinedFnID && "Invalid outlined function ID!");
9961
9962 // Emit device ID if any.
9963 llvm::Value *DeviceID;
9964 if (Device.getPointer()) {
9965 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9966 Device.getInt() == OMPC_DEVICE_device_num) &&
9967 "Expected device_num modifier.");
9968 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9969 DeviceID =
9970 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9971 } else {
9972 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9973 }
9974
9975 // Emit the number of elements in the offloading arrays.
9976 llvm::Value *PointerNum =
9977 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9978
9979 // Return value of the runtime offloading call.
9980 llvm::Value *Return;
9981
9982 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9983 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9984
9985 // Source location for the ident struct
9986 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
9987
9988 // Emit tripcount for the target loop-based directive.
9989 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
9990
9991 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9992 // The target region is an outlined function launched by the runtime
9993 // via calls to __tgt_target() or __tgt_target_teams().
9994 //
9995 // __tgt_target() launches a target region with one team and one thread,
9996 // executing a serial region. This master thread may in turn launch
9997 // more threads within its team upon encountering a parallel region,
9998 // however, no additional teams can be launched on the device.
9999 //
10000 // __tgt_target_teams() launches a target region with one or more teams,
10001 // each with one or more threads. This call is required for target
10002 // constructs such as:
10003 // 'target teams'
10004 // 'target' / 'teams'
10005 // 'target teams distribute parallel for'
10006 // 'target parallel'
10007 // and so on.
10008 //
10009 // Note that on the host and CPU targets, the runtime implementation of
10010 // these calls simply calls the outlined function without forking threads.
10011 // The outlined functions themselves have runtime calls to
10012 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10013 // the compiler in emitTeamsCall() and emitParallelCall().
10014 //
10015 // In contrast, on the NVPTX target, the implementation of
10016 // __tgt_target_teams() launches a GPU kernel with the requested number
10017 // of teams and threads so no additional calls to the runtime are required.
10018 if (NumTeams) {
10019 // If we have NumTeams defined, this means that we have an enclosed teams
10020 // region. Therefore we also expect to have NumThreads defined. These two
10021 // values should be defined in the presence of a teams directive,
10022 // regardless of having any clauses associated. If the user is using teams
10023 // but no clauses, these two values will be the default that should be
10024 // passed to the runtime library - a 32-bit integer with the value zero.
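// As an illustrative sketch, the call emitted on this path has the shape
//   Return = __tgt_target_teams_mapper(RTLoc, DeviceID, OutlinedFnID,
//                                      PointerNum, BasePtrs, Ptrs, Sizes,
//                                      MapTypes, MapNames, Mappers,
//                                      NumTeams, NumThreads);
// with the _nowait variant substituted when a 'nowait' clause is present.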
10025 assert(NumThreads && "Thread limit expression should be available along " 10026 "with number of teams."); 10027 llvm::Value *OffloadingArgs[] = {RTLoc, 10028 DeviceID, 10029 OutlinedFnID, 10030 PointerNum, 10031 InputInfo.BasePointersArray.getPointer(), 10032 InputInfo.PointersArray.getPointer(), 10033 InputInfo.SizesArray.getPointer(), 10034 MapTypesArray, 10035 MapNamesArray, 10036 InputInfo.MappersArray.getPointer(), 10037 NumTeams, 10038 NumThreads}; 10039 Return = CGF.EmitRuntimeCall( 10040 OMPBuilder.getOrCreateRuntimeFunction( 10041 CGM.getModule(), HasNowait 10042 ? OMPRTL___tgt_target_teams_nowait_mapper 10043 : OMPRTL___tgt_target_teams_mapper), 10044 OffloadingArgs); 10045 } else { 10046 llvm::Value *OffloadingArgs[] = {RTLoc, 10047 DeviceID, 10048 OutlinedFnID, 10049 PointerNum, 10050 InputInfo.BasePointersArray.getPointer(), 10051 InputInfo.PointersArray.getPointer(), 10052 InputInfo.SizesArray.getPointer(), 10053 MapTypesArray, 10054 MapNamesArray, 10055 InputInfo.MappersArray.getPointer()}; 10056 Return = CGF.EmitRuntimeCall( 10057 OMPBuilder.getOrCreateRuntimeFunction( 10058 CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper 10059 : OMPRTL___tgt_target_mapper), 10060 OffloadingArgs); 10061 } 10062 10063 // Check the error code and execute the host version if required. 10064 llvm::BasicBlock *OffloadFailedBlock = 10065 CGF.createBasicBlock("omp_offload.failed"); 10066 llvm::BasicBlock *OffloadContBlock = 10067 CGF.createBasicBlock("omp_offload.cont"); 10068 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 10069 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 10070 10071 CGF.EmitBlock(OffloadFailedBlock); 10072 if (RequiresOuterTask) { 10073 CapturedVars.clear(); 10074 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10075 } 10076 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10077 CGF.EmitBranch(OffloadContBlock); 10078 10079 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 10080 }; 10081 10082 // Notify that the host version must be executed. 10083 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 10084 RequiresOuterTask](CodeGenFunction &CGF, 10085 PrePostActionTy &) { 10086 if (RequiresOuterTask) { 10087 CapturedVars.clear(); 10088 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10089 } 10090 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10091 }; 10092 10093 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10094 &MapNamesArray, &CapturedVars, RequiresOuterTask, 10095 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 10096 // Fill up the arrays with all the captured variables. 10097 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10098 10099 // Get mappable expression information. 10100 MappableExprsHandler MEHandler(D, CGF); 10101 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 10102 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 10103 10104 auto RI = CS.getCapturedRecordDecl()->field_begin(); 10105 auto CV = CapturedVars.begin(); 10106 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 10107 CE = CS.capture_end(); 10108 CI != CE; ++CI, ++RI, ++CV) { 10109 MappableExprsHandler::MapCombinedInfoTy CurInfo; 10110 MappableExprsHandler::StructRangeInfoTy PartialStruct; 10111 10112 // VLA sizes are passed to the outlined region by copy and do not have map 10113 // information associated. 
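// For example, for a capture of a VLA 'int a[n]', the size value computed
// from 'n' is forwarded below as an implicit literal argument instead of
// being mapped through a data pointer.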
10114 if (CI->capturesVariableArrayType()) {
10115 CurInfo.Exprs.push_back(nullptr);
10116 CurInfo.BasePointers.push_back(*CV);
10117 CurInfo.Pointers.push_back(*CV);
10118 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10119 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10120 // Copy to the device as an argument. No need to retrieve it.
10121 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10122 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10123 MappableExprsHandler::OMP_MAP_IMPLICIT);
10124 CurInfo.Mappers.push_back(nullptr);
10125 } else {
10126 // If we have any information in the map clause, we use it, otherwise we
10127 // just do a default mapping.
10128 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10129 if (!CI->capturesThis())
10130 MappedVarSet.insert(CI->getCapturedVar());
10131 else
10132 MappedVarSet.insert(nullptr);
10133 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10134 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10135 // Generate correct mapping for variables captured by reference in
10136 // lambdas.
10137 if (CI->capturesVariable())
10138 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10139 CurInfo, LambdaPointers);
10140 }
10141 // We expect to have at least one element of information for this capture.
10142 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10143 "Non-existing map pointer for capture!");
10144 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10145 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10146 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10147 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10148 "Inconsistent map information sizes!");
10149
10150 // If there is an entry in PartialStruct it means we have a struct with
10151 // individual members mapped. Emit an extra combined entry.
10152 if (PartialStruct.Base.isValid())
10153 MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
10154 nullptr, /*NoTargetParam=*/false);
10155
10156 // We need to append the results of this capture to what we already have.
10157 CombinedInfo.append(CurInfo);
10158 }
10159 // Adjust MEMBER_OF flags for the lambdas' captures.
10160 MEHandler.adjustMemberOfForLambdaCaptures(
10161 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10162 CombinedInfo.Types);
10163 // Map any list items in a map clause that were not captured because they
10164 // weren't referenced within the construct.
10165 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10166
10167 TargetDataInfo Info;
10168 // Fill up the arrays and create the arguments.
10169 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10170 emitOffloadingArraysArgument(
10171 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10172 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10173 {/*ForEndTask=*/false});
10174
10175 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10176 InputInfo.BasePointersArray =
10177 Address(Info.BasePointersArray, CGM.getPointerAlign());
10178 InputInfo.PointersArray =
10179 Address(Info.PointersArray, CGM.getPointerAlign());
10180 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10181 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10182 MapTypesArray = Info.MapTypesArray;
10183 MapNamesArray = Info.MapNamesArray;
10184 if (RequiresOuterTask)
10185 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10186 else
10187 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10188 };
10189
10190 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10191 CodeGenFunction &CGF, PrePostActionTy &) {
10192 if (RequiresOuterTask) {
10193 CodeGenFunction::OMPTargetDataInfo InputInfo;
10194 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10195 } else {
10196 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10197 }
10198 };
10199
10200 // If we have a target function ID, it means that we need to support
10201 // offloading; otherwise, we just execute on the host. We need to execute
10202 // on the host regardless of the if clause's condition if, e.g., the user
10203 // does not specify target triples.
10204 if (OutlinedFnID) {
10205 if (IfCond) {
10206 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10207 } else {
10208 RegionCodeGenTy ThenRCG(TargetThenGen);
10209 ThenRCG(CGF);
10210 }
10211 } else {
10212 RegionCodeGenTy ElseRCG(TargetElseGen);
10213 ElseRCG(CGF);
10214 }
10215 }
10216
10217 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10218 StringRef ParentName) {
10219 if (!S)
10220 return;
10221
10222 // Codegen OMP target directives that offload compute to the device.
10223 bool RequiresDeviceCodegen =
10224 isa<OMPExecutableDirective>(S) &&
10225 isOpenMPTargetExecutionDirective(
10226 cast<OMPExecutableDirective>(S)->getDirectiveKind());
10227
10228 if (RequiresDeviceCodegen) {
10229 const auto &E = *cast<OMPExecutableDirective>(S);
10230 unsigned DeviceID;
10231 unsigned FileID;
10232 unsigned Line;
10233 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10234 FileID, Line);
10235
10236 // Is this a target region that should not be emitted as an entry point? If
10237 // so just signal we are done with this target region.
10238 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 10239 ParentName, Line)) 10240 return; 10241 10242 switch (E.getDirectiveKind()) { 10243 case OMPD_target: 10244 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 10245 cast<OMPTargetDirective>(E)); 10246 break; 10247 case OMPD_target_parallel: 10248 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 10249 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 10250 break; 10251 case OMPD_target_teams: 10252 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 10253 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 10254 break; 10255 case OMPD_target_teams_distribute: 10256 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 10257 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 10258 break; 10259 case OMPD_target_teams_distribute_simd: 10260 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 10261 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 10262 break; 10263 case OMPD_target_parallel_for: 10264 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 10265 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 10266 break; 10267 case OMPD_target_parallel_for_simd: 10268 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 10269 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 10270 break; 10271 case OMPD_target_simd: 10272 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 10273 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 10274 break; 10275 case OMPD_target_teams_distribute_parallel_for: 10276 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 10277 CGM, ParentName, 10278 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 10279 break; 10280 case OMPD_target_teams_distribute_parallel_for_simd: 10281 CodeGenFunction:: 10282 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 10283 CGM, ParentName, 10284 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 10285 break; 10286 case OMPD_parallel: 10287 case OMPD_for: 10288 case OMPD_parallel_for: 10289 case OMPD_parallel_master: 10290 case OMPD_parallel_sections: 10291 case OMPD_for_simd: 10292 case OMPD_parallel_for_simd: 10293 case OMPD_cancel: 10294 case OMPD_cancellation_point: 10295 case OMPD_ordered: 10296 case OMPD_threadprivate: 10297 case OMPD_allocate: 10298 case OMPD_task: 10299 case OMPD_simd: 10300 case OMPD_sections: 10301 case OMPD_section: 10302 case OMPD_single: 10303 case OMPD_master: 10304 case OMPD_critical: 10305 case OMPD_taskyield: 10306 case OMPD_barrier: 10307 case OMPD_taskwait: 10308 case OMPD_taskgroup: 10309 case OMPD_atomic: 10310 case OMPD_flush: 10311 case OMPD_depobj: 10312 case OMPD_scan: 10313 case OMPD_teams: 10314 case OMPD_target_data: 10315 case OMPD_target_exit_data: 10316 case OMPD_target_enter_data: 10317 case OMPD_distribute: 10318 case OMPD_distribute_simd: 10319 case OMPD_distribute_parallel_for: 10320 case OMPD_distribute_parallel_for_simd: 10321 case OMPD_teams_distribute: 10322 case OMPD_teams_distribute_simd: 10323 case OMPD_teams_distribute_parallel_for: 10324 case OMPD_teams_distribute_parallel_for_simd: 10325 case OMPD_target_update: 10326 case OMPD_declare_simd: 10327 case OMPD_declare_variant: 10328 case OMPD_begin_declare_variant: 10329 case OMPD_end_declare_variant: 10330 case OMPD_declare_target: 10331 case OMPD_end_declare_target: 10332 case OMPD_declare_reduction: 10333 case OMPD_declare_mapper: 10334 case OMPD_taskloop: 10335 case OMPD_taskloop_simd: 10336 case 
OMPD_master_taskloop:
10337 case OMPD_master_taskloop_simd:
10338 case OMPD_parallel_master_taskloop:
10339 case OMPD_parallel_master_taskloop_simd:
10340 case OMPD_requires:
10341 case OMPD_unknown:
10342 default:
10343 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10344 }
10345 return;
10346 }
10347
10348 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10349 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10350 return;
10351
10352 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10353 return;
10354 }
10355
10356 // If this is a lambda function, look into its body.
10357 if (const auto *L = dyn_cast<LambdaExpr>(S))
10358 S = L->getBody();
10359
10360 // Keep looking for target regions recursively.
10361 for (const Stmt *II : S->children())
10362 scanForTargetRegionsFunctions(II, ParentName);
10363 }
10364
10365 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10366 // If emitting code for the host, we do not process FD here. Instead we do
10367 // the normal code generation.
10368 if (!CGM.getLangOpts().OpenMPIsDevice) {
10369 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
10370 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10371 OMPDeclareTargetDeclAttr::getDeviceType(FD);
10372 // Do not emit device_type(nohost) functions for the host.
10373 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10374 return true;
10375 }
10376 return false;
10377 }
10378
10379 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10380 // Try to detect target regions in the function.
10381 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10382 StringRef Name = CGM.getMangledName(GD);
10383 scanForTargetRegionsFunctions(FD->getBody(), Name);
10384 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10385 OMPDeclareTargetDeclAttr::getDeviceType(FD);
10386 // Do not emit device_type(host) functions for the device.
10387 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10388 return true;
10389 }
10390
10391 // Do not emit the function if it is not marked as declare target.
10392 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10393 AlreadyEmittedTargetDecls.count(VD) == 0;
10394 }
10395
10396 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10397 if (!CGM.getLangOpts().OpenMPIsDevice)
10398 return false;
10399
10400 // Check if there are Ctors/Dtors in this declaration and look for target
10401 // regions in it. We use the complete variant to produce the kernel name
10402 // mangling.
10403 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10404 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10405 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10406 StringRef ParentName =
10407 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10408 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10409 }
10410 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10411 StringRef ParentName =
10412 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10413 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10414 }
10415 }
10416
10417 // Do not emit the variable if it is not marked as declare target.
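// For example, given
//   #pragma omp declare target
//   int DevVar = 0;
//   #pragma omp end declare target
// a plain 'to' variable is emitted normally, while a 'link' variable (or a
// 'to' variable under 'requires unified_shared_memory') is deferred below
// and materialized later through getAddrOfDeclareTargetVar in
// emitDeferredTargetDecls.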
10418 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10419 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10420 cast<VarDecl>(GD.getDecl())); 10421 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10422 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10423 HasRequiresUnifiedSharedMemory)) { 10424 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10425 return true; 10426 } 10427 return false; 10428 } 10429 10430 llvm::Constant * 10431 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 10432 const VarDecl *VD) { 10433 assert(VD->getType().isConstant(CGM.getContext()) && 10434 "Expected constant variable."); 10435 StringRef VarName; 10436 llvm::Constant *Addr; 10437 llvm::GlobalValue::LinkageTypes Linkage; 10438 QualType Ty = VD->getType(); 10439 SmallString<128> Buffer; 10440 { 10441 unsigned DeviceID; 10442 unsigned FileID; 10443 unsigned Line; 10444 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 10445 FileID, Line); 10446 llvm::raw_svector_ostream OS(Buffer); 10447 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 10448 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 10449 VarName = OS.str(); 10450 } 10451 Linkage = llvm::GlobalValue::InternalLinkage; 10452 Addr = 10453 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 10454 getDefaultFirstprivateAddressSpace()); 10455 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 10456 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 10457 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 10458 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10459 VarName, Addr, VarSize, 10460 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 10461 return Addr; 10462 } 10463 10464 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10465 llvm::Constant *Addr) { 10466 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10467 !CGM.getLangOpts().OpenMPIsDevice) 10468 return; 10469 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10470 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10471 if (!Res) { 10472 if (CGM.getLangOpts().OpenMPIsDevice) { 10473 // Register non-target variables being emitted in device code (debug info 10474 // may cause this). 10475 StringRef VarName = CGM.getMangledName(VD); 10476 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10477 } 10478 return; 10479 } 10480 // Register declare target variables. 10481 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10482 StringRef VarName; 10483 CharUnits VarSize; 10484 llvm::GlobalValue::LinkageTypes Linkage; 10485 10486 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10487 !HasRequiresUnifiedSharedMemory) { 10488 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10489 VarName = CGM.getMangledName(VD); 10490 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10491 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10492 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10493 } else { 10494 VarSize = CharUnits::Zero(); 10495 } 10496 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10497 // Temp solution to prevent optimizations of the internal variables. 
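// The artificial reference created below is added to llvm.compiler.used via
// addCompilerUsedGlobal, so the optimizer cannot strip the internal declare
// target variable from the device image.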
10498 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10499 std::string RefName = getName({VarName, "ref"});
10500 if (!CGM.GetGlobalValue(RefName)) {
10501 llvm::Constant *AddrRef =
10502 getOrCreateInternalVariable(Addr->getType(), RefName);
10503 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10504 GVAddrRef->setConstant(/*Val=*/true);
10505 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10506 GVAddrRef->setInitializer(Addr);
10507 CGM.addCompilerUsedGlobal(GVAddrRef);
10508 }
10509 }
10510 } else {
10511 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10512 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10513 HasRequiresUnifiedSharedMemory)) &&
10514 "Declare target attribute must be 'link' or 'to' with unified memory.");
10515 if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10516 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10517 else
10518 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10519
10520 if (CGM.getLangOpts().OpenMPIsDevice) {
10521 VarName = Addr->getName();
10522 Addr = nullptr;
10523 } else {
10524 VarName = getAddrOfDeclareTargetVar(VD).getName();
10525 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10526 }
10527 VarSize = CGM.getPointerSize();
10528 Linkage = llvm::GlobalValue::WeakAnyLinkage;
10529 }
10530
10531 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10532 VarName, Addr, VarSize, Flags, Linkage);
10533 }
10534
10535 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10536 if (isa<FunctionDecl>(GD.getDecl()) ||
10537 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10538 return emitTargetFunctions(GD);
10539
10540 return emitTargetGlobalVariable(GD);
10541 }
10542
10543 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10544 for (const VarDecl *VD : DeferredGlobalVariables) {
10545 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10546 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10547 if (!Res)
10548 continue;
10549 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10550 !HasRequiresUnifiedSharedMemory) {
10551 CGM.EmitGlobal(VD);
10552 } else {
10553 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10554 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10555 HasRequiresUnifiedSharedMemory)) &&
10556 "Expected link clause or to clause with unified memory.");
10557 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10558 }
10559 }
10560 }
10561
10562 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10563 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10564 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10565 "Expected target-based directive.");
10566 }
10567
10568 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10569 for (const OMPClause *Clause : D->clauselists()) {
10570 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10571 HasRequiresUnifiedSharedMemory = true;
10572 } else if (const auto *AC =
10573 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10574 switch (AC->getAtomicDefaultMemOrderKind()) {
10575 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10576 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10577 break;
10578 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10579 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10580 break;
10581 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10582 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10583 break;
10584 case
OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10585 break;
10586 }
10587 }
10588 }
10589 }
10590
10591 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10592 return RequiresAtomicOrdering;
10593 }
10594
10595 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10596 LangAS &AS) {
10597 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10598 return false;
10599 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10600 switch (A->getAllocatorType()) {
10601 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10602 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10603 // Not supported, fall back to the default memory space.
10604 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10605 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10606 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10607 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10608 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10609 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10610 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10611 AS = LangAS::Default;
10612 return true;
10613 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10614 llvm_unreachable("Expected predefined allocator for variables with "
10615 "static storage.");
10616 }
10617 return false;
10618 }
10619
10620 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10621 return HasRequiresUnifiedSharedMemory;
10622 }
10623
10624 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10625 CodeGenModule &CGM)
10626 : CGM(CGM) {
10627 if (CGM.getLangOpts().OpenMPIsDevice) {
10628 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10629 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10630 }
10631 }
10632
10633 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10634 if (CGM.getLangOpts().OpenMPIsDevice)
10635 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10636 }
10637
10638 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10639 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10640 return true;
10641
10642 const auto *D = cast<FunctionDecl>(GD.getDecl());
10643 // Do not emit the function if it is marked as declare target, as it was
10644 // already emitted.
10645 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10646 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10647 if (auto *F = dyn_cast_or_null<llvm::Function>(
10648 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10649 return !F->isDeclaration();
10650 return false;
10651 }
10652 return true;
10653 }
10654
10655 return !AlreadyEmittedTargetDecls.insert(D).second;
10656 }
10657
10658 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10659 // If we don't have entries or if we are emitting code for the device, we
10660 // don't need to do anything.
10661 if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10662 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10663 (OffloadEntriesInfoManager.empty() &&
10664 !HasEmittedDeclareTargetRegion &&
10665 !HasEmittedTargetRegion))
10666 return nullptr;
10667
10668 // Create and register the function that handles the requires directives.
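// As a rough sketch, the function built below (its exact symbol name comes
// from getName({"omp_offloading", "requires_reg"})) reduces to
//   void omp_offloading_requires_reg() {
//     __tgt_register_requires(Flags);
//   }
// and is expected to run via the global initialization machinery.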
10669 ASTContext &C = CGM.getContext();
10670
10671 llvm::Function *RequiresRegFn;
10672 {
10673 CodeGenFunction CGF(CGM);
10674 const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10675 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10676 std::string ReqName = getName({"omp_offloading", "requires_reg"});
10677 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10678 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10679 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10680 // TODO: check for other requires clauses.
10681 // The requires directive takes effect only when a target region is
10682 // present in the compilation unit. Otherwise it is ignored and not
10683 // passed to the runtime. This prevents the runtime from throwing an
10684 // error for mismatched requires clauses across compilation units that
10685 // don't contain at least one target region.
10686 assert((HasEmittedTargetRegion ||
10687 HasEmittedDeclareTargetRegion ||
10688 !OffloadEntriesInfoManager.empty()) &&
10689 "Target or declare target region expected.");
10690 if (HasRequiresUnifiedSharedMemory)
10691 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10692 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10693 CGM.getModule(), OMPRTL___tgt_register_requires),
10694 llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10695 CGF.FinishFunction();
10696 }
10697 return RequiresRegFn;
10698 }
10699
10700 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10701 const OMPExecutableDirective &D,
10702 SourceLocation Loc,
10703 llvm::Function *OutlinedFn,
10704 ArrayRef<llvm::Value *> CapturedVars) {
10705 if (!CGF.HaveInsertPoint())
10706 return;
10707
10708 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10709 CodeGenFunction::RunCleanupsScope Scope(CGF);
10710
10711 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10712 llvm::Value *Args[] = {
10713 RTLoc,
10714 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10715 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10716 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10717 RealArgs.append(std::begin(Args), std::end(Args));
10718 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10719
10720 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10721 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10722 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10723 }
10724
10725 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10726 const Expr *NumTeams,
10727 const Expr *ThreadLimit,
10728 SourceLocation Loc) {
10729 if (!CGF.HaveInsertPoint())
10730 return;
10731
10732 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10733
10734 llvm::Value *NumTeamsVal =
10735 NumTeams
10736 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10737 CGF.CGM.Int32Ty, /* isSigned = */ true)
10738 : CGF.Builder.getInt32(0);
10739
10740 llvm::Value *ThreadLimitVal =
10741 ThreadLimit
10742 ?
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10743 CGF.CGM.Int32Ty, /* isSigned = */ true)
10744 : CGF.Builder.getInt32(0);
10745
10746 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10747 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10748 ThreadLimitVal};
10749 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10750 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10751 PushNumTeamsArgs);
10752 }
10753
10754 void CGOpenMPRuntime::emitTargetDataCalls(
10755 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10756 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
10757 if (!CGF.HaveInsertPoint())
10758 return;
10759
10760 // Action used to replace the default codegen action and turn privatization
10761 // off.
10762 PrePostActionTy NoPrivAction;
10763
10764 // Generate the code for the opening of the data environment. Capture all the
10765 // arguments of the runtime call by reference because they are used in the
10766 // closing of the region.
10767 auto &&BeginThenGen = [this, &D, Device, &Info,
10768 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
10769 // Fill up the arrays with all the mapped variables.
10770 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10771
10772 // Get map clause information.
10773 MappableExprsHandler MEHandler(D, CGF);
10774 MEHandler.generateAllInfo(CombinedInfo);
10775
10776 // Fill up the arrays and create the arguments.
10777 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10778 /*IsNonContiguous=*/true);
10779
10780 llvm::Value *BasePointersArrayArg = nullptr;
10781 llvm::Value *PointersArrayArg = nullptr;
10782 llvm::Value *SizesArrayArg = nullptr;
10783 llvm::Value *MapTypesArrayArg = nullptr;
10784 llvm::Value *MapNamesArrayArg = nullptr;
10785 llvm::Value *MappersArrayArg = nullptr;
10786 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10787 SizesArrayArg, MapTypesArrayArg,
10788 MapNamesArrayArg, MappersArrayArg, Info);
10789
10790 // Emit device ID if any.
10791 llvm::Value *DeviceID = nullptr;
10792 if (Device) {
10793 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10794 CGF.Int64Ty, /*isSigned=*/true);
10795 } else {
10796 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10797 }
10798
10799 // Emit the number of elements in the offloading arrays.
10800 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10801
10802 // Source location for the ident struct
10803 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10804
10805 llvm::Value *OffloadingArgs[] = {RTLoc,
10806 DeviceID,
10807 PointerNum,
10808 BasePointersArrayArg,
10809 PointersArrayArg,
10810 SizesArrayArg,
10811 MapTypesArrayArg,
10812 MapNamesArrayArg,
10813 MappersArrayArg};
10814 CGF.EmitRuntimeCall(
10815 OMPBuilder.getOrCreateRuntimeFunction(
10816 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
10817 OffloadingArgs);
10818
10819 // If device pointer privatization is required, emit the body of the region
10820 // here. It will have to be duplicated: with and without privatization.
10821 if (!Info.CaptureDeviceAddrMap.empty())
10822 CodeGen(CGF);
10823 };
10824
10825 // Generate code for the closing of the data region.
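// For example, '#pragma omp target data map(tofrom: a)' brackets its region
// with the __tgt_target_data_begin_mapper call emitted above and a matching
// __tgt_target_data_end_mapper call emitted below with mirrored arguments.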
10826 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 10827 PrePostActionTy &) { 10828 assert(Info.isValid() && "Invalid data environment closing arguments."); 10829 10830 llvm::Value *BasePointersArrayArg = nullptr; 10831 llvm::Value *PointersArrayArg = nullptr; 10832 llvm::Value *SizesArrayArg = nullptr; 10833 llvm::Value *MapTypesArrayArg = nullptr; 10834 llvm::Value *MapNamesArrayArg = nullptr; 10835 llvm::Value *MappersArrayArg = nullptr; 10836 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10837 SizesArrayArg, MapTypesArrayArg, 10838 MapNamesArrayArg, MappersArrayArg, Info, 10839 {/*ForEndCall=*/true}); 10840 10841 // Emit device ID if any. 10842 llvm::Value *DeviceID = nullptr; 10843 if (Device) { 10844 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10845 CGF.Int64Ty, /*isSigned=*/true); 10846 } else { 10847 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10848 } 10849 10850 // Emit the number of elements in the offloading arrays. 10851 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10852 10853 // Source location for the ident struct 10854 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10855 10856 llvm::Value *OffloadingArgs[] = {RTLoc, 10857 DeviceID, 10858 PointerNum, 10859 BasePointersArrayArg, 10860 PointersArrayArg, 10861 SizesArrayArg, 10862 MapTypesArrayArg, 10863 MapNamesArrayArg, 10864 MappersArrayArg}; 10865 CGF.EmitRuntimeCall( 10866 OMPBuilder.getOrCreateRuntimeFunction( 10867 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 10868 OffloadingArgs); 10869 }; 10870 10871 // If we need device pointer privatization, we need to emit the body of the 10872 // region with no privatization in the 'else' branch of the conditional. 10873 // Otherwise, we don't have to do anything. 10874 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10875 PrePostActionTy &) { 10876 if (!Info.CaptureDeviceAddrMap.empty()) { 10877 CodeGen.setAction(NoPrivAction); 10878 CodeGen(CGF); 10879 } 10880 }; 10881 10882 // We don't have to do anything to close the region if the if clause evaluates 10883 // to false. 10884 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10885 10886 if (IfCond) { 10887 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10888 } else { 10889 RegionCodeGenTy RCG(BeginThenGen); 10890 RCG(CGF); 10891 } 10892 10893 // If we don't require privatization of device pointers, we emit the body in 10894 // between the runtime calls. This avoids duplicating the body code. 10895 if (Info.CaptureDeviceAddrMap.empty()) { 10896 CodeGen.setAction(NoPrivAction); 10897 CodeGen(CGF); 10898 } 10899 10900 if (IfCond) { 10901 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10902 } else { 10903 RegionCodeGenTy RCG(EndThenGen); 10904 RCG(CGF); 10905 } 10906 } 10907 10908 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10909 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10910 const Expr *Device) { 10911 if (!CGF.HaveInsertPoint()) 10912 return; 10913 10914 assert((isa<OMPTargetEnterDataDirective>(D) || 10915 isa<OMPTargetExitDataDirective>(D) || 10916 isa<OMPTargetUpdateDirective>(D)) && 10917 "Expecting either target enter, exit data, or update directives."); 10918 10919 CodeGenFunction::OMPTargetDataInfo InputInfo; 10920 llvm::Value *MapTypesArray = nullptr; 10921 llvm::Value *MapNamesArray = nullptr; 10922 // Generate the code for the opening of the data environment. 
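// For example, '#pragma omp target enter data map(to: a)' lowers to
// __tgt_target_data_begin_mapper, 'target exit data' to
// __tgt_target_data_end_mapper, and 'target update' to
// __tgt_target_data_update_mapper (or their _nowait variants), as selected
// by the switch below.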
10923 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 10924 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10925 // Emit device ID if any. 10926 llvm::Value *DeviceID = nullptr; 10927 if (Device) { 10928 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10929 CGF.Int64Ty, /*isSigned=*/true); 10930 } else { 10931 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10932 } 10933 10934 // Emit the number of elements in the offloading arrays. 10935 llvm::Constant *PointerNum = 10936 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10937 10938 // Source location for the ident struct 10939 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10940 10941 llvm::Value *OffloadingArgs[] = {RTLoc, 10942 DeviceID, 10943 PointerNum, 10944 InputInfo.BasePointersArray.getPointer(), 10945 InputInfo.PointersArray.getPointer(), 10946 InputInfo.SizesArray.getPointer(), 10947 MapTypesArray, 10948 MapNamesArray, 10949 InputInfo.MappersArray.getPointer()}; 10950 10951 // Select the right runtime function call for each standalone 10952 // directive. 10953 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10954 RuntimeFunction RTLFn; 10955 switch (D.getDirectiveKind()) { 10956 case OMPD_target_enter_data: 10957 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 10958 : OMPRTL___tgt_target_data_begin_mapper; 10959 break; 10960 case OMPD_target_exit_data: 10961 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 10962 : OMPRTL___tgt_target_data_end_mapper; 10963 break; 10964 case OMPD_target_update: 10965 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper 10966 : OMPRTL___tgt_target_data_update_mapper; 10967 break; 10968 case OMPD_parallel: 10969 case OMPD_for: 10970 case OMPD_parallel_for: 10971 case OMPD_parallel_master: 10972 case OMPD_parallel_sections: 10973 case OMPD_for_simd: 10974 case OMPD_parallel_for_simd: 10975 case OMPD_cancel: 10976 case OMPD_cancellation_point: 10977 case OMPD_ordered: 10978 case OMPD_threadprivate: 10979 case OMPD_allocate: 10980 case OMPD_task: 10981 case OMPD_simd: 10982 case OMPD_sections: 10983 case OMPD_section: 10984 case OMPD_single: 10985 case OMPD_master: 10986 case OMPD_critical: 10987 case OMPD_taskyield: 10988 case OMPD_barrier: 10989 case OMPD_taskwait: 10990 case OMPD_taskgroup: 10991 case OMPD_atomic: 10992 case OMPD_flush: 10993 case OMPD_depobj: 10994 case OMPD_scan: 10995 case OMPD_teams: 10996 case OMPD_target_data: 10997 case OMPD_distribute: 10998 case OMPD_distribute_simd: 10999 case OMPD_distribute_parallel_for: 11000 case OMPD_distribute_parallel_for_simd: 11001 case OMPD_teams_distribute: 11002 case OMPD_teams_distribute_simd: 11003 case OMPD_teams_distribute_parallel_for: 11004 case OMPD_teams_distribute_parallel_for_simd: 11005 case OMPD_declare_simd: 11006 case OMPD_declare_variant: 11007 case OMPD_begin_declare_variant: 11008 case OMPD_end_declare_variant: 11009 case OMPD_declare_target: 11010 case OMPD_end_declare_target: 11011 case OMPD_declare_reduction: 11012 case OMPD_declare_mapper: 11013 case OMPD_taskloop: 11014 case OMPD_taskloop_simd: 11015 case OMPD_master_taskloop: 11016 case OMPD_master_taskloop_simd: 11017 case OMPD_parallel_master_taskloop: 11018 case OMPD_parallel_master_taskloop_simd: 11019 case OMPD_target: 11020 case OMPD_target_simd: 11021 case OMPD_target_teams_distribute: 11022 case OMPD_target_teams_distribute_simd: 11023 case OMPD_target_teams_distribute_parallel_for: 11024 case 
OMPD_target_teams_distribute_parallel_for_simd:
11025 case OMPD_target_teams:
11026 case OMPD_target_parallel:
11027 case OMPD_target_parallel_for:
11028 case OMPD_target_parallel_for_simd:
11029 case OMPD_requires:
11030 case OMPD_unknown:
11031 default:
11032 llvm_unreachable("Unexpected standalone target data directive.");
11033 break;
11034 }
11035 CGF.EmitRuntimeCall(
11036 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11037 OffloadingArgs);
11038 };
11039
11040 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11041 &MapNamesArray](CodeGenFunction &CGF,
11042 PrePostActionTy &) {
11043 // Fill up the arrays with all the mapped variables.
11044 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11045
11046 // Get map clause information.
11047 MappableExprsHandler MEHandler(D, CGF);
11048 MEHandler.generateAllInfo(CombinedInfo);
11049
11050 TargetDataInfo Info;
11051 // Fill up the arrays and create the arguments.
11052 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11053 /*IsNonContiguous=*/true);
11054 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11055 D.hasClausesOfKind<OMPNowaitClause>();
11056 emitOffloadingArraysArgument(
11057 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11058 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11059 {/*ForEndTask=*/false});
11060 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11061 InputInfo.BasePointersArray =
11062 Address(Info.BasePointersArray, CGM.getPointerAlign());
11063 InputInfo.PointersArray =
11064 Address(Info.PointersArray, CGM.getPointerAlign());
11065 InputInfo.SizesArray =
11066 Address(Info.SizesArray, CGM.getPointerAlign());
11067 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
11068 MapTypesArray = Info.MapTypesArray;
11069 MapNamesArray = Info.MapNamesArray;
11070 if (RequiresOuterTask)
11071 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11072 else
11073 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11074 };
11075
11076 if (IfCond) {
11077 emitIfClause(CGF, IfCond, TargetThenGen,
11078 [](CodeGenFunction &CGF, PrePostActionTy &) {});
11079 } else {
11080 RegionCodeGenTy ThenRCG(TargetThenGen);
11081 ThenRCG(CGF);
11082 }
11083 }
11084
11085 namespace {
11086 /// Kind of parameter in a function with 'declare simd' directive.
11087 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
11088 /// Attribute set of the parameter.
11089 struct ParamAttrTy {
11090 ParamKindTy Kind = Vector;
11091 llvm::APSInt StrideOrArg;
11092 llvm::APSInt Alignment;
11093 };
11094 } // namespace
11095
11096 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11097 ArrayRef<ParamAttrTy> ParamAttrs) {
11098 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11099 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11100 // of that clause. The VLEN value must be a power of 2.
11101 // Otherwise, the notion of the function's "characteristic data type" (CDT)
11102 // is used to compute the vector length.
11103 // CDT is defined in the following order:
11104 // a) For a non-void function, the CDT is the return type.
11105 // b) If the function has any non-uniform, non-linear parameters, then the
11106 // CDT is the type of the first such parameter.
11107 // c) If the CDT determined by a) or b) above is a struct, union, or class
11108 // type which is pass-by-value (except for the type that maps to the
11109 // built-in complex data type), the characteristic data type is int.
11110 // d) If none of the above three cases is applicable, the CDT is int.
11111 // The VLEN is then determined based on the CDT and the size of the vector
11112 // register of the ISA for which the current vector version is generated.
11113 // The VLEN is computed using the formula below:
11114 // VLEN = sizeof(vector_register) / sizeof(CDT),
11115 // where the vector register size is specified in section 3.2.1, Registers
11116 // and the Stack Frame, of the original AMD64 ABI document.
11117 QualType RetType = FD->getReturnType();
11118 if (RetType.isNull())
11119 return 0;
11120 ASTContext &C = FD->getASTContext();
11121 QualType CDT;
11122 if (!RetType.isNull() && !RetType->isVoidType()) {
11123 CDT = RetType;
11124 } else {
11125 unsigned Offset = 0;
11126 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11127 if (ParamAttrs[Offset].Kind == Vector)
11128 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11129 ++Offset;
11130 }
11131 if (CDT.isNull()) {
11132 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11133 if (ParamAttrs[I + Offset].Kind == Vector) {
11134 CDT = FD->getParamDecl(I)->getType();
11135 break;
11136 }
11137 }
11138 }
11139 }
11140 if (CDT.isNull())
11141 CDT = C.IntTy;
11142 CDT = CDT->getCanonicalTypeUnqualified();
11143 if (CDT->isRecordType() || CDT->isUnionType())
11144 CDT = C.IntTy;
11145 return C.getTypeSize(CDT);
11146 }
11147
11148 static void
11149 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11150 const llvm::APSInt &VLENVal,
11151 ArrayRef<ParamAttrTy> ParamAttrs,
11152 OMPDeclareSimdDeclAttr::BranchStateTy State) {
11153 struct ISADataTy {
11154 char ISA;
11155 unsigned VecRegSize;
11156 };
11157 ISADataTy ISAData[] = {
11158 {
11159 'b', 128
11160 }, // SSE
11161 {
11162 'c', 256
11163 }, // AVX
11164 {
11165 'd', 256
11166 }, // AVX2
11167 {
11168 'e', 512
11169 }, // AVX512
11170 };
11171 llvm::SmallVector<char, 2> Masked;
11172 switch (State) {
11173 case OMPDeclareSimdDeclAttr::BS_Undefined:
11174 Masked.push_back('N');
11175 Masked.push_back('M');
11176 break;
11177 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11178 Masked.push_back('N');
11179 break;
11180 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11181 Masked.push_back('M');
11182 break;
11183 }
11184 for (char Mask : Masked) {
11185 for (const ISADataTy &Data : ISAData) {
11186 SmallString<256> Buffer;
11187 llvm::raw_svector_ostream Out(Buffer);
11188 Out << "_ZGV" << Data.ISA << Mask;
11189 if (!VLENVal) {
11190 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11191 assert(NumElts && "Non-zero simdlen/cdtsize expected");
11192 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11193 } else {
11194 Out << VLENVal;
11195 }
11196 for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11197 switch (ParamAttr.Kind) {
11198 case LinearWithVarStride:
11199 Out << 's' << ParamAttr.StrideOrArg;
11200 break;
11201 case Linear:
11202 Out << 'l';
11203 if (ParamAttr.StrideOrArg != 1)
11204 Out << ParamAttr.StrideOrArg;
11205 break;
11206 case Uniform:
11207 Out << 'u';
11208 break;
11209 case Vector:
11210 Out << 'v';
11211 break;
11212 }
11213 if (!!ParamAttr.Alignment)
11214 Out << 'a' << ParamAttr.Alignment;
11215 }
11216 Out << '_' << Fn->getName();
11217 Fn->addFnAttr(Out.str());
11218 }
11219 }
11220 }
11221
11222 // These are the functions
that are needed to mangle the names of the
11223 // vector functions generated by the compiler, according to the rules
11224 // defined in the "Vector Function ABI specifications for AArch64",
11225 // available at
11226 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11227
11228 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11229 ///
11230 /// TODO: Need to implement the behavior for reference marked with a
11231 /// var or no linear modifiers (1.b in the section). For this, we
11232 /// need to extend ParamKindTy to support the linear modifiers.
11233 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11234 QT = QT.getCanonicalType();
11235
11236 if (QT->isVoidType())
11237 return false;
11238
11239 if (Kind == ParamKindTy::Uniform)
11240 return false;
11241
11242 if (Kind == ParamKindTy::Linear)
11243 return false;
11244
11245 // TODO: Handle linear references with modifiers
11246
11247 if (Kind == ParamKindTy::LinearWithVarStride)
11248 return false;
11249
11250 return true;
11251 }
11252
11253 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11254 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11255 QT = QT.getCanonicalType();
11256 unsigned Size = C.getTypeSize(QT);
11257
11258 // Only scalars and complex types at most 16 bytes wide set PBV to true.
11259 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11260 return false;
11261
11262 if (QT->isFloatingType())
11263 return true;
11264
11265 if (QT->isIntegerType())
11266 return true;
11267
11268 if (QT->isPointerType())
11269 return true;
11270
11271 // TODO: Add support for complex types (section 3.1.2, item 2).
11272
11273 return false;
11274 }
11275
11276 /// Computes the lane size (LS) of a return type or of an input parameter,
11277 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11278 /// TODO: Add support for references, section 3.2.1, item 1.
11279 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11280 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11281 QualType PTy = QT.getCanonicalType()->getPointeeType();
11282 if (getAArch64PBV(PTy, C))
11283 return C.getTypeSize(PTy);
11284 }
11285 if (getAArch64PBV(QT, C))
11286 return C.getTypeSize(QT);
11287
11288 return C.getTypeSize(C.getUIntPtrType());
11289 }
11290
11291 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11292 // signature of the scalar function, as defined in 3.2.2 of the
11293 // AAVFABI.
11294 static std::tuple<unsigned, unsigned, bool>
11295 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11296 QualType RetType = FD->getReturnType().getCanonicalType();
11297
11298 ASTContext &C = FD->getASTContext();
11299
11300 bool OutputBecomesInput = false;
11301
11302 llvm::SmallVector<unsigned, 8> Sizes;
11303 if (!RetType->isVoidType()) {
11304 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11305 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11306 OutputBecomesInput = true;
11307 }
11308 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11309 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11310 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11311 }
11312
11313 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11314 // The LS of a function parameter / return value can only be a power
11315 // of 2, starting from 8 bits, up to 128.
11316 assert(std::all_of(Sizes.begin(), Sizes.end(), 11317 [](unsigned Size) { 11318 return Size == 8 || Size == 16 || Size == 32 || 11319 Size == 64 || Size == 128; 11320 }) && 11321 "Invalid size"); 11322 11323 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11324 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11325 OutputBecomesInput); 11326 } 11327 11328 /// Mangle the parameter part of the vector function name according to 11329 /// their OpenMP classification. The mangling function is defined in 11330 /// section 3.5 of the AAVFABI. 11331 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11332 SmallString<256> Buffer; 11333 llvm::raw_svector_ostream Out(Buffer); 11334 for (const auto &ParamAttr : ParamAttrs) { 11335 switch (ParamAttr.Kind) { 11336 case LinearWithVarStride: 11337 Out << "ls" << ParamAttr.StrideOrArg; 11338 break; 11339 case Linear: 11340 Out << 'l'; 11341 // Don't print the step value if it is not present or if it is 11342 // equal to 1. 11343 if (ParamAttr.StrideOrArg != 1) 11344 Out << ParamAttr.StrideOrArg; 11345 break; 11346 case Uniform: 11347 Out << 'u'; 11348 break; 11349 case Vector: 11350 Out << 'v'; 11351 break; 11352 } 11353 11354 if (!!ParamAttr.Alignment) 11355 Out << 'a' << ParamAttr.Alignment; 11356 } 11357 11358 return std::string(Out.str()); 11359 } 11360 11361 // Function used to add the attribute. The parameter `VLEN` is 11362 // templated to allow the use of "x" when targeting scalable functions 11363 // for SVE. 11364 template <typename T> 11365 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11366 char ISA, StringRef ParSeq, 11367 StringRef MangledName, bool OutputBecomesInput, 11368 llvm::Function *Fn) { 11369 SmallString<256> Buffer; 11370 llvm::raw_svector_ostream Out(Buffer); 11371 Out << Prefix << ISA << LMask << VLEN; 11372 if (OutputBecomesInput) 11373 Out << "v"; 11374 Out << ParSeq << "_" << MangledName; 11375 Fn->addFnAttr(Out.str()); 11376 } 11377 11378 // Helper function to generate the Advanced SIMD names depending on 11379 // the value of the NDS when simdlen is not present. 11380 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11381 StringRef Prefix, char ISA, 11382 StringRef ParSeq, StringRef MangledName, 11383 bool OutputBecomesInput, 11384 llvm::Function *Fn) { 11385 switch (NDS) { 11386 case 8: 11387 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11388 OutputBecomesInput, Fn); 11389 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11390 OutputBecomesInput, Fn); 11391 break; 11392 case 16: 11393 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11394 OutputBecomesInput, Fn); 11395 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11396 OutputBecomesInput, Fn); 11397 break; 11398 case 32: 11399 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11400 OutputBecomesInput, Fn); 11401 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11402 OutputBecomesInput, Fn); 11403 break; 11404 case 64: 11405 case 128: 11406 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11407 OutputBecomesInput, Fn); 11408 break; 11409 default: 11410 llvm_unreachable("Scalar type is too wide."); 11411 } 11412 } 11413 11414 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed lengths must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out the parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
11496 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11497 OutputBecomesInput, Fn); 11498 } else { 11499 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11500 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11501 // two vector names depending on the use of the clause 11502 // `[not]inbranch`. 11503 switch (State) { 11504 case OMPDeclareSimdDeclAttr::BS_Undefined: 11505 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11506 OutputBecomesInput, Fn); 11507 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11508 OutputBecomesInput, Fn); 11509 break; 11510 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11511 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11512 OutputBecomesInput, Fn); 11513 break; 11514 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11515 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11516 OutputBecomesInput, Fn); 11517 break; 11518 } 11519 } 11520 } 11521 } 11522 11523 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11524 llvm::Function *Fn) { 11525 ASTContext &C = CGM.getContext(); 11526 FD = FD->getMostRecentDecl(); 11527 // Map params to their positions in function decl. 11528 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11529 if (isa<CXXMethodDecl>(FD)) 11530 ParamPositions.try_emplace(FD, 0); 11531 unsigned ParamPos = ParamPositions.size(); 11532 for (const ParmVarDecl *P : FD->parameters()) { 11533 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11534 ++ParamPos; 11535 } 11536 while (FD) { 11537 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11538 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11539 // Mark uniform parameters. 11540 for (const Expr *E : Attr->uniforms()) { 11541 E = E->IgnoreParenImpCasts(); 11542 unsigned Pos; 11543 if (isa<CXXThisExpr>(E)) { 11544 Pos = ParamPositions[FD]; 11545 } else { 11546 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11547 ->getCanonicalDecl(); 11548 Pos = ParamPositions[PVD]; 11549 } 11550 ParamAttrs[Pos].Kind = Uniform; 11551 } 11552 // Get alignment info. 11553 auto NI = Attr->alignments_begin(); 11554 for (const Expr *E : Attr->aligneds()) { 11555 E = E->IgnoreParenImpCasts(); 11556 unsigned Pos; 11557 QualType ParmTy; 11558 if (isa<CXXThisExpr>(E)) { 11559 Pos = ParamPositions[FD]; 11560 ParmTy = E->getType(); 11561 } else { 11562 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11563 ->getCanonicalDecl(); 11564 Pos = ParamPositions[PVD]; 11565 ParmTy = PVD->getType(); 11566 } 11567 ParamAttrs[Pos].Alignment = 11568 (*NI) 11569 ? (*NI)->EvaluateKnownConstInt(C) 11570 : llvm::APSInt::getUnsigned( 11571 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11572 .getQuantity()); 11573 ++NI; 11574 } 11575 // Mark linear parameters. 11576 auto SI = Attr->steps_begin(); 11577 auto MI = Attr->modifiers_begin(); 11578 for (const Expr *E : Attr->linears()) { 11579 E = E->IgnoreParenImpCasts(); 11580 unsigned Pos; 11581 // Rescaling factor needed to compute the linear parameter 11582 // value in the mangled name. 
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // A non-constant step must name another parameter; record the
            // position of that parameter instead of a literal stride.
            if (const auto *DRE =
                    dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (Linear == ParamAttr.Kind)
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}

namespace {
/// Cleanup action for doacross support.
11649 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 11650 public: 11651 static const int DoacrossFinArgs = 2; 11652 11653 private: 11654 llvm::FunctionCallee RTLFn; 11655 llvm::Value *Args[DoacrossFinArgs]; 11656 11657 public: 11658 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 11659 ArrayRef<llvm::Value *> CallArgs) 11660 : RTLFn(RTLFn) { 11661 assert(CallArgs.size() == DoacrossFinArgs); 11662 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11663 } 11664 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11665 if (!CGF.HaveInsertPoint()) 11666 return; 11667 CGF.EmitRuntimeCall(RTLFn, Args); 11668 } 11669 }; 11670 } // namespace 11671 11672 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11673 const OMPLoopDirective &D, 11674 ArrayRef<Expr *> NumIterations) { 11675 if (!CGF.HaveInsertPoint()) 11676 return; 11677 11678 ASTContext &C = CGM.getContext(); 11679 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 11680 RecordDecl *RD; 11681 if (KmpDimTy.isNull()) { 11682 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 11683 // kmp_int64 lo; // lower 11684 // kmp_int64 up; // upper 11685 // kmp_int64 st; // stride 11686 // }; 11687 RD = C.buildImplicitRecord("kmp_dim"); 11688 RD->startDefinition(); 11689 addFieldToRecordDecl(C, RD, Int64Ty); 11690 addFieldToRecordDecl(C, RD, Int64Ty); 11691 addFieldToRecordDecl(C, RD, Int64Ty); 11692 RD->completeDefinition(); 11693 KmpDimTy = C.getRecordType(RD); 11694 } else { 11695 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 11696 } 11697 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 11698 QualType ArrayTy = 11699 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 11700 11701 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 11702 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 11703 enum { LowerFD = 0, UpperFD, StrideFD }; 11704 // Fill dims with data. 
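  // For illustration (hypothetical two-dimension loop nest, not from this
  // file), the loop below effectively emits:
  //   dims[0].up = <num_iterations_0>; dims[0].st = 1;
  //   dims[1].up = <num_iterations_1>; dims[1].st = 1;
  // The lower bounds keep the zero value set by EmitNullInitialization above.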
11705 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11706 LValue DimsLVal = CGF.MakeAddrLValue( 11707 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11708 // dims.upper = num_iterations; 11709 LValue UpperLVal = CGF.EmitLValueForField( 11710 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11711 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 11712 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 11713 Int64Ty, NumIterations[I]->getExprLoc()); 11714 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11715 // dims.stride = 1; 11716 LValue StrideLVal = CGF.EmitLValueForField( 11717 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11718 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11719 StrideLVal); 11720 } 11721 11722 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11723 // kmp_int32 num_dims, struct kmp_dim * dims); 11724 llvm::Value *Args[] = { 11725 emitUpdateLocation(CGF, D.getBeginLoc()), 11726 getThreadID(CGF, D.getBeginLoc()), 11727 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11728 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11729 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 11730 CGM.VoidPtrTy)}; 11731 11732 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11733 CGM.getModule(), OMPRTL___kmpc_doacross_init); 11734 CGF.EmitRuntimeCall(RTLFn, Args); 11735 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11736 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11737 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11738 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 11739 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11740 llvm::makeArrayRef(FiniArgs)); 11741 } 11742 11743 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11744 const OMPDependClause *C) { 11745 QualType Int64Ty = 11746 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11747 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11748 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11749 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 11750 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11751 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11752 const Expr *CounterVal = C->getLoopData(I); 11753 assert(CounterVal); 11754 llvm::Value *CntVal = CGF.EmitScalarConversion( 11755 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11756 CounterVal->getExprLoc()); 11757 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11758 /*Volatile=*/false, Int64Ty); 11759 } 11760 llvm::Value *Args[] = { 11761 emitUpdateLocation(CGF, C->getBeginLoc()), 11762 getThreadID(CGF, C->getBeginLoc()), 11763 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11764 llvm::FunctionCallee RTLFn; 11765 if (C->getDependencyKind() == OMPC_DEPEND_source) { 11766 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11767 OMPRTL___kmpc_doacross_post); 11768 } else { 11769 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 11770 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11771 OMPRTL___kmpc_doacross_wait); 11772 } 11773 CGF.EmitRuntimeCall(RTLFn, Args); 11774 } 11775 11776 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 11777 llvm::FunctionCallee Callee, 11778 ArrayRef<llvm::Value *> Args) const { 11779 assert(Loc.isValid() && "Outlined function call location 
must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      unsigned LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is an enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid() ?
UntiedRealAddr : Address(Addr, Align); 11900 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( 11901 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), 11902 VDAddr, AA->getAllocator()); 11903 if (UntiedRealAddr.isValid()) 11904 if (auto *Region = 11905 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 11906 Region->emitUntiedSwitch(CGF); 11907 return VDAddr; 11908 } 11909 return UntiedAddr; 11910 } 11911 11912 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, 11913 const VarDecl *VD) const { 11914 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 11915 if (It == FunctionToUntiedTaskStackMap.end()) 11916 return false; 11917 return UntiedLocalVarsStack[It->second].count(VD) > 0; 11918 } 11919 11920 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11921 CodeGenModule &CGM, const OMPLoopDirective &S) 11922 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11923 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11924 if (!NeedToPush) 11925 return; 11926 NontemporalDeclsSet &DS = 11927 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11928 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11929 for (const Stmt *Ref : C->private_refs()) { 11930 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11931 const ValueDecl *VD; 11932 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11933 VD = DRE->getDecl(); 11934 } else { 11935 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11936 assert((ME->isImplicitCXXThis() || 11937 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11938 "Expected member of current class."); 11939 VD = ME->getMemberDecl(); 11940 } 11941 DS.insert(VD); 11942 } 11943 } 11944 } 11945 11946 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11947 if (!NeedToPush) 11948 return; 11949 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11950 } 11951 11952 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 11953 CodeGenFunction &CGF, 11954 const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, 11955 std::pair<Address, Address>> &LocalVars) 11956 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 11957 if (!NeedToPush) 11958 return; 11959 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 11960 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 11961 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 11962 } 11963 11964 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 11965 if (!NeedToPush) 11966 return; 11967 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 11968 } 11969 11970 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 11971 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11972 11973 return llvm::any_of( 11974 CGM.getOpenMPRuntime().NontemporalDeclsStack, 11975 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 11976 } 11977 11978 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 11979 const OMPExecutableDirective &S, 11980 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 11981 const { 11982 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 11983 // Vars in target/task regions must be excluded completely. 
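  // For illustration (hypothetical user code, not from this file), 'a' below
  // is private in the inner task region, so references to it there must not
  // fire the conditional update of the enclosing directive:
  //   #pragma omp parallel for lastprivate(conditional : a)
  //   for (int i = 0; i < n; ++i) {
  //   #pragma omp task firstprivate(a)
  //     use(a);
  //   }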
11984 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 11985 isOpenMPTaskingDirective(S.getDirectiveKind())) { 11986 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11987 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 11988 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 11989 for (const CapturedStmt::Capture &Cap : CS->captures()) { 11990 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 11991 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 11992 } 11993 } 11994 // Exclude vars in private clauses. 11995 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 11996 for (const Expr *Ref : C->varlists()) { 11997 if (!Ref->getType()->isScalarType()) 11998 continue; 11999 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12000 if (!DRE) 12001 continue; 12002 NeedToCheckForLPCs.insert(DRE->getDecl()); 12003 } 12004 } 12005 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12006 for (const Expr *Ref : C->varlists()) { 12007 if (!Ref->getType()->isScalarType()) 12008 continue; 12009 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12010 if (!DRE) 12011 continue; 12012 NeedToCheckForLPCs.insert(DRE->getDecl()); 12013 } 12014 } 12015 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12016 for (const Expr *Ref : C->varlists()) { 12017 if (!Ref->getType()->isScalarType()) 12018 continue; 12019 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12020 if (!DRE) 12021 continue; 12022 NeedToCheckForLPCs.insert(DRE->getDecl()); 12023 } 12024 } 12025 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12026 for (const Expr *Ref : C->varlists()) { 12027 if (!Ref->getType()->isScalarType()) 12028 continue; 12029 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12030 if (!DRE) 12031 continue; 12032 NeedToCheckForLPCs.insert(DRE->getDecl()); 12033 } 12034 } 12035 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 12036 for (const Expr *Ref : C->varlists()) { 12037 if (!Ref->getType()->isScalarType()) 12038 continue; 12039 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12040 if (!DRE) 12041 continue; 12042 NeedToCheckForLPCs.insert(DRE->getDecl()); 12043 } 12044 } 12045 for (const Decl *VD : NeedToCheckForLPCs) { 12046 for (const LastprivateConditionalData &Data : 12047 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12048 if (Data.DeclToUniqueName.count(VD) > 0) { 12049 if (!Data.Disabled) 12050 NeedToAddForLPCsAsDisabled.insert(VD); 12051 break; 12052 } 12053 } 12054 } 12055 } 12056 12057 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12058 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12059 : CGM(CGF.CGM), 12060 Action((CGM.getLangOpts().OpenMP >= 50 && 12061 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12062 [](const OMPLastprivateClause *C) { 12063 return C->getKind() == 12064 OMPC_LASTPRIVATE_conditional; 12065 })) 12066 ? 
ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
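    // For illustration (assuming 'VD' is a float variable), the implicit
    // record built above is equivalent to:
    //   struct lastprivate.conditional { float Val; char Fired; };
    // 'Fired' flags whether the private copy was assigned in the region.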
NewType = C.getRecordType(RD); 12148 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12149 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12150 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12151 } else { 12152 NewType = std::get<0>(VI->getSecond()); 12153 VDField = std::get<1>(VI->getSecond()); 12154 FiredField = std::get<2>(VI->getSecond()); 12155 BaseLVal = std::get<3>(VI->getSecond()); 12156 } 12157 LValue FiredLVal = 12158 CGF.EmitLValueForField(BaseLVal, FiredField); 12159 CGF.EmitStoreOfScalar( 12160 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12161 FiredLVal); 12162 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12163 } 12164 12165 namespace { 12166 /// Checks if the lastprivate conditional variable is referenced in LHS. 12167 class LastprivateConditionalRefChecker final 12168 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12169 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12170 const Expr *FoundE = nullptr; 12171 const Decl *FoundD = nullptr; 12172 StringRef UniqueDeclName; 12173 LValue IVLVal; 12174 llvm::Function *FoundFn = nullptr; 12175 SourceLocation Loc; 12176 12177 public: 12178 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12179 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12180 llvm::reverse(LPM)) { 12181 auto It = D.DeclToUniqueName.find(E->getDecl()); 12182 if (It == D.DeclToUniqueName.end()) 12183 continue; 12184 if (D.Disabled) 12185 return false; 12186 FoundE = E; 12187 FoundD = E->getDecl()->getCanonicalDecl(); 12188 UniqueDeclName = It->second; 12189 IVLVal = D.IVLVal; 12190 FoundFn = D.Fn; 12191 break; 12192 } 12193 return FoundE == E; 12194 } 12195 bool VisitMemberExpr(const MemberExpr *E) { 12196 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12197 return false; 12198 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12199 llvm::reverse(LPM)) { 12200 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12201 if (It == D.DeclToUniqueName.end()) 12202 continue; 12203 if (D.Disabled) 12204 return false; 12205 FoundE = E; 12206 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12207 UniqueDeclName = It->second; 12208 IVLVal = D.IVLVal; 12209 FoundFn = D.Fn; 12210 break; 12211 } 12212 return FoundE == E; 12213 } 12214 bool VisitStmt(const Stmt *S) { 12215 for (const Stmt *Child : S->children()) { 12216 if (!Child) 12217 continue; 12218 if (const auto *E = dyn_cast<Expr>(Child)) 12219 if (!E->isGLValue()) 12220 continue; 12221 if (Visit(Child)) 12222 return true; 12223 } 12224 return false; 12225 } 12226 explicit LastprivateConditionalRefChecker( 12227 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12228 : LPM(LPM) {} 12229 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12230 getFoundData() const { 12231 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12232 } 12233 }; 12234 } // namespace 12235 12236 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12237 LValue IVLVal, 12238 StringRef UniqueDeclName, 12239 LValue LVal, 12240 SourceLocation Loc) { 12241 // Last updated loop counter for the lastprivate conditional var. 
12242 // int<xx> last_iv = 0; 12243 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12244 llvm::Constant *LastIV = 12245 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12246 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12247 IVLVal.getAlignment().getAsAlign()); 12248 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12249 12250 // Last value of the lastprivate conditional. 12251 // decltype(priv_a) last_a; 12252 llvm::Constant *Last = getOrCreateInternalVariable( 12253 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12254 cast<llvm::GlobalVariable>(Last)->setAlignment( 12255 LVal.getAlignment().getAsAlign()); 12256 LValue LastLVal = 12257 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 12258 12259 // Global loop counter. Required to handle inner parallel-for regions. 12260 // iv 12261 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12262 12263 // #pragma omp critical(a) 12264 // if (last_iv <= iv) { 12265 // last_iv = iv; 12266 // last_a = priv_a; 12267 // } 12268 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12269 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12270 Action.Enter(CGF); 12271 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12272 // (last_iv <= iv) ? Check if the variable is updated and store new 12273 // value in global var. 12274 llvm::Value *CmpRes; 12275 if (IVLVal.getType()->isSignedIntegerType()) { 12276 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12277 } else { 12278 assert(IVLVal.getType()->isUnsignedIntegerType() && 12279 "Loop iteration variable must be integer."); 12280 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12281 } 12282 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12283 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12284 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12285 // { 12286 CGF.EmitBlock(ThenBB); 12287 12288 // last_iv = iv; 12289 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12290 12291 // last_a = priv_a; 12292 switch (CGF.getEvaluationKind(LVal.getType())) { 12293 case TEK_Scalar: { 12294 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12295 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12296 break; 12297 } 12298 case TEK_Complex: { 12299 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12300 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 12301 break; 12302 } 12303 case TEK_Aggregate: 12304 llvm_unreachable( 12305 "Aggregates are not supported in lastprivate conditional."); 12306 } 12307 // } 12308 CGF.EmitBranch(ExitBB); 12309 // There is no need to emit line number for unconditional branch. 12310 (void)ApplyDebugLocation::CreateEmpty(CGF); 12311 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12312 }; 12313 12314 if (CGM.getLangOpts().OpenMPSimd) { 12315 // Do not emit as a critical region as no parallel region could be emitted. 
12316 RegionCodeGenTy ThenRCG(CodeGen); 12317 ThenRCG(CGF); 12318 } else { 12319 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12320 } 12321 } 12322 12323 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12324 const Expr *LHS) { 12325 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12326 return; 12327 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12328 if (!Checker.Visit(LHS)) 12329 return; 12330 const Expr *FoundE; 12331 const Decl *FoundD; 12332 StringRef UniqueDeclName; 12333 LValue IVLVal; 12334 llvm::Function *FoundFn; 12335 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12336 Checker.getFoundData(); 12337 if (FoundFn != CGF.CurFn) { 12338 // Special codegen for inner parallel regions. 12339 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12340 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12341 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12342 "Lastprivate conditional is not found in outer region."); 12343 QualType StructTy = std::get<0>(It->getSecond()); 12344 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12345 LValue PrivLVal = CGF.EmitLValue(FoundE); 12346 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12347 PrivLVal.getAddress(CGF), 12348 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12349 LValue BaseLVal = 12350 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12351 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12352 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12353 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12354 FiredLVal, llvm::AtomicOrdering::Unordered, 12355 /*IsVolatile=*/true, /*isInit=*/false); 12356 return; 12357 } 12358 12359 // Private address of the lastprivate conditional in the current context. 
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region: exit.
12427 if (!GV) 12428 return; 12429 LValue LPLVal = CGF.MakeAddrLValue( 12430 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 12431 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12432 CGF.EmitStoreOfScalar(Res, PrivLVal); 12433 } 12434 12435 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12436 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12437 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12438 llvm_unreachable("Not supported in SIMD-only mode"); 12439 } 12440 12441 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12442 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12443 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12444 llvm_unreachable("Not supported in SIMD-only mode"); 12445 } 12446 12447 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12448 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12449 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12450 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12451 bool Tied, unsigned &NumberOfParts) { 12452 llvm_unreachable("Not supported in SIMD-only mode"); 12453 } 12454 12455 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12456 SourceLocation Loc, 12457 llvm::Function *OutlinedFn, 12458 ArrayRef<llvm::Value *> CapturedVars, 12459 const Expr *IfCond) { 12460 llvm_unreachable("Not supported in SIMD-only mode"); 12461 } 12462 12463 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12464 CodeGenFunction &CGF, StringRef CriticalName, 12465 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12466 const Expr *Hint) { 12467 llvm_unreachable("Not supported in SIMD-only mode"); 12468 } 12469 12470 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12471 const RegionCodeGenTy &MasterOpGen, 12472 SourceLocation Loc) { 12473 llvm_unreachable("Not supported in SIMD-only mode"); 12474 } 12475 12476 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12477 SourceLocation Loc) { 12478 llvm_unreachable("Not supported in SIMD-only mode"); 12479 } 12480 12481 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12482 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12483 SourceLocation Loc) { 12484 llvm_unreachable("Not supported in SIMD-only mode"); 12485 } 12486 12487 void CGOpenMPSIMDRuntime::emitSingleRegion( 12488 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12489 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12490 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12491 ArrayRef<const Expr *> AssignmentOps) { 12492 llvm_unreachable("Not supported in SIMD-only mode"); 12493 } 12494 12495 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12496 const RegionCodeGenTy &OrderedOpGen, 12497 SourceLocation Loc, 12498 bool IsThreads) { 12499 llvm_unreachable("Not supported in SIMD-only mode"); 12500 } 12501 12502 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12503 SourceLocation Loc, 12504 OpenMPDirectiveKind Kind, 12505 bool EmitChecks, 12506 bool ForceSimpleCall) { 12507 llvm_unreachable("Not supported in SIMD-only mode"); 12508 } 12509 12510 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12511 CodeGenFunction &CGF, SourceLocation Loc, 12512 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12513 bool Ordered, const DispatchRTInput &DispatchValues) { 12514 llvm_unreachable("Not supported in SIMD-only mode"); 12515 } 12516 12517 
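// For illustration (hypothetical user code, not from this file): in
// SIMD-only mode ('-fopenmp-simd'), a combined construct such as
//   #pragma omp parallel for simd
//   for (int i = 0; i < n; ++i) ...
// is lowered as a plain 'simd' loop, so none of these stubs is reachable.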
void CGOpenMPSIMDRuntime::emitForStaticInit( 12518 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 12519 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12520 llvm_unreachable("Not supported in SIMD-only mode"); 12521 } 12522 12523 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12524 CodeGenFunction &CGF, SourceLocation Loc, 12525 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 12526 llvm_unreachable("Not supported in SIMD-only mode"); 12527 } 12528 12529 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12530 SourceLocation Loc, 12531 unsigned IVSize, 12532 bool IVSigned) { 12533 llvm_unreachable("Not supported in SIMD-only mode"); 12534 } 12535 12536 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12537 SourceLocation Loc, 12538 OpenMPDirectiveKind DKind) { 12539 llvm_unreachable("Not supported in SIMD-only mode"); 12540 } 12541 12542 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12543 SourceLocation Loc, 12544 unsigned IVSize, bool IVSigned, 12545 Address IL, Address LB, 12546 Address UB, Address ST) { 12547 llvm_unreachable("Not supported in SIMD-only mode"); 12548 } 12549 12550 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12551 llvm::Value *NumThreads, 12552 SourceLocation Loc) { 12553 llvm_unreachable("Not supported in SIMD-only mode"); 12554 } 12555 12556 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12557 ProcBindKind ProcBind, 12558 SourceLocation Loc) { 12559 llvm_unreachable("Not supported in SIMD-only mode"); 12560 } 12561 12562 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12563 const VarDecl *VD, 12564 Address VDAddr, 12565 SourceLocation Loc) { 12566 llvm_unreachable("Not supported in SIMD-only mode"); 12567 } 12568 12569 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12570 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12571 CodeGenFunction *CGF) { 12572 llvm_unreachable("Not supported in SIMD-only mode"); 12573 } 12574 12575 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12576 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12577 llvm_unreachable("Not supported in SIMD-only mode"); 12578 } 12579 12580 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12581 ArrayRef<const Expr *> Vars, 12582 SourceLocation Loc, 12583 llvm::AtomicOrdering AO) { 12584 llvm_unreachable("Not supported in SIMD-only mode"); 12585 } 12586 12587 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12588 const OMPExecutableDirective &D, 12589 llvm::Function *TaskFunction, 12590 QualType SharedsTy, Address Shareds, 12591 const Expr *IfCond, 12592 const OMPTaskDataTy &Data) { 12593 llvm_unreachable("Not supported in SIMD-only mode"); 12594 } 12595 12596 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12597 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12598 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12599 const Expr *IfCond, const OMPTaskDataTy &Data) { 12600 llvm_unreachable("Not supported in SIMD-only mode"); 12601 } 12602 12603 void CGOpenMPSIMDRuntime::emitReduction( 12604 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12605 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12606 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12607 assert(Options.SimpleReduction && "Only simple 
reduction is expected."); 12608 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12609 ReductionOps, Options); 12610 } 12611 12612 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12613 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12614 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12615 llvm_unreachable("Not supported in SIMD-only mode"); 12616 } 12617 12618 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12619 SourceLocation Loc, 12620 bool IsWorksharingReduction) { 12621 llvm_unreachable("Not supported in SIMD-only mode"); 12622 } 12623 12624 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12625 SourceLocation Loc, 12626 ReductionCodeGen &RCG, 12627 unsigned N) { 12628 llvm_unreachable("Not supported in SIMD-only mode"); 12629 } 12630 12631 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12632 SourceLocation Loc, 12633 llvm::Value *ReductionsPtr, 12634 LValue SharedLVal) { 12635 llvm_unreachable("Not supported in SIMD-only mode"); 12636 } 12637 12638 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12639 SourceLocation Loc) { 12640 llvm_unreachable("Not supported in SIMD-only mode"); 12641 } 12642 12643 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12644 CodeGenFunction &CGF, SourceLocation Loc, 12645 OpenMPDirectiveKind CancelRegion) { 12646 llvm_unreachable("Not supported in SIMD-only mode"); 12647 } 12648 12649 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12650 SourceLocation Loc, const Expr *IfCond, 12651 OpenMPDirectiveKind CancelRegion) { 12652 llvm_unreachable("Not supported in SIMD-only mode"); 12653 } 12654 12655 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12656 const OMPExecutableDirective &D, StringRef ParentName, 12657 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12658 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12659 llvm_unreachable("Not supported in SIMD-only mode"); 12660 } 12661 12662 void CGOpenMPSIMDRuntime::emitTargetCall( 12663 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12664 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12665 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12666 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12667 const OMPLoopDirective &D)> 12668 SizeEmitter) { 12669 llvm_unreachable("Not supported in SIMD-only mode"); 12670 } 12671 12672 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12673 llvm_unreachable("Not supported in SIMD-only mode"); 12674 } 12675 12676 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12677 llvm_unreachable("Not supported in SIMD-only mode"); 12678 } 12679 12680 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 12681 return false; 12682 } 12683 12684 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12685 const OMPExecutableDirective &D, 12686 SourceLocation Loc, 12687 llvm::Function *OutlinedFn, 12688 ArrayRef<llvm::Value *> CapturedVars) { 12689 llvm_unreachable("Not supported in SIMD-only mode"); 12690 } 12691 12692 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12693 const Expr *NumTeams, 12694 const Expr *ThreadLimit, 12695 SourceLocation Loc) { 12696 llvm_unreachable("Not supported in SIMD-only mode"); 12697 } 12698 12699 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12700 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 
12701 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12702 llvm_unreachable("Not supported in SIMD-only mode"); 12703 } 12704 12705 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12706 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12707 const Expr *Device) { 12708 llvm_unreachable("Not supported in SIMD-only mode"); 12709 } 12710 12711 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12712 const OMPLoopDirective &D, 12713 ArrayRef<Expr *> NumIterations) { 12714 llvm_unreachable("Not supported in SIMD-only mode"); 12715 } 12716 12717 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12718 const OMPDependClause *C) { 12719 llvm_unreachable("Not supported in SIMD-only mode"); 12720 } 12721 12722 const VarDecl * 12723 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12724 const VarDecl *NativeParam) const { 12725 llvm_unreachable("Not supported in SIMD-only mode"); 12726 } 12727 12728 Address 12729 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12730 const VarDecl *NativeParam, 12731 const VarDecl *TargetParam) const { 12732 llvm_unreachable("Not supported in SIMD-only mode"); 12733 } 12734
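// For illustration, a sketch (assuming the usual CodeGenModule setup, not
// shown in this file) of how the SIMD-only runtime gets selected:
//   if (LangOpts.OpenMPSimd)
//     OpenMPRuntime.reset(new CGOpenMPSIMDRuntime(*this));
//   else
//     OpenMPRuntime.reset(new CGOpenMPRuntime(*this));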