//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
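        // The control flow built here is, schematically (a hedged sketch of
        // the emitted structure, not literal IR):
        //
        //   switch (*partid) {           // .untied.jmp.N blocks added below
        //   case 0:  goto .untied.jmp.0; // first part of the task body
        //   case 1:  goto .untied.jmp.1; // one case per scheduling point
        //   default: goto .untied.done.; // finished: branch to the return
        //   }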
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
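
// A hedged illustration of how CGOpenMPInnerExprInfo is meant to be used at a
// call site (hypothetical snippet, not code from this file): to emit a clause
// expression that may reference globals captured by the innermost captured
// statement, one temporarily installs this info and emits through it:
//
//   CGOpenMPInnerExprInfo InnerInfo(CGF, *CS);
//   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &InnerInfo);
//   CGF.EmitIgnoredExpr(ClauseExpr); // globals now resolve to their captures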
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All the enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
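
// For example (an illustrative sketch, not code from this file), a
// translation unit containing
//   #pragma omp requires unified_shared_memory, dynamic_allocators
// would be registered with the runtime using the OR of the corresponding
// flags, i.e. OMP_REQ_UNIFIED_SHARED_MEMORY | OMP_REQ_DYNAMIC_ALLOCATORS
// (0x008 | 0x010 == 0x018).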
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
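
// For example (illustrative encoding mirroring kmp.h, not code from this
// file), a loop with "schedule(nonmonotonic: dynamic, 4)" is lowered using
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic
// i.e. 35 | (1 << 30), with the chunk size 4 passed separately to the
// dispatch-init runtime entry point.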
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
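
// As a concrete, hedged example (hypothetical user code, not from this file),
// given
//   #pragma omp declare reduction(mymin : int : omp_out = \
//       omp_in < omp_out ? omp_in : omp_out) initializer(omp_priv = INT_MAX)
// getReductionInit() recognizes the user-defined reduction behind the
// reduction op and returns its OMPDeclareReductionDecl, and
// emitInitWithReductionInitializer() emits the "omp_priv = INT_MAX"
// initialization into the private copy.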
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
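
// The block structure emitted above is, schematically (a sketch, not literal
// IR):
//
//   entry:               br (DestBegin == DestEnd), done, body
//   omp.arrayinit.body:  %dest = phi [DestBegin, entry], [%next, body]
//                        <initialize element at %dest>
//                        %next = gep %dest, 1
//                        br (%next == DestEnd), done, body
//   omp.arrayinit.done:  ...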
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}
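
// A hedged note on the Sizes bookkeeping computed below: for a reduction item
// whose private type is not variably modified, Sizes[N] is
// {size-in-chars, nullptr}; for a VLA item such as "int a[n]" with n == 16
// (hypothetical example, 4-byte int), it would be {64, 16}.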
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}
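
// For instance (an assumed C++ scenario, not code from this file), a
// user-defined reduction item of type std::string has a non-trivial
// destructor, so needCleanups() returns true and emitCleanups() below pushes
// a destroy cleanup for the private copy once the reduction is done.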
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}
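
// A hedged illustration of the address adjustment performed below: for
// "reduction(+ : a[1:2])" over "int a[4]" (hypothetical user code), the
// private buffer holds only the two section elements, so the private address
// is shifted back by the distance from &a[0] to &a[1] to obtain a pointer
// that can stand in for the base 'a' in the reduction expressions.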
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean up non-target variable declarations that are possibly used only in
  // debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
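  // So, e.g., for a combiner written as "omp_out += omp_in" (hypothetical
  // user code), the body below is emitted as if it were
  //   *omp_out_parm += *omp_in_parm;
  // with both variables remapped through the private scope set up next.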
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize, we split the block at
    // IP, use the new block (=BB) as the destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)), which is then fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed, but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
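
// A brief, hedged note on the mechanism above: the "service" insert point is
// a no-op placeholder instruction, in IR roughly
//   %svcpt = bitcast i32 undef to i32
// that only marks where function-wide runtime calls (e.g. the
// __kmpc_global_thread_num call emitted in getThreadID below) should be
// placed; clearLocThreadIdInsertPt erases it again once it is no longer
// needed.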
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use the parameter to avoid a possible
  // crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If the value was loaded in the entry block, cache it and use it
        // everywhere in the function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate the thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
                                             bool IsGPUDistribute) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name;
  if (IsGPUDistribute)
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
                                    : "__kmpc_distribute_static_init_4u")
                        : (IVSigned ? "__kmpc_distribute_static_init_8"
                                    : "__kmpc_distribute_static_init_8u");
  else
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                    : "__kmpc_for_static_init_4u")
                        : (IVSigned ? "__kmpc_for_static_init_8"
                                    : "__kmpc_for_static_init_8u");

  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), // loc
                              CGM.Int32Ty,           // tid
                              CGM.Int32Ty,           // schedtype
                              ITy,                   // lower
                              ITy,                   // upper
                              ITy,                   // stride
                              ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should always be valid and have a file ID (the user cannot use
  // #pragma directives in macros).

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to be always valid.");
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
      SM.getDiagnostics().Report(diag::err_cannot_open_file)
          << PLoc.getFilename() << EC.message();
  }

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
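  // Naming sketch (assuming the default "." separator produced by getName):
  // a variable mangled as "_ZL1x" gets a cache variable named "_ZL1x.cache.".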
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     OMPBuilder.getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init the OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register the constructor/destructor for the variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits a destructor call for the threadprivate
      // copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL: the slot is reserved by the runtime, which currently
    // requires this parameter to always be NULL; otherwise it fires an
    // assertion.
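    // Shape of what gets registered with the runtime (pseudo-code sketch,
    // not emitted verbatim):
    //   void *__kmpc_global_ctor_(void *dst) { new (dst) T(init); return dst; }
    //   void __kmpc_global_dtor_(void *dst)  { ((T *)dst)->~T(); }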
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration, which we know does not
  // conflict with any target region.
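  // The prefix built below has the following shape (illustrative rendering
  // of the format strings used):
  //   __omp_offloading__<device-id>_<file-id>_<variable-name>_l<line>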
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that emits a destructor call for the threadprivate
      // copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
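      // Added rationale: the artificial location keeps the synthesized dtor
      // body from being attributed to arbitrary user source lines in debug
      // info.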
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call but the ones called in serialized
    // regions could be inlined. This is not perfect but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do
    // the handling there. Much cleaner code.
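    // Call sequence for this serialized path, sketched from the code around
    // it:
    //   __kmpc_serialized_parallel(&loc, gtid);
    //   outlined_fn(&gtid, &zero_bound, captured...);
    //   __kmpc_end_serialized_parallel(&loc, gtid);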
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region, but in a regular serial code region, get the thread ID by calling
// kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
// temporary, and return the address of that temporary.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
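/// Added summary: emits EnterCallee(EnterArgs) on entry and
/// ExitCallee(ExitArgs) on exit; when Conditional is set, the region body is
/// guarded by the truthiness of the enter call's result.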
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of the blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build a function that copies private values from the single region to
    // all other threads in the corresponding parallel region.
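    // Rough shape of the generated helper (sketch):
    //   void .omp.copyprivate.copy_func(void *lhs, void *rhs) {
    //     *(T0 *)lhs[0] = *(T0 *)rhs[0];  // one assignment per variable
    //     ...
    //   }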
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose schedule(static, 1).
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
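    // Added note: the constant 1 is materialized as an unsigned 32-bit
    // IntegerLiteral so the rest of loop codegen can treat it like a
    // user-written chunk expression.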
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // Only static is allowed for dist_schedule.
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
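  // Example (illustrative): under OpenMP 5.0, plain 'schedule(dynamic)' is
  // lowered as OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic, while
  // 'schedule(static)' carries no nonmonotonic modifier.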
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(
      CGF, Loc,
      isOpenMPLoopDirective(DKind) ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
OMP_IDENT_WORK_LOOP
2874 : OMP_IDENT_WORK_SECTIONS),
2875 getThreadID(CGF, Loc)};
2876 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2877 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2878 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2879 CGF.EmitRuntimeCall(
2880 OMPBuilder.getOrCreateRuntimeFunction(
2881 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2882 Args);
2883 else
2884 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2885 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2886 Args);
2887 }
2888
2889 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2890 SourceLocation Loc,
2891 unsigned IVSize,
2892 bool IVSigned) {
2893 if (!CGF.HaveInsertPoint())
2894 return;
2895 // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2896 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2897 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2898 }
2899
2900 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2901 SourceLocation Loc, unsigned IVSize,
2902 bool IVSigned, Address IL,
2903 Address LB, Address UB,
2904 Address ST) {
2905 // Call __kmpc_dispatch_next(
2906 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2907 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2908 // kmp_int[32|64] *p_stride);
2909 llvm::Value *Args[] = {
2910 emitUpdateLocation(CGF, Loc),
2911 getThreadID(CGF, Loc),
2912 IL.getPointer(), // &isLastIter
2913 LB.getPointer(), // &Lower
2914 UB.getPointer(), // &Upper
2915 ST.getPointer() // &Stride
2916 };
2917 llvm::Value *Call =
2918 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2919 return CGF.EmitScalarConversion(
2920 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2921 CGF.getContext().BoolTy, Loc);
2922 }
2923
2924 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2925 llvm::Value *NumThreads,
2926 SourceLocation Loc) {
2927 if (!CGF.HaveInsertPoint())
2928 return;
2929 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2930 llvm::Value *Args[] = {
2931 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2932 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2933 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2934 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2935 Args);
2936 }
2937
2938 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2939 ProcBindKind ProcBind,
2940 SourceLocation Loc) {
2941 if (!CGF.HaveInsertPoint())
2942 return;
2943 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2944 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2945 llvm::Value *Args[] = {
2946 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2947 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2948 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2949 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2950 Args);
2951 }
2952
2953 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2954 SourceLocation Loc, llvm::AtomicOrdering AO) {
2955 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2956 OMPBuilder.createFlush(CGF.Builder);
2957 } else {
2958 if (!CGF.HaveInsertPoint())
2959 return;
2960 // Build call void __kmpc_flush(ident_t *loc)
2961 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2962 CGM.getModule(), OMPRTL___kmpc_flush),
2963
emitUpdateLocation(CGF, Loc));
2964 }
2965 }
2966
2967 namespace {
2968 /// Indexes of fields for type kmp_task_t.
2969 enum KmpTaskTFields {
2970 /// List of shared variables.
2971 KmpTaskTShareds,
2972 /// Task routine.
2973 KmpTaskTRoutine,
2974 /// Partition id for the untied tasks.
2975 KmpTaskTPartId,
2976 /// Function with call of destructors for private variables.
2977 Data1,
2978 /// Task priority.
2979 Data2,
2980 /// (Taskloops only) Lower bound.
2981 KmpTaskTLowerBound,
2982 /// (Taskloops only) Upper bound.
2983 KmpTaskTUpperBound,
2984 /// (Taskloops only) Stride.
2985 KmpTaskTStride,
2986 /// (Taskloops only) Is last iteration flag.
2987 KmpTaskTLastIter,
2988 /// (Taskloops only) Reduction data.
2989 KmpTaskTReductions,
2990 };
2991 } // anonymous namespace
2992
2993 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2994 return OffloadEntriesTargetRegion.empty() &&
2995 OffloadEntriesDeviceGlobalVar.empty();
2996 }
2997
2998 /// Initialize target region entry.
2999 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3000 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3001 StringRef ParentName, unsigned LineNum,
3002 unsigned Order) {
3003 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3004 "only required for the device "
3005 "code generation.");
3006 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3007 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3008 OMPTargetRegionEntryTargetRegion);
3009 ++OffloadingEntriesNum;
3010 }
3011
3012 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3013 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3014 StringRef ParentName, unsigned LineNum,
3015 llvm::Constant *Addr, llvm::Constant *ID,
3016 OMPTargetRegionEntryKind Flags) {
3017 // If we are emitting code for a target, the entry is already initialized;
3018 // it only has to be registered.
3019 if (CGM.getLangOpts().OpenMPIsDevice) {
3020 // This could happen if the device compilation is invoked standalone.
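// (In that case no host IR metadata was loaded, so the entry was never
// initialized via initializeTargetRegionEntryInfo() and there is nothing
// to update here; see loadOffloadInfoMetadata() below.)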
3021 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 3022 return; 3023 auto &Entry = 3024 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3025 Entry.setAddress(Addr); 3026 Entry.setID(ID); 3027 Entry.setFlags(Flags); 3028 } else { 3029 if (Flags == 3030 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 3031 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 3032 /*IgnoreAddressId*/ true)) 3033 return; 3034 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3035 "Target region entry already registered!"); 3036 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3037 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3038 ++OffloadingEntriesNum; 3039 } 3040 } 3041 3042 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3043 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3044 bool IgnoreAddressId) const { 3045 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3046 if (PerDevice == OffloadEntriesTargetRegion.end()) 3047 return false; 3048 auto PerFile = PerDevice->second.find(FileID); 3049 if (PerFile == PerDevice->second.end()) 3050 return false; 3051 auto PerParentName = PerFile->second.find(ParentName); 3052 if (PerParentName == PerFile->second.end()) 3053 return false; 3054 auto PerLine = PerParentName->second.find(LineNum); 3055 if (PerLine == PerParentName->second.end()) 3056 return false; 3057 // Fail if this entry is already registered. 3058 if (!IgnoreAddressId && 3059 (PerLine->second.getAddress() || PerLine->second.getID())) 3060 return false; 3061 return true; 3062 } 3063 3064 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3065 const OffloadTargetRegionEntryInfoActTy &Action) { 3066 // Scan all target region entries and perform the provided action. 3067 for (const auto &D : OffloadEntriesTargetRegion) 3068 for (const auto &F : D.second) 3069 for (const auto &P : F.second) 3070 for (const auto &L : P.second) 3071 Action(D.first, F.first, P.first(), L.first, L.second); 3072 } 3073 3074 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3075 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3076 OMPTargetGlobalVarEntryKind Flags, 3077 unsigned Order) { 3078 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3079 "only required for the device " 3080 "code generation."); 3081 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3082 ++OffloadingEntriesNum; 3083 } 3084 3085 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3086 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3087 CharUnits VarSize, 3088 OMPTargetGlobalVarEntryKind Flags, 3089 llvm::GlobalValue::LinkageTypes Linkage) { 3090 if (CGM.getLangOpts().OpenMPIsDevice) { 3091 // This could happen if the device compilation is invoked standalone. 
3092 if (!hasDeviceGlobalVarEntryInfo(VarName))
3093 return;
3094 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3095 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3096 if (Entry.getVarSize().isZero()) {
3097 Entry.setVarSize(VarSize);
3098 Entry.setLinkage(Linkage);
3099 }
3100 return;
3101 }
3102 Entry.setVarSize(VarSize);
3103 Entry.setLinkage(Linkage);
3104 Entry.setAddress(Addr);
3105 } else {
3106 if (hasDeviceGlobalVarEntryInfo(VarName)) {
3107 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3108 assert(Entry.isValid() && Entry.getFlags() == Flags &&
3109 "Entry not initialized!");
3110 if (Entry.getVarSize().isZero()) {
3111 Entry.setVarSize(VarSize);
3112 Entry.setLinkage(Linkage);
3113 }
3114 return;
3115 }
3116 OffloadEntriesDeviceGlobalVar.try_emplace(
3117 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3118 ++OffloadingEntriesNum;
3119 }
3120 }
3121
3122 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3123 actOnDeviceGlobalVarEntriesInfo(
3124 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3125 // Scan all device global variable entries and perform the provided action.
3126 for (const auto &E : OffloadEntriesDeviceGlobalVar)
3127 Action(E.getKey(), E.getValue());
3128 }
3129
3130 void CGOpenMPRuntime::createOffloadEntry(
3131 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3132 llvm::GlobalValue::LinkageTypes Linkage) {
3133 StringRef Name = Addr->getName();
3134 llvm::Module &M = CGM.getModule();
3135 llvm::LLVMContext &C = M.getContext();
3136
3137 // Create constant string with the name.
3138 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3139
3140 std::string StringName = getName({"omp_offloading", "entry_name"});
3141 auto *Str = new llvm::GlobalVariable(
3142 M, StrPtrInit->getType(), /*isConstant=*/true,
3143 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3144 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3145
3146 llvm::Constant *Data[] = {
3147 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3148 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3149 llvm::ConstantInt::get(CGM.SizeTy, Size),
3150 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3151 llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3152 std::string EntryName = getName({"omp_offloading", "entry", ""});
3153 llvm::GlobalVariable *Entry = createGlobalStruct(
3154 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3155 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3156
3157 // The entry has to be created in the section the linker expects it to be in.
3158 Entry->setSection("omp_offloading_entries");
3159 }
3160
3161 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3162 // Emit the offloading entries and metadata so that the device codegen side
3163 // can easily figure out what to emit. The produced metadata looks like
3164 // this:
3165 //
3166 // !omp_offload.info = !{!1, ...}
3167 //
3168 // Right now we only generate metadata for functions that contain target
3169 // regions.
3170
3171 // If we are in simd mode or there are no entries, we don't need to do
3172 // anything.
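// (For reference, each target-region operand of the node has the schematic
// shape
//   !{i32 0, i32 <device-id>, i32 <file-id>, !"<parent-name>", i32 <line>,
//     i32 <order>}
// and each declare-target-variable operand the shape
//   !{i32 1, !"<mangled-name>", i32 <flags>, i32 <order>},
// as built by the emitters below.)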
3173 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3174 return; 3175 3176 llvm::Module &M = CGM.getModule(); 3177 llvm::LLVMContext &C = M.getContext(); 3178 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3179 SourceLocation, StringRef>, 3180 16> 3181 OrderedEntries(OffloadEntriesInfoManager.size()); 3182 llvm::SmallVector<StringRef, 16> ParentFunctions( 3183 OffloadEntriesInfoManager.size()); 3184 3185 // Auxiliary methods to create metadata values and strings. 3186 auto &&GetMDInt = [this](unsigned V) { 3187 return llvm::ConstantAsMetadata::get( 3188 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3189 }; 3190 3191 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3192 3193 // Create the offloading info metadata node. 3194 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3195 3196 // Create function that emits metadata for each target region entry; 3197 auto &&TargetRegionMetadataEmitter = 3198 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3199 &GetMDString]( 3200 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3201 unsigned Line, 3202 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3203 // Generate metadata for target regions. Each entry of this metadata 3204 // contains: 3205 // - Entry 0 -> Kind of this type of metadata (0). 3206 // - Entry 1 -> Device ID of the file where the entry was identified. 3207 // - Entry 2 -> File ID of the file where the entry was identified. 3208 // - Entry 3 -> Mangled name of the function where the entry was 3209 // identified. 3210 // - Entry 4 -> Line in the file where the entry was identified. 3211 // - Entry 5 -> Order the entry was created. 3212 // The first element of the metadata node is the kind. 3213 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3214 GetMDInt(FileID), GetMDString(ParentName), 3215 GetMDInt(Line), GetMDInt(E.getOrder())}; 3216 3217 SourceLocation Loc; 3218 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3219 E = CGM.getContext().getSourceManager().fileinfo_end(); 3220 I != E; ++I) { 3221 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3222 I->getFirst()->getUniqueID().getFile() == FileID) { 3223 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3224 I->getFirst(), Line, 1); 3225 break; 3226 } 3227 } 3228 // Save this entry in the right position of the ordered entries array. 3229 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3230 ParentFunctions[E.getOrder()] = ParentName; 3231 3232 // Add metadata to the named metadata node. 3233 MD->addOperand(llvm::MDNode::get(C, Ops)); 3234 }; 3235 3236 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3237 TargetRegionMetadataEmitter); 3238 3239 // Create function that emits metadata for each device global variable entry; 3240 auto &&DeviceGlobalVarMetadataEmitter = 3241 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3242 MD](StringRef MangledName, 3243 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3244 &E) { 3245 // Generate metadata for global variables. Each entry of this metadata 3246 // contains: 3247 // - Entry 0 -> Kind of this type of metadata (1). 3248 // - Entry 1 -> Mangled name of the variable. 3249 // - Entry 2 -> Declare target kind. 3250 // - Entry 3 -> Order the entry was created. 3251 // The first element of the metadata node is the kind. 
3252 llvm::Metadata *Ops[] = {
3253 GetMDInt(E.getKind()), GetMDString(MangledName),
3254 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3255
3256 // Save this entry in the right position of the ordered entries array.
3257 OrderedEntries[E.getOrder()] =
3258 std::make_tuple(&E, SourceLocation(), MangledName);
3259
3260 // Add metadata to the named metadata node.
3261 MD->addOperand(llvm::MDNode::get(C, Ops));
3262 };
3263
3264 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3265 DeviceGlobalVarMetadataEmitter);
3266
3267 for (const auto &E : OrderedEntries) {
3268 assert(std::get<0>(E) && "All ordered entries must exist!");
3269 if (const auto *CE =
3270 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3271 std::get<0>(E))) {
3272 if (!CE->getID() || !CE->getAddress()) {
3273 // Do not blame the entry if the parent function is not emitted.
3274 StringRef FnName = ParentFunctions[CE->getOrder()];
3275 if (!CGM.GetGlobalValue(FnName))
3276 continue;
3277 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3278 DiagnosticsEngine::Error,
3279 "Offloading entry for target region in %0 is incorrect: either the "
3280 "address or the ID is invalid.");
3281 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3282 continue;
3283 }
3284 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3285 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3286 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3287 OffloadEntryInfoDeviceGlobalVar>(
3288 std::get<0>(E))) {
3289 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3290 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3291 CE->getFlags());
3292 switch (Flags) {
3293 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3294 if (CGM.getLangOpts().OpenMPIsDevice &&
3295 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3296 continue;
3297 if (!CE->getAddress()) {
3298 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3299 DiagnosticsEngine::Error, "Offloading entry for declare target "
3300 "variable %0 is incorrect: the "
3301 "address is invalid.");
3302 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3303 continue;
3304 }
3305 // The variable has no definition - no need to add the entry.
3306 if (CE->getVarSize().isZero())
3307 continue;
3308 break;
3309 }
3310 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3311 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3312 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3313 "Declare target link address is set.");
3314 if (CGM.getLangOpts().OpenMPIsDevice)
3315 continue;
3316 if (!CE->getAddress()) {
3317 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3318 DiagnosticsEngine::Error,
3319 "Offloading entry for declare target variable is incorrect: the "
3320 "address is invalid.");
3321 CGM.getDiags().Report(DiagID);
3322 continue;
3323 }
3324 break;
3325 }
3326 createOffloadEntry(CE->getAddress(), CE->getAddress(),
3327 CE->getVarSize().getQuantity(), Flags,
3328 CE->getLinkage());
3329 } else {
3330 llvm_unreachable("Unsupported entry kind.");
3331 }
3332 }
3333 }
3334
3335 /// Loads all the offload entries information from the host IR
3336 /// metadata.
3337 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3338 // If we are in target mode, load the metadata from the host IR. This code has
3339 // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
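// (OMPHostIRFile names the bitcode produced by the host half of the
// compilation; the driver forwards it to the device cc1 invocation via
// -fopenmp-host-ir-file-path.)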
3340
3341 if (!CGM.getLangOpts().OpenMPIsDevice)
3342 return;
3343
3344 if (CGM.getLangOpts().OMPHostIRFile.empty())
3345 return;
3346
3347 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3348 if (auto EC = Buf.getError()) {
3349 CGM.getDiags().Report(diag::err_cannot_open_file)
3350 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3351 return;
3352 }
3353
3354 llvm::LLVMContext C;
3355 auto ME = expectedToErrorOrAndEmitErrors(
3356 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3357
3358 if (auto EC = ME.getError()) {
3359 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3360 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3361 CGM.getDiags().Report(DiagID)
3362 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3363 return;
3364 }
3365
3366 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3367 if (!MD)
3368 return;
3369
3370 for (llvm::MDNode *MN : MD->operands()) {
3371 auto &&GetMDInt = [MN](unsigned Idx) {
3372 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3373 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3374 };
3375
3376 auto &&GetMDString = [MN](unsigned Idx) {
3377 auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3378 return V->getString();
3379 };
3380
3381 switch (GetMDInt(0)) {
3382 default:
3383 llvm_unreachable("Unexpected metadata!");
3384 break;
3385 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3386 OffloadingEntryInfoTargetRegion:
3387 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3388 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3389 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3390 /*Order=*/GetMDInt(5));
3391 break;
3392 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3393 OffloadingEntryInfoDeviceGlobalVar:
3394 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3395 /*MangledName=*/GetMDString(1),
3396 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3397 /*Flags=*/GetMDInt(2)),
3398 /*Order=*/GetMDInt(3));
3399 break;
3400 }
3401 }
3402 }
3403
3404 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3405 if (!KmpRoutineEntryPtrTy) {
3406 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3407 ASTContext &C = CGM.getContext();
3408 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3409 FunctionProtoType::ExtProtoInfo EPI;
3410 KmpRoutineEntryPtrQTy = C.getPointerType(
3411 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3412 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3413 }
3414 }
3415
3416 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3417 // Make sure the type of the entry is already created. This is the type we
3418 // have to create:
3419 // struct __tgt_offload_entry{
3420 // void *addr; // Pointer to the offload entry info.
3421 // // (function or global)
3422 // char *name; // Name of the function or global.
3423 // size_t size; // Size of the entry info (0 if it is a function).
3424 // int32_t flags; // Flags associated with the entry, e.g. 'link'.
3425 // int32_t reserved; // Reserved, to be used by the runtime library.
3426 // }; 3427 if (TgtOffloadEntryQTy.isNull()) { 3428 ASTContext &C = CGM.getContext(); 3429 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3430 RD->startDefinition(); 3431 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3432 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3433 addFieldToRecordDecl(C, RD, C.getSizeType()); 3434 addFieldToRecordDecl( 3435 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3436 addFieldToRecordDecl( 3437 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3438 RD->completeDefinition(); 3439 RD->addAttr(PackedAttr::CreateImplicit(C)); 3440 TgtOffloadEntryQTy = C.getRecordType(RD); 3441 } 3442 return TgtOffloadEntryQTy; 3443 } 3444 3445 namespace { 3446 struct PrivateHelpersTy { 3447 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3448 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3449 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3450 PrivateElemInit(PrivateElemInit) {} 3451 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3452 const Expr *OriginalRef = nullptr; 3453 const VarDecl *Original = nullptr; 3454 const VarDecl *PrivateCopy = nullptr; 3455 const VarDecl *PrivateElemInit = nullptr; 3456 bool isLocalPrivate() const { 3457 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3458 } 3459 }; 3460 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3461 } // anonymous namespace 3462 3463 static bool isAllocatableDecl(const VarDecl *VD) { 3464 const VarDecl *CVD = VD->getCanonicalDecl(); 3465 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3466 return false; 3467 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3468 // Use the default allocation. 3469 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 3470 !AA->getAllocator()); 3471 } 3472 3473 static RecordDecl * 3474 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3475 if (!Privates.empty()) { 3476 ASTContext &C = CGM.getContext(); 3477 // Build struct .kmp_privates_t. { 3478 // /* private vars */ 3479 // }; 3480 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3481 RD->startDefinition(); 3482 for (const auto &Pair : Privates) { 3483 const VarDecl *VD = Pair.second.Original; 3484 QualType Type = VD->getType().getNonReferenceType(); 3485 // If the private variable is a local variable with lvalue ref type, 3486 // allocate the pointer instead of the pointee type. 
3487 if (Pair.second.isLocalPrivate()) { 3488 if (VD->getType()->isLValueReferenceType()) 3489 Type = C.getPointerType(Type); 3490 if (isAllocatableDecl(VD)) 3491 Type = C.getPointerType(Type); 3492 } 3493 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3494 if (VD->hasAttrs()) { 3495 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3496 E(VD->getAttrs().end()); 3497 I != E; ++I) 3498 FD->addAttr(*I); 3499 } 3500 } 3501 RD->completeDefinition(); 3502 return RD; 3503 } 3504 return nullptr; 3505 } 3506 3507 static RecordDecl * 3508 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3509 QualType KmpInt32Ty, 3510 QualType KmpRoutineEntryPointerQTy) { 3511 ASTContext &C = CGM.getContext(); 3512 // Build struct kmp_task_t { 3513 // void * shareds; 3514 // kmp_routine_entry_t routine; 3515 // kmp_int32 part_id; 3516 // kmp_cmplrdata_t data1; 3517 // kmp_cmplrdata_t data2; 3518 // For taskloops additional fields: 3519 // kmp_uint64 lb; 3520 // kmp_uint64 ub; 3521 // kmp_int64 st; 3522 // kmp_int32 liter; 3523 // void * reductions; 3524 // }; 3525 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3526 UD->startDefinition(); 3527 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3528 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3529 UD->completeDefinition(); 3530 QualType KmpCmplrdataTy = C.getRecordType(UD); 3531 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3532 RD->startDefinition(); 3533 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3534 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3535 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3536 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3537 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3538 if (isOpenMPTaskLoopDirective(Kind)) { 3539 QualType KmpUInt64Ty = 3540 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3541 QualType KmpInt64Ty = 3542 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3543 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3544 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3545 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3546 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3547 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3548 } 3549 RD->completeDefinition(); 3550 return RD; 3551 } 3552 3553 static RecordDecl * 3554 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3555 ArrayRef<PrivateDataTy> Privates) { 3556 ASTContext &C = CGM.getContext(); 3557 // Build struct kmp_task_t_with_privates { 3558 // kmp_task_t task_data; 3559 // .kmp_privates_t. privates; 3560 // }; 3561 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3562 RD->startDefinition(); 3563 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3564 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3565 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3566 RD->completeDefinition(); 3567 return RD; 3568 } 3569 3570 /// Emit a proxy function which accepts kmp_task_t as the second 3571 /// argument. 
3572 /// \code 3573 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3574 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3575 /// For taskloops: 3576 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3577 /// tt->reductions, tt->shareds); 3578 /// return 0; 3579 /// } 3580 /// \endcode 3581 static llvm::Function * 3582 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3583 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3584 QualType KmpTaskTWithPrivatesPtrQTy, 3585 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3586 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3587 llvm::Value *TaskPrivatesMap) { 3588 ASTContext &C = CGM.getContext(); 3589 FunctionArgList Args; 3590 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3591 ImplicitParamDecl::Other); 3592 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3593 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3594 ImplicitParamDecl::Other); 3595 Args.push_back(&GtidArg); 3596 Args.push_back(&TaskTypeArg); 3597 const auto &TaskEntryFnInfo = 3598 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3599 llvm::FunctionType *TaskEntryTy = 3600 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3601 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3602 auto *TaskEntry = llvm::Function::Create( 3603 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3604 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3605 TaskEntry->setDoesNotRecurse(); 3606 CodeGenFunction CGF(CGM); 3607 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3608 Loc, Loc); 3609 3610 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3611 // tt, 3612 // For taskloops: 3613 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3614 // tt->task_data.shareds); 3615 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3616 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3617 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3618 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3619 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3620 const auto *KmpTaskTWithPrivatesQTyRD = 3621 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3622 LValue Base = 3623 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3624 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3625 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3626 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3627 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3628 3629 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3630 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3631 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3632 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3633 CGF.ConvertTypeForMem(SharedsPtrTy)); 3634 3635 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3636 llvm::Value *PrivatesParam; 3637 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3638 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3639 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3640 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3641 } else { 3642 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 
3643 } 3644 3645 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3646 TaskPrivatesMap, 3647 CGF.Builder 3648 .CreatePointerBitCastOrAddrSpaceCast( 3649 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3650 .getPointer()}; 3651 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3652 std::end(CommonArgs)); 3653 if (isOpenMPTaskLoopDirective(Kind)) { 3654 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3655 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3656 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3657 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3658 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3659 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3660 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3661 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3662 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3663 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3664 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3665 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3666 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3667 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3668 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3669 CallArgs.push_back(LBParam); 3670 CallArgs.push_back(UBParam); 3671 CallArgs.push_back(StParam); 3672 CallArgs.push_back(LIParam); 3673 CallArgs.push_back(RParam); 3674 } 3675 CallArgs.push_back(SharedsParam); 3676 3677 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3678 CallArgs); 3679 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3680 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3681 CGF.FinishFunction(); 3682 return TaskEntry; 3683 } 3684 3685 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3686 SourceLocation Loc, 3687 QualType KmpInt32Ty, 3688 QualType KmpTaskTWithPrivatesPtrQTy, 3689 QualType KmpTaskTWithPrivatesQTy) { 3690 ASTContext &C = CGM.getContext(); 3691 FunctionArgList Args; 3692 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3693 ImplicitParamDecl::Other); 3694 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3695 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3696 ImplicitParamDecl::Other); 3697 Args.push_back(&GtidArg); 3698 Args.push_back(&TaskTypeArg); 3699 const auto &DestructorFnInfo = 3700 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3701 llvm::FunctionType *DestructorFnTy = 3702 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3703 std::string Name = 3704 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3705 auto *DestructorFn = 3706 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3707 Name, &CGM.getModule()); 3708 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3709 DestructorFnInfo); 3710 DestructorFn->setDoesNotRecurse(); 3711 CodeGenFunction CGF(CGM); 3712 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3713 Args, Loc, Loc); 3714 3715 LValue Base = CGF.EmitLoadOfPointerLValue( 3716 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3717 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3718 const auto *KmpTaskTWithPrivatesQTyRD = 3719 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3720 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3721 Base = CGF.EmitLValueForField(Base, *FI); 3722 for 
(const auto *Field : 3723 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3724 if (QualType::DestructionKind DtorKind = 3725 Field->getType().isDestructedType()) { 3726 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3727 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3728 } 3729 } 3730 CGF.FinishFunction(); 3731 return DestructorFn; 3732 } 3733 3734 /// Emit a privates mapping function for correct handling of private and 3735 /// firstprivate variables. 3736 /// \code 3737 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 3738 /// **noalias priv1,..., <tyn> **noalias privn) { 3739 /// *priv1 = &.privates.priv1; 3740 /// ...; 3741 /// *privn = &.privates.privn; 3742 /// } 3743 /// \endcode 3744 static llvm::Value * 3745 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3746 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3747 ArrayRef<PrivateDataTy> Privates) { 3748 ASTContext &C = CGM.getContext(); 3749 FunctionArgList Args; 3750 ImplicitParamDecl TaskPrivatesArg( 3751 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3752 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3753 ImplicitParamDecl::Other); 3754 Args.push_back(&TaskPrivatesArg); 3755 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3756 unsigned Counter = 1; 3757 for (const Expr *E : Data.PrivateVars) { 3758 Args.push_back(ImplicitParamDecl::Create( 3759 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3760 C.getPointerType(C.getPointerType(E->getType())) 3761 .withConst() 3762 .withRestrict(), 3763 ImplicitParamDecl::Other)); 3764 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3765 PrivateVarsPos[VD] = Counter; 3766 ++Counter; 3767 } 3768 for (const Expr *E : Data.FirstprivateVars) { 3769 Args.push_back(ImplicitParamDecl::Create( 3770 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3771 C.getPointerType(C.getPointerType(E->getType())) 3772 .withConst() 3773 .withRestrict(), 3774 ImplicitParamDecl::Other)); 3775 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3776 PrivateVarsPos[VD] = Counter; 3777 ++Counter; 3778 } 3779 for (const Expr *E : Data.LastprivateVars) { 3780 Args.push_back(ImplicitParamDecl::Create( 3781 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3782 C.getPointerType(C.getPointerType(E->getType())) 3783 .withConst() 3784 .withRestrict(), 3785 ImplicitParamDecl::Other)); 3786 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3787 PrivateVarsPos[VD] = Counter; 3788 ++Counter; 3789 } 3790 for (const VarDecl *VD : Data.PrivateLocals) { 3791 QualType Ty = VD->getType().getNonReferenceType(); 3792 if (VD->getType()->isLValueReferenceType()) 3793 Ty = C.getPointerType(Ty); 3794 if (isAllocatableDecl(VD)) 3795 Ty = C.getPointerType(Ty); 3796 Args.push_back(ImplicitParamDecl::Create( 3797 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3798 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3799 ImplicitParamDecl::Other)); 3800 PrivateVarsPos[VD] = Counter; 3801 ++Counter; 3802 } 3803 const auto &TaskPrivatesMapFnInfo = 3804 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3805 llvm::FunctionType *TaskPrivatesMapTy = 3806 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3807 std::string Name = 3808 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3809 auto *TaskPrivatesMap = llvm::Function::Create( 3810 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3811 &CGM.getModule()); 3812 
CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3813 TaskPrivatesMapFnInfo); 3814 if (CGM.getLangOpts().Optimize) { 3815 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3816 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3817 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3818 } 3819 CodeGenFunction CGF(CGM); 3820 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3821 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3822 3823 // *privi = &.privates.privi; 3824 LValue Base = CGF.EmitLoadOfPointerLValue( 3825 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3826 TaskPrivatesArg.getType()->castAs<PointerType>()); 3827 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3828 Counter = 0; 3829 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3830 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3831 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3832 LValue RefLVal = 3833 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3834 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3835 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3836 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3837 ++Counter; 3838 } 3839 CGF.FinishFunction(); 3840 return TaskPrivatesMap; 3841 } 3842 3843 /// Emit initialization for private variables in task-based directives. 3844 static void emitPrivatesInit(CodeGenFunction &CGF, 3845 const OMPExecutableDirective &D, 3846 Address KmpTaskSharedsPtr, LValue TDBase, 3847 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3848 QualType SharedsTy, QualType SharedsPtrTy, 3849 const OMPTaskDataTy &Data, 3850 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3851 ASTContext &C = CGF.getContext(); 3852 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3853 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3854 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3855 ? OMPD_taskloop 3856 : OMPD_task; 3857 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3858 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3859 LValue SrcBase; 3860 bool IsTargetTask = 3861 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3862 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3863 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3864 // PointersArray, SizesArray, and MappersArray. The original variables for 3865 // these arrays are not captured and we get their addresses explicitly. 3866 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3867 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3868 SrcBase = CGF.MakeAddrLValue( 3869 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3870 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3871 SharedsTy); 3872 } 3873 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3874 for (const PrivateDataTy &Pair : Privates) { 3875 // Do not initialize private locals. 
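// (Local privates still get storage in the privates record, but any
// initialization they need is emitted as part of the task body itself.)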
3876 if (Pair.second.isLocalPrivate()) { 3877 ++FI; 3878 continue; 3879 } 3880 const VarDecl *VD = Pair.second.PrivateCopy; 3881 const Expr *Init = VD->getAnyInitializer(); 3882 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3883 !CGF.isTrivialInitializer(Init)))) { 3884 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3885 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3886 const VarDecl *OriginalVD = Pair.second.Original; 3887 // Check if the variable is the target-based BasePointersArray, 3888 // PointersArray, SizesArray, or MappersArray. 3889 LValue SharedRefLValue; 3890 QualType Type = PrivateLValue.getType(); 3891 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3892 if (IsTargetTask && !SharedField) { 3893 assert(isa<ImplicitParamDecl>(OriginalVD) && 3894 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3895 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3896 ->getNumParams() == 0 && 3897 isa<TranslationUnitDecl>( 3898 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3899 ->getDeclContext()) && 3900 "Expected artificial target data variable."); 3901 SharedRefLValue = 3902 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3903 } else if (ForDup) { 3904 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3905 SharedRefLValue = CGF.MakeAddrLValue( 3906 Address(SharedRefLValue.getPointer(CGF), 3907 C.getDeclAlign(OriginalVD)), 3908 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3909 SharedRefLValue.getTBAAInfo()); 3910 } else if (CGF.LambdaCaptureFields.count( 3911 Pair.second.Original->getCanonicalDecl()) > 0 || 3912 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) { 3913 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3914 } else { 3915 // Processing for implicitly captured variables. 3916 InlinedOpenMPRegionRAII Region( 3917 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3918 /*HasCancel=*/false, /*NoInheritance=*/true); 3919 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3920 } 3921 if (Type->isArrayType()) { 3922 // Initialize firstprivate array. 3923 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3924 // Perform simple memcpy. 3925 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3926 } else { 3927 // Initialize firstprivate array using element-by-element 3928 // initialization. 3929 CGF.EmitOMPAggregateAssign( 3930 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3931 Type, 3932 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3933 Address SrcElement) { 3934 // Clean up any temporaries needed by the initialization. 3935 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3936 InitScope.addPrivate( 3937 Elem, [SrcElement]() -> Address { return SrcElement; }); 3938 (void)InitScope.Privatize(); 3939 // Emit initialization for single element. 
3940 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3941 CGF, &CapturesInfo); 3942 CGF.EmitAnyExprToMem(Init, DestElement, 3943 Init->getType().getQualifiers(), 3944 /*IsInitializer=*/false); 3945 }); 3946 } 3947 } else { 3948 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3949 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3950 return SharedRefLValue.getAddress(CGF); 3951 }); 3952 (void)InitScope.Privatize(); 3953 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3954 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3955 /*capturedByInit=*/false); 3956 } 3957 } else { 3958 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3959 } 3960 } 3961 ++FI; 3962 } 3963 } 3964 3965 /// Check if duplication function is required for taskloops. 3966 static bool checkInitIsRequired(CodeGenFunction &CGF, 3967 ArrayRef<PrivateDataTy> Privates) { 3968 bool InitRequired = false; 3969 for (const PrivateDataTy &Pair : Privates) { 3970 if (Pair.second.isLocalPrivate()) 3971 continue; 3972 const VarDecl *VD = Pair.second.PrivateCopy; 3973 const Expr *Init = VD->getAnyInitializer(); 3974 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3975 !CGF.isTrivialInitializer(Init)); 3976 if (InitRequired) 3977 break; 3978 } 3979 return InitRequired; 3980 } 3981 3982 3983 /// Emit task_dup function (for initialization of 3984 /// private/firstprivate/lastprivate vars and last_iter flag) 3985 /// \code 3986 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3987 /// lastpriv) { 3988 /// // setup lastprivate flag 3989 /// task_dst->last = lastpriv; 3990 /// // could be constructor calls here... 3991 /// } 3992 /// \endcode 3993 static llvm::Value * 3994 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3995 const OMPExecutableDirective &D, 3996 QualType KmpTaskTWithPrivatesPtrQTy, 3997 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3998 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3999 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4000 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4001 ASTContext &C = CGM.getContext(); 4002 FunctionArgList Args; 4003 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4004 KmpTaskTWithPrivatesPtrQTy, 4005 ImplicitParamDecl::Other); 4006 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4007 KmpTaskTWithPrivatesPtrQTy, 4008 ImplicitParamDecl::Other); 4009 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4010 ImplicitParamDecl::Other); 4011 Args.push_back(&DstArg); 4012 Args.push_back(&SrcArg); 4013 Args.push_back(&LastprivArg); 4014 const auto &TaskDupFnInfo = 4015 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4016 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4017 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4018 auto *TaskDup = llvm::Function::Create( 4019 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4020 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4021 TaskDup->setDoesNotRecurse(); 4022 CodeGenFunction CGF(CGM); 4023 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4024 Loc); 4025 4026 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4027 CGF.GetAddrOfLocalVar(&DstArg), 4028 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4029 // task_dst->liter = lastpriv; 4030 if (WithLastIter) { 4031 auto LIFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4032 LValue Base = CGF.EmitLValueForField( 4033 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4034 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4035 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4036 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4037 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4038 } 4039 4040 // Emit initial values for private copies (if any). 4041 assert(!Privates.empty()); 4042 Address KmpTaskSharedsPtr = Address::invalid(); 4043 if (!Data.FirstprivateVars.empty()) { 4044 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4045 CGF.GetAddrOfLocalVar(&SrcArg), 4046 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4047 LValue Base = CGF.EmitLValueForField( 4048 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4049 KmpTaskSharedsPtr = Address( 4050 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4051 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4052 KmpTaskTShareds)), 4053 Loc), 4054 CGM.getNaturalTypeAlignment(SharedsTy)); 4055 } 4056 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4057 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4058 CGF.FinishFunction(); 4059 return TaskDup; 4060 } 4061 4062 /// Checks if destructor function is required to be generated. 4063 /// \return true if cleanups are required, false otherwise. 4064 static bool 4065 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4066 ArrayRef<PrivateDataTy> Privates) { 4067 for (const PrivateDataTy &P : Privates) { 4068 if (P.second.isLocalPrivate()) 4069 continue; 4070 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4071 if (Ty.isDestructedType()) 4072 return true; 4073 } 4074 return false; 4075 } 4076 4077 namespace { 4078 /// Loop generator for OpenMP iterator expression. 
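/// For a single modifier such as 'iterator(i = begin:end:step)' the scope
/// brackets the enclosed emission with, schematically:
/// \code
/// for (counter = 0; counter < <number-of-iterations>; ++counter) {
///   i = begin + counter * step;
///   <code emitted while the scope is alive>
/// }
/// \endcode
/// Multiple iterators produce correspondingly nested loops.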
4079 class OMPIteratorGeneratorScope final 4080 : public CodeGenFunction::OMPPrivateScope { 4081 CodeGenFunction &CGF; 4082 const OMPIteratorExpr *E = nullptr; 4083 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4084 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4085 OMPIteratorGeneratorScope() = delete; 4086 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4087 4088 public: 4089 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4090 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4091 if (!E) 4092 return; 4093 SmallVector<llvm::Value *, 4> Uppers; 4094 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4095 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4096 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4097 addPrivate(VD, [&CGF, VD]() { 4098 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4099 }); 4100 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4101 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4102 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4103 "counter.addr"); 4104 }); 4105 } 4106 Privatize(); 4107 4108 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4109 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4110 LValue CLVal = 4111 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4112 HelperData.CounterVD->getType()); 4113 // Counter = 0; 4114 CGF.EmitStoreOfScalar( 4115 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4116 CLVal); 4117 CodeGenFunction::JumpDest &ContDest = 4118 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4119 CodeGenFunction::JumpDest &ExitDest = 4120 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4121 // N = <number-of_iterations>; 4122 llvm::Value *N = Uppers[I]; 4123 // cont: 4124 // if (Counter < N) goto body; else goto exit; 4125 CGF.EmitBlock(ContDest.getBlock()); 4126 auto *CVal = 4127 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4128 llvm::Value *Cmp = 4129 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4130 ? 
CGF.Builder.CreateICmpSLT(CVal, N)
4131 : CGF.Builder.CreateICmpULT(CVal, N);
4132 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4133 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4134 // body:
4135 CGF.EmitBlock(BodyBB);
4136 // Iteri = Begini + Counter * Stepi;
4137 CGF.EmitIgnoredExpr(HelperData.Update);
4138 }
4139 }
4140 ~OMPIteratorGeneratorScope() {
4141 if (!E)
4142 return;
4143 for (unsigned I = E->numOfIterators(); I > 0; --I) {
4144 // Counter = Counter + 1;
4145 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4146 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4147 // goto cont;
4148 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4149 // exit:
4150 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4151 }
4152 }
4153 };
4154 } // namespace
4155
4156 static std::pair<llvm::Value *, llvm::Value *>
4157 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4158 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4159 llvm::Value *Addr;
4160 if (OASE) {
4161 const Expr *Base = OASE->getBase();
4162 Addr = CGF.EmitScalarExpr(Base);
4163 } else {
4164 Addr = CGF.EmitLValue(E).getPointer(CGF);
4165 }
4166 llvm::Value *SizeVal;
4167 QualType Ty = E->getType();
4168 if (OASE) {
4169 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4170 for (const Expr *SE : OASE->getDimensions()) {
4171 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4172 Sz = CGF.EmitScalarConversion(
4173 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4174 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4175 }
4176 } else if (const auto *ASE =
4177 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4178 LValue UpAddrLVal =
4179 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4180 Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4181 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4182 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4183 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4184 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4185 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4186 } else {
4187 SizeVal = CGF.getTypeSize(Ty);
4188 }
4189 return std::make_pair(Addr, SizeVal);
4190 }
4191
4192 /// Builds the kmp_task_affinity_info_t type, if it is not built yet, and the
4193 /// flags type.
4194 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4194 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4195 if (KmpTaskAffinityInfoTy.isNull()) {
4196 RecordDecl *KmpAffinityInfoRD =
4197 C.buildImplicitRecord("kmp_task_affinity_info_t");
4198 KmpAffinityInfoRD->startDefinition();
4199 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4200 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4201 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4202 KmpAffinityInfoRD->completeDefinition();
4203 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4204 }
4205 }
4206
4207 CGOpenMPRuntime::TaskResultTy
4208 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4209 const OMPExecutableDirective &D,
4210 llvm::Function *TaskFunction, QualType SharedsTy,
4211 Address Shareds, const OMPTaskDataTy &Data) {
4212 ASTContext &C = CGM.getContext();
4213 llvm::SmallVector<PrivateDataTy, 4> Privates;
4214 // Aggregate privates and sort them by alignment.
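// (The sort below is by descending alignment so that the most-aligned
// copies come first in .kmp_privates.t. and interior padding is kept to a
// minimum.)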
4215 const auto *I = Data.PrivateCopies.begin(); 4216 for (const Expr *E : Data.PrivateVars) { 4217 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4218 Privates.emplace_back( 4219 C.getDeclAlign(VD), 4220 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4221 /*PrivateElemInit=*/nullptr)); 4222 ++I; 4223 } 4224 I = Data.FirstprivateCopies.begin(); 4225 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4226 for (const Expr *E : Data.FirstprivateVars) { 4227 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4228 Privates.emplace_back( 4229 C.getDeclAlign(VD), 4230 PrivateHelpersTy( 4231 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4232 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4233 ++I; 4234 ++IElemInitRef; 4235 } 4236 I = Data.LastprivateCopies.begin(); 4237 for (const Expr *E : Data.LastprivateVars) { 4238 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4239 Privates.emplace_back( 4240 C.getDeclAlign(VD), 4241 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4242 /*PrivateElemInit=*/nullptr)); 4243 ++I; 4244 } 4245 for (const VarDecl *VD : Data.PrivateLocals) { 4246 if (isAllocatableDecl(VD)) 4247 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); 4248 else 4249 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 4250 } 4251 llvm::stable_sort(Privates, 4252 [](const PrivateDataTy &L, const PrivateDataTy &R) { 4253 return L.first > R.first; 4254 }); 4255 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4256 // Build type kmp_routine_entry_t (if not built yet). 4257 emitKmpRoutineEntryT(KmpInt32Ty); 4258 // Build type kmp_task_t (if not built yet). 4259 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4260 if (SavedKmpTaskloopTQTy.isNull()) { 4261 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4262 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4263 } 4264 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4265 } else { 4266 assert((D.getDirectiveKind() == OMPD_task || 4267 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4268 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4269 "Expected taskloop, task or target directive"); 4270 if (SavedKmpTaskTQTy.isNull()) { 4271 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4272 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4273 } 4274 KmpTaskTQTy = SavedKmpTaskTQTy; 4275 } 4276 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4277 // Build particular struct kmp_task_t for the given task. 4278 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4279 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4280 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4281 QualType KmpTaskTWithPrivatesPtrQTy = 4282 C.getPointerType(KmpTaskTWithPrivatesQTy); 4283 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4284 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4285 KmpTaskTWithPrivatesTy->getPointerTo(); 4286 llvm::Value *KmpTaskTWithPrivatesTySize = 4287 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4288 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4289 4290 // Emit initial values for private copies (if any). 
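// (TaskPrivatesMap is the .omp_task_privates_map. helper produced by
// emitTaskPrivateMappingFunction(); when the task has no privates a null
// pointer is passed to the task entry instead.)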
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit the device ID if any, otherwise use the default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  //                                                       task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Field ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
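      // The count computed above is bound to an OpaqueValueExpr, so the
      // declaration below behaves like a stack VLA of the form (sketch,
      // name illustrative): kmp_task_affinity_info_t affs[<NumOfElements>];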
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill the array with the elements that do not use iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
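    // E.g. (illustrative) 'affinity(iterator(i=0:n): a[i])' contributes n
    // entries whose count is known only at run time, so the write position is
    // tracked through the PosLVal counter rather than a constant index.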
4485 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4486 const Expr *Modifier = C->getModifier(); 4487 if (!Modifier) 4488 continue; 4489 OMPIteratorGeneratorScope IteratorScope( 4490 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4491 for (const Expr *E : C->varlists()) { 4492 llvm::Value *Addr; 4493 llvm::Value *Size; 4494 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4495 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4496 LValue Base = CGF.MakeAddrLValue( 4497 CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy); 4498 // affs[i].base_addr = &<Affinities[i].second>; 4499 LValue BaseAddrLVal = CGF.EmitLValueForField( 4500 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4501 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4502 BaseAddrLVal); 4503 // affs[i].len = sizeof(<Affinities[i].second>); 4504 LValue LenLVal = CGF.EmitLValueForField( 4505 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4506 CGF.EmitStoreOfScalar(Size, LenLVal); 4507 Idx = CGF.Builder.CreateNUWAdd( 4508 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4509 CGF.EmitStoreOfScalar(Idx, PosLVal); 4510 } 4511 } 4512 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4513 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4514 // naffins, kmp_task_affinity_info_t *affin_list); 4515 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4516 llvm::Value *GTid = getThreadID(CGF, Loc); 4517 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4518 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4519 // FIXME: Emit the function and ignore its result for now unless the 4520 // runtime function is properly implemented. 4521 (void)CGF.EmitRuntimeCall( 4522 OMPBuilder.getOrCreateRuntimeFunction( 4523 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4524 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4525 } 4526 llvm::Value *NewTaskNewTaskTTy = 4527 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4528 NewTask, KmpTaskTWithPrivatesPtrTy); 4529 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4530 KmpTaskTWithPrivatesQTy); 4531 LValue TDBase = 4532 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4533 // Fill the data in the resulting kmp_task_t record. 4534 // Copy shareds if there are any. 4535 Address KmpTaskSharedsPtr = Address::invalid(); 4536 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4537 KmpTaskSharedsPtr = 4538 Address(CGF.EmitLoadOfScalar( 4539 CGF.EmitLValueForField( 4540 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 4541 KmpTaskTShareds)), 4542 Loc), 4543 CGM.getNaturalTypeAlignment(SharedsTy)); 4544 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4545 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4546 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4547 } 4548 // Emit initial values for private copies (if any). 
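  // For example (illustrative), 'firstprivate(b)' requires each task's copy
  // of 'b' to be initialized from the captured original; for taskloops a
  // task-duplication callback may additionally be emitted (see below) so the
  // copies are re-initialized for every task generated from the loop.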
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Field ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4631 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4632 QualType &FlagsTy) { 4633 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4634 if (KmpDependInfoTy.isNull()) { 4635 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4636 KmpDependInfoRD->startDefinition(); 4637 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4638 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4639 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4640 KmpDependInfoRD->completeDefinition(); 4641 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4642 } 4643 } 4644 4645 std::pair<llvm::Value *, LValue> 4646 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4647 SourceLocation Loc) { 4648 ASTContext &C = CGM.getContext(); 4649 QualType FlagsTy; 4650 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4651 RecordDecl *KmpDependInfoRD = 4652 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4653 LValue Base = CGF.EmitLoadOfPointerLValue( 4654 DepobjLVal.getAddress(CGF), 4655 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4656 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4657 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4658 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4659 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4660 Base.getTBAAInfo()); 4661 Address DepObjAddr = CGF.Builder.CreateGEP( 4662 Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4663 LValue NumDepsBase = CGF.MakeAddrLValue( 4664 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); 4665 // NumDeps = deps[i].base_addr; 4666 LValue BaseAddrLVal = CGF.EmitLValueForField( 4667 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4668 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4669 return std::make_pair(NumDeps, Base); 4670 } 4671 4672 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4673 llvm::PointerUnion<unsigned *, LValue *> Pos, 4674 const OMPTaskDataTy::DependData &Data, 4675 Address DependenciesArray) { 4676 CodeGenModule &CGM = CGF.CGM; 4677 ASTContext &C = CGM.getContext(); 4678 QualType FlagsTy; 4679 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4680 RecordDecl *KmpDependInfoRD = 4681 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4682 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4683 4684 OMPIteratorGeneratorScope IteratorScope( 4685 CGF, cast_or_null<OMPIteratorExpr>( 4686 Data.IteratorExpr ? 
                                      Data.IteratorExpr->IgnoreParenImpCasts()
                                                        : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ?
                                      Data.IteratorExpr->IgnoreParenImpCasts()
                                                        : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
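      // The count is stored in the base_addr field of the sentinel element at
      // index -1 (emitDepobjDependClause below reserves that extra slot and
      // returns a pointer just past it).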
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcpy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += numDeps;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate the number of depobj dependencies and regular deps with
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
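    // E.g. (illustrative) 'depend(iterator(i=0:n), in : a[i])' contributes
    // n * <number of items in the clause> entries; the totals are accumulated
    // into NumOfRegularWithIterators.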

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}

Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle the depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Needs to be allocated in dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write the number of elements in the first element of the array for depobj.
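  // Resulting layout (sketch): deps[0].base_addr holds the element count,
  // deps[1..] hold the actual kmp_depend_info entries, and the address
  // returned below points at deps[1].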
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[0].base_addr = NumDepsVal;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}

void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // __kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
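  // Emitted control flow (sketch):
  //   el = &deps[0];
  //   do { el->flags = <new kind>; ++el; } while (el != &deps[ndeps]);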
5094 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5095 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5096 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5097 CGF.EmitBlock(BodyBB); 5098 llvm::PHINode *ElementPHI = 5099 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5100 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5101 Begin = Address(ElementPHI, Begin.getAlignment()); 5102 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5103 Base.getTBAAInfo()); 5104 // deps[i].flags = NewDepKind; 5105 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5106 LValue FlagsLVal = CGF.EmitLValueForField( 5107 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5108 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5109 FlagsLVal); 5110 5111 // Shift the address forward by one element. 5112 Address ElementNext = 5113 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5114 ElementPHI->addIncoming(ElementNext.getPointer(), 5115 CGF.Builder.GetInsertBlock()); 5116 llvm::Value *IsEmpty = 5117 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5118 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5119 // Done. 5120 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5121 } 5122 5123 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5124 const OMPExecutableDirective &D, 5125 llvm::Function *TaskFunction, 5126 QualType SharedsTy, Address Shareds, 5127 const Expr *IfCond, 5128 const OMPTaskDataTy &Data) { 5129 if (!CGF.HaveInsertPoint()) 5130 return; 5131 5132 TaskResultTy Result = 5133 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5134 llvm::Value *NewTask = Result.NewTask; 5135 llvm::Function *TaskEntry = Result.TaskEntry; 5136 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5137 LValue TDBase = Result.TDBase; 5138 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5139 // Process list of dependences. 5140 Address DependenciesArray = Address::invalid(); 5141 llvm::Value *NumOfElements; 5142 std::tie(NumOfElements, DependenciesArray) = 5143 emitDependClause(CGF, Data.Dependences, Loc); 5144 5145 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5146 // libcall. 
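  // Overall shape of the emitted code (sketch):
  //   if (<IfCond>) {
  //     __kmpc_omp_task{_with_deps}(loc, gtid, new_task, ...);
  //   } else {
  //     __kmpc_omp_wait_deps(...);          // only if dependences are present
  //     __kmpc_omp_task_begin_if0(loc, gtid, new_task);
  //     .omp_task_entry.(gtid, new_task);   // direct call to the proxy
  //     __kmpc_omp_task_complete_if0(loc, gtid, new_task);
  //   }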
5147 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5148 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5149 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5150 // list is not empty 5151 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5152 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5153 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5154 llvm::Value *DepTaskArgs[7]; 5155 if (!Data.Dependences.empty()) { 5156 DepTaskArgs[0] = UpLoc; 5157 DepTaskArgs[1] = ThreadID; 5158 DepTaskArgs[2] = NewTask; 5159 DepTaskArgs[3] = NumOfElements; 5160 DepTaskArgs[4] = DependenciesArray.getPointer(); 5161 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5162 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5163 } 5164 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5165 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5166 if (!Data.Tied) { 5167 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5168 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5169 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5170 } 5171 if (!Data.Dependences.empty()) { 5172 CGF.EmitRuntimeCall( 5173 OMPBuilder.getOrCreateRuntimeFunction( 5174 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5175 DepTaskArgs); 5176 } else { 5177 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5178 CGM.getModule(), OMPRTL___kmpc_omp_task), 5179 TaskArgs); 5180 } 5181 // Check if parent region is untied and build return for untied task; 5182 if (auto *Region = 5183 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5184 Region->emitUntiedSwitch(CGF); 5185 }; 5186 5187 llvm::Value *DepWaitTaskArgs[6]; 5188 if (!Data.Dependences.empty()) { 5189 DepWaitTaskArgs[0] = UpLoc; 5190 DepWaitTaskArgs[1] = ThreadID; 5191 DepWaitTaskArgs[2] = NumOfElements; 5192 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5193 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5194 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5195 } 5196 auto &M = CGM.getModule(); 5197 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5198 TaskEntry, &Data, &DepWaitTaskArgs, 5199 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5200 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5201 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5202 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5203 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5204 // is specified. 
5205 if (!Data.Dependences.empty()) 5206 CGF.EmitRuntimeCall( 5207 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5208 DepWaitTaskArgs); 5209 // Call proxy_task_entry(gtid, new_task); 5210 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5211 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5212 Action.Enter(CGF); 5213 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5214 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5215 OutlinedFnArgs); 5216 }; 5217 5218 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5219 // kmp_task_t *new_task); 5220 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5221 // kmp_task_t *new_task); 5222 RegionCodeGenTy RCG(CodeGen); 5223 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5224 M, OMPRTL___kmpc_omp_task_begin_if0), 5225 TaskArgs, 5226 OMPBuilder.getOrCreateRuntimeFunction( 5227 M, OMPRTL___kmpc_omp_task_complete_if0), 5228 TaskArgs); 5229 RCG.setAction(Action); 5230 RCG(CGF); 5231 }; 5232 5233 if (IfCond) { 5234 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5235 } else { 5236 RegionCodeGenTy ThenRCG(ThenCodeGen); 5237 ThenRCG(CGF); 5238 } 5239 } 5240 5241 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5242 const OMPLoopDirective &D, 5243 llvm::Function *TaskFunction, 5244 QualType SharedsTy, Address Shareds, 5245 const Expr *IfCond, 5246 const OMPTaskDataTy &Data) { 5247 if (!CGF.HaveInsertPoint()) 5248 return; 5249 TaskResultTy Result = 5250 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5251 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5252 // libcall. 5253 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5254 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5255 // sched, kmp_uint64 grainsize, void *task_dup); 5256 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5257 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5258 llvm::Value *IfVal; 5259 if (IfCond) { 5260 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5261 /*isSigned=*/true); 5262 } else { 5263 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5264 } 5265 5266 LValue LBLVal = CGF.EmitLValueForField( 5267 Result.TDBase, 5268 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5269 const auto *LBVar = 5270 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5271 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5272 LBLVal.getQuals(), 5273 /*IsInitializer=*/true); 5274 LValue UBLVal = CGF.EmitLValueForField( 5275 Result.TDBase, 5276 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5277 const auto *UBVar = 5278 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5279 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5280 UBLVal.getQuals(), 5281 /*IsInitializer=*/true); 5282 LValue StLVal = CGF.EmitLValueForField( 5283 Result.TDBase, 5284 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5285 const auto *StVar = 5286 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5287 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5288 StLVal.getQuals(), 5289 /*IsInitializer=*/true); 5290 // Store reductions address. 
5291 LValue RedLVal = CGF.EmitLValueForField( 5292 Result.TDBase, 5293 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5294 if (Data.Reductions) { 5295 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5296 } else { 5297 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5298 CGF.getContext().VoidPtrTy); 5299 } 5300 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5301 llvm::Value *TaskArgs[] = { 5302 UpLoc, 5303 ThreadID, 5304 Result.NewTask, 5305 IfVal, 5306 LBLVal.getPointer(CGF), 5307 UBLVal.getPointer(CGF), 5308 CGF.EmitLoadOfScalar(StLVal, Loc), 5309 llvm::ConstantInt::getSigned( 5310 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5311 llvm::ConstantInt::getSigned( 5312 CGF.IntTy, Data.Schedule.getPointer() 5313 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5314 : NoSchedule), 5315 Data.Schedule.getPointer() 5316 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5317 /*isSigned=*/false) 5318 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5319 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5320 Result.TaskDupFn, CGF.VoidPtrTy) 5321 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5322 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5323 CGM.getModule(), OMPRTL___kmpc_taskloop), 5324 TaskArgs); 5325 } 5326 5327 /// Emit reduction operation for each element of array (required for 5328 /// array sections) LHS op = RHS. 5329 /// \param Type Type of array. 5330 /// \param LHSVar Variable on the left side of the reduction operation 5331 /// (references element of array in original variable). 5332 /// \param RHSVar Variable on the right side of the reduction operation 5333 /// (references element of array in original variable). 5334 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5335 /// RHSVar. 5336 static void EmitOMPAggregateReduction( 5337 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5338 const VarDecl *RHSVar, 5339 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5340 const Expr *, const Expr *)> &RedOpGen, 5341 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5342 const Expr *UpExpr = nullptr) { 5343 // Perform element-by-element initialization. 5344 QualType ElementTy; 5345 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5346 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5347 5348 // Drill down to the base element type on both arrays. 5349 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5350 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5351 5352 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5353 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5354 // Cast from pointer to array type to pointer to single element. 5355 llvm::Value *LHSEnd = 5356 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements); 5357 // The basic structure here is a while-do loop. 5358 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5359 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5360 llvm::Value *IsEmpty = 5361 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5362 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5363 5364 // Enter the loop body, making that address the current address. 
5365 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5366 CGF.EmitBlock(BodyBB); 5367 5368 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5369 5370 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5371 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5372 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5373 Address RHSElementCurrent = 5374 Address(RHSElementPHI, 5375 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5376 5377 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5378 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5379 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5380 Address LHSElementCurrent = 5381 Address(LHSElementPHI, 5382 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5383 5384 // Emit copy. 5385 CodeGenFunction::OMPPrivateScope Scope(CGF); 5386 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5387 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5388 Scope.Privatize(); 5389 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5390 Scope.ForceCleanup(); 5391 5392 // Shift the address forward by one element. 5393 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5394 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1, 5395 "omp.arraycpy.dest.element"); 5396 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5397 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1, 5398 "omp.arraycpy.src.element"); 5399 // Check whether we've reached the end. 5400 llvm::Value *Done = 5401 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5402 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5403 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5404 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5405 5406 // Done. 5407 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5408 } 5409 5410 /// Emit reduction combiner. If the combiner is a simple expression emit it as 5411 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5412 /// UDR combiner function. 
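/// E.g. (illustrative) for a '#pragma omp declare reduction' combiner, the
/// ReductionOp is a call through an OpaqueValueExpr that is remapped here to
/// the combiner function emitted for the UDR.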
5413 static void emitReductionCombiner(CodeGenFunction &CGF, 5414 const Expr *ReductionOp) { 5415 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5416 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5417 if (const auto *DRE = 5418 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5419 if (const auto *DRD = 5420 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5421 std::pair<llvm::Function *, llvm::Function *> Reduction = 5422 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5423 RValue Func = RValue::get(Reduction.first); 5424 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5425 CGF.EmitIgnoredExpr(ReductionOp); 5426 return; 5427 } 5428 CGF.EmitIgnoredExpr(ReductionOp); 5429 } 5430 5431 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5432 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5433 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5434 ArrayRef<const Expr *> ReductionOps) { 5435 ASTContext &C = CGM.getContext(); 5436 5437 // void reduction_func(void *LHSArg, void *RHSArg); 5438 FunctionArgList Args; 5439 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5440 ImplicitParamDecl::Other); 5441 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5442 ImplicitParamDecl::Other); 5443 Args.push_back(&LHSArg); 5444 Args.push_back(&RHSArg); 5445 const auto &CGFI = 5446 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5447 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5448 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5449 llvm::GlobalValue::InternalLinkage, Name, 5450 &CGM.getModule()); 5451 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5452 Fn->setDoesNotRecurse(); 5453 CodeGenFunction CGF(CGM); 5454 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5455 5456 // Dst = (void*[n])(LHSArg); 5457 // Src = (void*[n])(RHSArg); 5458 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5459 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5460 ArgsType), CGF.getPointerAlign()); 5461 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5462 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5463 ArgsType), CGF.getPointerAlign()); 5464 5465 // ... 5466 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5467 // ... 5468 CodeGenFunction::OMPPrivateScope Scope(CGF); 5469 auto IPriv = Privates.begin(); 5470 unsigned Idx = 0; 5471 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5472 const auto *RHSVar = 5473 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5474 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5475 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5476 }); 5477 const auto *LHSVar = 5478 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5479 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5480 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5481 }); 5482 QualType PrivTy = (*IPriv)->getType(); 5483 if (PrivTy->isVariablyModifiedType()) { 5484 // Get array size and emit VLA type. 
5485 ++Idx; 5486 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5487 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5488 const VariableArrayType *VLA = 5489 CGF.getContext().getAsVariableArrayType(PrivTy); 5490 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5491 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5492 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5493 CGF.EmitVariablyModifiedType(PrivTy); 5494 } 5495 } 5496 Scope.Privatize(); 5497 IPriv = Privates.begin(); 5498 auto ILHS = LHSExprs.begin(); 5499 auto IRHS = RHSExprs.begin(); 5500 for (const Expr *E : ReductionOps) { 5501 if ((*IPriv)->getType()->isArrayType()) { 5502 // Emit reduction for array section. 5503 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5504 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5505 EmitOMPAggregateReduction( 5506 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5507 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5508 emitReductionCombiner(CGF, E); 5509 }); 5510 } else { 5511 // Emit reduction for array subscript or single variable. 5512 emitReductionCombiner(CGF, E); 5513 } 5514 ++IPriv; 5515 ++ILHS; 5516 ++IRHS; 5517 } 5518 Scope.ForceCleanup(); 5519 CGF.FinishFunction(); 5520 return Fn; 5521 } 5522 5523 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5524 const Expr *ReductionOp, 5525 const Expr *PrivateRef, 5526 const DeclRefExpr *LHS, 5527 const DeclRefExpr *RHS) { 5528 if (PrivateRef->getType()->isArrayType()) { 5529 // Emit reduction for array section. 5530 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5531 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5532 EmitOMPAggregateReduction( 5533 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5534 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5535 emitReductionCombiner(CGF, ReductionOp); 5536 }); 5537 } else { 5538 // Emit reduction for array subscript or single variable. 5539 emitReductionCombiner(CGF, ReductionOp); 5540 } 5541 } 5542 5543 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5544 ArrayRef<const Expr *> Privates, 5545 ArrayRef<const Expr *> LHSExprs, 5546 ArrayRef<const Expr *> RHSExprs, 5547 ArrayRef<const Expr *> ReductionOps, 5548 ReductionOptionsTy Options) { 5549 if (!CGF.HaveInsertPoint()) 5550 return; 5551 5552 bool WithNowait = Options.WithNowait; 5553 bool SimpleReduction = Options.SimpleReduction; 5554 5555 // Next code should be emitted for reduction: 5556 // 5557 // static kmp_critical_name lock = { 0 }; 5558 // 5559 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5560 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5561 // ... 5562 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5563 // *(Type<n>-1*)rhs[<n>-1]); 5564 // } 5565 // 5566 // ... 5567 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5568 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5569 // RedList, reduce_func, &<lock>)) { 5570 // case 1: 5571 // ... 5572 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5573 // ... 5574 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5575 // break; 5576 // case 2: 5577 // ... 5578 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5579 // ... 
5580 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5581 // break; 5582 // default:; 5583 // } 5584 // 5585 // if SimpleReduction is true, only the next code is generated: 5586 // ... 5587 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5588 // ... 5589 5590 ASTContext &C = CGM.getContext(); 5591 5592 if (SimpleReduction) { 5593 CodeGenFunction::RunCleanupsScope Scope(CGF); 5594 auto IPriv = Privates.begin(); 5595 auto ILHS = LHSExprs.begin(); 5596 auto IRHS = RHSExprs.begin(); 5597 for (const Expr *E : ReductionOps) { 5598 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5599 cast<DeclRefExpr>(*IRHS)); 5600 ++IPriv; 5601 ++ILHS; 5602 ++IRHS; 5603 } 5604 return; 5605 } 5606 5607 // 1. Build a list of reduction variables. 5608 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5609 auto Size = RHSExprs.size(); 5610 for (const Expr *E : Privates) { 5611 if (E->getType()->isVariablyModifiedType()) 5612 // Reserve place for array size. 5613 ++Size; 5614 } 5615 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5616 QualType ReductionArrayTy = 5617 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5618 /*IndexTypeQuals=*/0); 5619 Address ReductionList = 5620 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5621 auto IPriv = Privates.begin(); 5622 unsigned Idx = 0; 5623 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5624 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5625 CGF.Builder.CreateStore( 5626 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5627 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5628 Elem); 5629 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5630 // Store array size. 5631 ++Idx; 5632 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5633 llvm::Value *Size = CGF.Builder.CreateIntCast( 5634 CGF.getVLASize( 5635 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5636 .NumElts, 5637 CGF.SizeTy, /*isSigned=*/false); 5638 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5639 Elem); 5640 } 5641 } 5642 5643 // 2. Emit reduce_func(). 5644 llvm::Function *ReductionFn = emitReductionFunction( 5645 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5646 LHSExprs, RHSExprs, ReductionOps); 5647 5648 // 3. Create static kmp_critical_name lock = { 0 }; 5649 std::string Name = getName({"reduction"}); 5650 llvm::Value *Lock = getCriticalRegionLock(Name); 5651 5652 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5653 // RedList, reduce_func, &<lock>); 5654 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5655 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5656 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5657 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5658 ReductionList.getPointer(), CGF.VoidPtrTy); 5659 llvm::Value *Args[] = { 5660 IdentTLoc, // ident_t *<loc> 5661 ThreadId, // i32 <gtid> 5662 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5663 ReductionArrayTySize, // size_type sizeof(RedList) 5664 RL, // void *RedList 5665 ReductionFn, // void (*) (void *, void *) <reduce_func> 5666 Lock // kmp_critical_name *&<lock> 5667 }; 5668 llvm::Value *Res = CGF.EmitRuntimeCall( 5669 OMPBuilder.getOrCreateRuntimeFunction( 5670 CGM.getModule(), 5671 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5672 Args); 5673 5674 // 5. 
Build switch(res) 5675 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5676 llvm::SwitchInst *SwInst = 5677 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5678 5679 // 6. Build case 1: 5680 // ... 5681 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5682 // ... 5683 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5684 // break; 5685 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5686 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5687 CGF.EmitBlock(Case1BB); 5688 5689 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5690 llvm::Value *EndArgs[] = { 5691 IdentTLoc, // ident_t *<loc> 5692 ThreadId, // i32 <gtid> 5693 Lock // kmp_critical_name *&<lock> 5694 }; 5695 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5696 CodeGenFunction &CGF, PrePostActionTy &Action) { 5697 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5698 auto IPriv = Privates.begin(); 5699 auto ILHS = LHSExprs.begin(); 5700 auto IRHS = RHSExprs.begin(); 5701 for (const Expr *E : ReductionOps) { 5702 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5703 cast<DeclRefExpr>(*IRHS)); 5704 ++IPriv; 5705 ++ILHS; 5706 ++IRHS; 5707 } 5708 }; 5709 RegionCodeGenTy RCG(CodeGen); 5710 CommonActionTy Action( 5711 nullptr, llvm::None, 5712 OMPBuilder.getOrCreateRuntimeFunction( 5713 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5714 : OMPRTL___kmpc_end_reduce), 5715 EndArgs); 5716 RCG.setAction(Action); 5717 RCG(CGF); 5718 5719 CGF.EmitBranch(DefaultBB); 5720 5721 // 7. Build case 2: 5722 // ... 5723 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5724 // ... 5725 // break; 5726 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5727 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5728 CGF.EmitBlock(Case2BB); 5729 5730 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5731 CodeGenFunction &CGF, PrePostActionTy &Action) { 5732 auto ILHS = LHSExprs.begin(); 5733 auto IRHS = RHSExprs.begin(); 5734 auto IPriv = Privates.begin(); 5735 for (const Expr *E : ReductionOps) { 5736 const Expr *XExpr = nullptr; 5737 const Expr *EExpr = nullptr; 5738 const Expr *UpExpr = nullptr; 5739 BinaryOperatorKind BO = BO_Comma; 5740 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5741 if (BO->getOpcode() == BO_Assign) { 5742 XExpr = BO->getLHS(); 5743 UpExpr = BO->getRHS(); 5744 } 5745 } 5746 // Try to emit update expression as a simple atomic. 5747 const Expr *RHSExpr = UpExpr; 5748 if (RHSExpr) { 5749 // Analyze RHS part of the whole expression. 5750 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5751 RHSExpr->IgnoreParenImpCasts())) { 5752 // If this is a conditional operator, analyze its condition for 5753 // min/max reduction operator. 
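          // E.g. a 'min' reduction reaches here as 'x = x < e ? x : e;': the
          // comparison 'x < e' is the condition, so it supplies both the
          // opcode (BO_LT) and the 'e' operand extracted just below.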
5754 RHSExpr = ACO->getCond(); 5755 } 5756 if (const auto *BORHS = 5757 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5758 EExpr = BORHS->getRHS(); 5759 BO = BORHS->getOpcode(); 5760 } 5761 } 5762 if (XExpr) { 5763 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5764 auto &&AtomicRedGen = [BO, VD, 5765 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5766 const Expr *EExpr, const Expr *UpExpr) { 5767 LValue X = CGF.EmitLValue(XExpr); 5768 RValue E; 5769 if (EExpr) 5770 E = CGF.EmitAnyExpr(EExpr); 5771 CGF.EmitOMPAtomicSimpleUpdateExpr( 5772 X, E, BO, /*IsXLHSInRHSPart=*/true, 5773 llvm::AtomicOrdering::Monotonic, Loc, 5774 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5775 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5776 PrivateScope.addPrivate( 5777 VD, [&CGF, VD, XRValue, Loc]() { 5778 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5779 CGF.emitOMPSimpleStore( 5780 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5781 VD->getType().getNonReferenceType(), Loc); 5782 return LHSTemp; 5783 }); 5784 (void)PrivateScope.Privatize(); 5785 return CGF.EmitAnyExpr(UpExpr); 5786 }); 5787 }; 5788 if ((*IPriv)->getType()->isArrayType()) { 5789 // Emit atomic reduction for array section. 5790 const auto *RHSVar = 5791 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5792 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5793 AtomicRedGen, XExpr, EExpr, UpExpr); 5794 } else { 5795 // Emit atomic reduction for array subscript or single variable. 5796 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5797 } 5798 } else { 5799 // Emit as a critical region. 5800 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5801 const Expr *, const Expr *) { 5802 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5803 std::string Name = RT.getName({"atomic_reduction"}); 5804 RT.emitCriticalRegion( 5805 CGF, Name, 5806 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5807 Action.Enter(CGF); 5808 emitReductionCombiner(CGF, E); 5809 }, 5810 Loc); 5811 }; 5812 if ((*IPriv)->getType()->isArrayType()) { 5813 const auto *LHSVar = 5814 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5815 const auto *RHSVar = 5816 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5817 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5818 CritRedGen); 5819 } else { 5820 CritRedGen(CGF, nullptr, nullptr, nullptr); 5821 } 5822 } 5823 ++ILHS; 5824 ++IRHS; 5825 ++IPriv; 5826 } 5827 }; 5828 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5829 if (!WithNowait) { 5830 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5831 llvm::Value *EndArgs[] = { 5832 IdentTLoc, // ident_t *<loc> 5833 ThreadId, // i32 <gtid> 5834 Lock // kmp_critical_name *&<lock> 5835 }; 5836 CommonActionTy Action(nullptr, llvm::None, 5837 OMPBuilder.getOrCreateRuntimeFunction( 5838 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5839 EndArgs); 5840 AtomicRCG.setAction(Action); 5841 AtomicRCG(CGF); 5842 } else { 5843 AtomicRCG(CGF); 5844 } 5845 5846 CGF.EmitBranch(DefaultBB); 5847 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5848 } 5849 5850 /// Generates unique name for artificial threadprivate variables. 5851 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5852 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5853 const Expr *Ref) { 5854 SmallString<256> Buffer; 5855 llvm::raw_svector_ostream Out(Buffer); 5856 const clang::DeclRefExpr *DE; 5857 const VarDecl *D = ::getBaseDecl(Ref, DE); 5858 if (!D) 5859 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5860 D = D->getCanonicalDecl(); 5861 std::string Name = CGM.getOpenMPRuntime().getName( 5862 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5863 Out << Prefix << Name << "_" 5864 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5865 return std::string(Out.str()); 5866 } 5867 5868 /// Emits reduction initializer function: 5869 /// \code 5870 /// void @.red_init(void* %arg, void* %orig) { 5871 /// %0 = bitcast void* %arg to <type>* 5872 /// store <type> <init>, <type>* %0 5873 /// ret void 5874 /// } 5875 /// \endcode 5876 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5877 SourceLocation Loc, 5878 ReductionCodeGen &RCG, unsigned N) { 5879 ASTContext &C = CGM.getContext(); 5880 QualType VoidPtrTy = C.VoidPtrTy; 5881 VoidPtrTy.addRestrict(); 5882 FunctionArgList Args; 5883 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5884 ImplicitParamDecl::Other); 5885 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5886 ImplicitParamDecl::Other); 5887 Args.emplace_back(&Param); 5888 Args.emplace_back(&ParamOrig); 5889 const auto &FnInfo = 5890 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5891 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5892 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5893 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5894 Name, &CGM.getModule()); 5895 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5896 Fn->setDoesNotRecurse(); 5897 CodeGenFunction CGF(CGM); 5898 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5899 Address PrivateAddr = CGF.EmitLoadOfPointer( 5900 CGF.GetAddrOfLocalVar(&Param), 5901 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5902 llvm::Value *Size = nullptr; 5903 // If the size of the reduction item is non-constant, load it from global 5904 // threadprivate variable. 
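  // (This value is stored under the same "reduction_size" unique name by
  // emitTaskReductionFixups in the code that sets up the task reduction.)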
5905   if (RCG.getSizes(N).second) {
5906     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5907         CGF, CGM.getContext().getSizeType(),
5908         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5909     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5910                                 CGM.getContext().getSizeType(), Loc);
5911   }
5912   RCG.emitAggregateType(CGF, N, Size);
5913   Address OrigAddr = Address::invalid();
5914   // If the initializer uses the initializer from the 'declare reduction'
5915   // construct, emit a pointer to the address of the original reduction item
5916   // (required by the reduction initializer).
5917   if (RCG.usesReductionInitializer(N)) {
5918     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5919     OrigAddr = CGF.EmitLoadOfPointer(
5920         SharedAddr,
5921         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5922   }
5923   // Emit the initializer:
5924   // %0 = bitcast void* %arg to <type>*
5925   // store <type> <init>, <type>* %0
5926   RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5927                          [](CodeGenFunction &) { return false; });
5928   CGF.FinishFunction();
5929   return Fn;
5930 }
5931
5932 /// Emits reduction combiner function:
5933 /// \code
5934 /// void @.red_comb(void* %arg0, void* %arg1) {
5935 ///   %lhs = bitcast void* %arg0 to <type>*
5936 ///   %rhs = bitcast void* %arg1 to <type>*
5937 ///   %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5938 ///   store <type> %2, <type>* %lhs
5939 ///   ret void
5940 /// }
5941 /// \endcode
5942 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5943                                            SourceLocation Loc,
5944                                            ReductionCodeGen &RCG, unsigned N,
5945                                            const Expr *ReductionOp,
5946                                            const Expr *LHS, const Expr *RHS,
5947                                            const Expr *PrivateRef) {
5948   ASTContext &C = CGM.getContext();
5949   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5950   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5951   FunctionArgList Args;
5952   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5953                                C.VoidPtrTy, ImplicitParamDecl::Other);
5954   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5955                             ImplicitParamDecl::Other);
5956   Args.emplace_back(&ParamInOut);
5957   Args.emplace_back(&ParamIn);
5958   const auto &FnInfo =
5959       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5960   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5961   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5962   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5963                                     Name, &CGM.getModule());
5964   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5965   Fn->setDoesNotRecurse();
5966   CodeGenFunction CGF(CGM);
5967   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5968   llvm::Value *Size = nullptr;
5969   // If the size of the reduction item is non-constant, load it from global
5970   // threadprivate variable.
5971   if (RCG.getSizes(N).second) {
5972     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5973         CGF, CGM.getContext().getSizeType(),
5974         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5975     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5976                                 CGM.getContext().getSizeType(), Loc);
5977   }
5978   RCG.emitAggregateType(CGF, N, Size);
5979   // Remap lhs and rhs variables to the addresses of the function arguments.
5980 // %lhs = bitcast void* %arg0 to <type>* 5981 // %rhs = bitcast void* %arg1 to <type>* 5982 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5983 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 5984 // Pull out the pointer to the variable. 5985 Address PtrAddr = CGF.EmitLoadOfPointer( 5986 CGF.GetAddrOfLocalVar(&ParamInOut), 5987 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5988 return CGF.Builder.CreateElementBitCast( 5989 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 5990 }); 5991 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 5992 // Pull out the pointer to the variable. 5993 Address PtrAddr = CGF.EmitLoadOfPointer( 5994 CGF.GetAddrOfLocalVar(&ParamIn), 5995 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5996 return CGF.Builder.CreateElementBitCast( 5997 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 5998 }); 5999 PrivateScope.Privatize(); 6000 // Emit the combiner body: 6001 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6002 // store <type> %2, <type>* %lhs 6003 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6004 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6005 cast<DeclRefExpr>(RHS)); 6006 CGF.FinishFunction(); 6007 return Fn; 6008 } 6009 6010 /// Emits reduction finalizer function: 6011 /// \code 6012 /// void @.red_fini(void* %arg) { 6013 /// %0 = bitcast void* %arg to <type>* 6014 /// <destroy>(<type>* %0) 6015 /// ret void 6016 /// } 6017 /// \endcode 6018 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6019 SourceLocation Loc, 6020 ReductionCodeGen &RCG, unsigned N) { 6021 if (!RCG.needCleanups(N)) 6022 return nullptr; 6023 ASTContext &C = CGM.getContext(); 6024 FunctionArgList Args; 6025 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6026 ImplicitParamDecl::Other); 6027 Args.emplace_back(&Param); 6028 const auto &FnInfo = 6029 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6030 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6031 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6032 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6033 Name, &CGM.getModule()); 6034 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6035 Fn->setDoesNotRecurse(); 6036 CodeGenFunction CGF(CGM); 6037 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6038 Address PrivateAddr = CGF.EmitLoadOfPointer( 6039 CGF.GetAddrOfLocalVar(&Param), 6040 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6041 llvm::Value *Size = nullptr; 6042 // If the size of the reduction item is non-constant, load it from global 6043 // threadprivate variable. 
6044 if (RCG.getSizes(N).second) { 6045 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6046 CGF, CGM.getContext().getSizeType(), 6047 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6048 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6049 CGM.getContext().getSizeType(), Loc); 6050 } 6051 RCG.emitAggregateType(CGF, N, Size); 6052 // Emit the finalizer body: 6053 // <destroy>(<type>* %0) 6054 RCG.emitCleanups(CGF, N, PrivateAddr); 6055 CGF.FinishFunction(Loc); 6056 return Fn; 6057 } 6058 6059 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6060 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6061 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6062 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6063 return nullptr; 6064 6065 // Build typedef struct: 6066 // kmp_taskred_input { 6067 // void *reduce_shar; // shared reduction item 6068 // void *reduce_orig; // original reduction item used for initialization 6069 // size_t reduce_size; // size of data item 6070 // void *reduce_init; // data initialization routine 6071 // void *reduce_fini; // data finalization routine 6072 // void *reduce_comb; // data combiner routine 6073 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6074 // } kmp_taskred_input_t; 6075 ASTContext &C = CGM.getContext(); 6076 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6077 RD->startDefinition(); 6078 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6079 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6080 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6081 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6082 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6083 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6084 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6085 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6086 RD->completeDefinition(); 6087 QualType RDType = C.getRecordType(RD); 6088 unsigned Size = Data.ReductionVars.size(); 6089 llvm::APInt ArraySize(/*numBits=*/64, Size); 6090 QualType ArrayRDType = C.getConstantArrayType( 6091 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6092 // kmp_task_red_input_t .rd_input.[Size]; 6093 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6094 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6095 Data.ReductionCopies, Data.ReductionOps); 6096 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6097 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6098 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6099 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6100 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6101 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs, 6102 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6103 ".rd_input.gep."); 6104 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6105 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6106 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6107 RCG.emitSharedOrigLValue(CGF, Cnt); 6108 llvm::Value *CastedShared = 6109 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6110 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6111 // ElemLVal.reduce_orig = &Origs[Cnt]; 6112 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6113 llvm::Value *CastedOrig = 6114 
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6115     CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6116     RCG.emitAggregateType(CGF, Cnt);
6117     llvm::Value *SizeValInChars;
6118     llvm::Value *SizeVal;
6119     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6120     // We use delayed creation/initialization for VLAs and array sections. It
6121     // is required because the runtime does not provide a way to pass the
6122     // sizes of VLAs/array sections to the initializer/combiner/finalizer
6123     // functions. Instead, threadprivate global variables are used to store
6124     // these values, which those functions then read back.
6125     bool DelayedCreation = !!SizeVal;
6126     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6127                                                /*isSigned=*/false);
6128     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6129     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6130     // ElemLVal.reduce_init = init;
6131     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6132     llvm::Value *InitAddr =
6133         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6134     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6135     // ElemLVal.reduce_fini = fini;
6136     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6137     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6138     llvm::Value *FiniAddr = Fini
6139                                 ? CGF.EmitCastToVoidPtr(Fini)
6140                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6141     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6142     // ElemLVal.reduce_comb = comb;
6143     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6144     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6145         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6146         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6147     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6148     // ElemLVal.flags = DelayedCreation ? 1 : 0;
6149     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6150     if (DelayedCreation) {
6151       CGF.EmitStoreOfScalar(
6152           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6153           FlagsLVal);
6154     } else
6155       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6156                                  FlagsLVal.getType());
6157   }
6158   if (Data.IsReductionWithTaskMod) {
6159     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6160     // is_ws, int num, void *data);
6161     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6162     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6163                                                   CGM.IntTy, /*isSigned=*/true);
6164     llvm::Value *Args[] = {
6165         IdentTLoc, GTid,
6166         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6167                                /*isSigned=*/true),
6168         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6169         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6170             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6171     return CGF.EmitRuntimeCall(
6172         OMPBuilder.getOrCreateRuntimeFunction(
6173             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6174         Args);
6175   }
6176   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6177   llvm::Value *Args[] = {
6178       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6179                                 /*isSigned=*/true),
6180       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6181       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6182                                                       CGM.VoidPtrTy)};
6183   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6184                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
6185                              Args);
6186 }
6187
6188 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6189                                             SourceLocation Loc,
6190                                             bool IsWorksharingReduction) {
6191   // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
6192   // int gtid, int is_ws);
6193   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6194   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6195                                                 CGM.IntTy, /*isSigned=*/true);
6196   llvm::Value *Args[] = {IdentTLoc, GTid,
6197                          llvm::ConstantInt::get(CGM.IntTy,
6198                                                 IsWorksharingReduction ? 1 : 0,
6199                                                 /*isSigned=*/true)};
6200   (void)CGF.EmitRuntimeCall(
6201       OMPBuilder.getOrCreateRuntimeFunction(
6202           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6203       Args);
6204 }
6205
6206 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6207                                               SourceLocation Loc,
6208                                               ReductionCodeGen &RCG,
6209                                               unsigned N) {
6210   auto Sizes = RCG.getSizes(N);
6211   // Emit threadprivate global variable if the type is non-constant
6212   // (Sizes.second != nullptr).
6213   if (Sizes.second) {
6214     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6215                                                      /*isSigned=*/false);
6216     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6217         CGF, CGM.getContext().getSizeType(),
6218         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6219     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6220   }
6221 }
6222
6223 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6224                                               SourceLocation Loc,
6225                                               llvm::Value *ReductionsPtr,
6226                                               LValue SharedLVal) {
6227   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
6228   // void *d);
6229   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6230                                                    CGM.IntTy,
6231                                                    /*isSigned=*/true),
6232                          ReductionsPtr,
6233                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6234                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6235   return Address(
6236       CGF.EmitRuntimeCall(
6237           OMPBuilder.getOrCreateRuntimeFunction(
6238               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6239           Args),
6240       SharedLVal.getAlignment());
6241 }
6242
6243 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
6244                                        const OMPTaskDataTy &Data) {
6245   if (!CGF.HaveInsertPoint())
6246     return;
6247
6248   if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
6249     // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
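    // The OpenMPIRBuilder lowers this to the same __kmpc_omp_taskwait(<loc>,
    // <gtid>) call that the fallback path below emits explicitly.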
6250 OMPBuilder.createTaskwait(CGF.Builder); 6251 } else { 6252 llvm::Value *ThreadID = getThreadID(CGF, Loc); 6253 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 6254 auto &M = CGM.getModule(); 6255 Address DependenciesArray = Address::invalid(); 6256 llvm::Value *NumOfElements; 6257 std::tie(NumOfElements, DependenciesArray) = 6258 emitDependClause(CGF, Data.Dependences, Loc); 6259 llvm::Value *DepWaitTaskArgs[6]; 6260 if (!Data.Dependences.empty()) { 6261 DepWaitTaskArgs[0] = UpLoc; 6262 DepWaitTaskArgs[1] = ThreadID; 6263 DepWaitTaskArgs[2] = NumOfElements; 6264 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 6265 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 6266 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6267 6268 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 6269 6270 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 6271 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 6272 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 6273 // is specified. 6274 CGF.EmitRuntimeCall( 6275 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 6276 DepWaitTaskArgs); 6277 6278 } else { 6279 6280 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6281 // global_tid); 6282 llvm::Value *Args[] = {UpLoc, ThreadID}; 6283 // Ignore return result until untied tasks are supported. 6284 CGF.EmitRuntimeCall( 6285 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), 6286 Args); 6287 } 6288 } 6289 6290 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6291 Region->emitUntiedSwitch(CGF); 6292 } 6293 6294 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6295 OpenMPDirectiveKind InnerKind, 6296 const RegionCodeGenTy &CodeGen, 6297 bool HasCancel) { 6298 if (!CGF.HaveInsertPoint()) 6299 return; 6300 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 6301 InnerKind != OMPD_critical && 6302 InnerKind != OMPD_master && 6303 InnerKind != OMPD_masked); 6304 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6305 } 6306 6307 namespace { 6308 enum RTCancelKind { 6309 CancelNoreq = 0, 6310 CancelParallel = 1, 6311 CancelLoop = 2, 6312 CancelSections = 3, 6313 CancelTaskgroup = 4 6314 }; 6315 } // anonymous namespace 6316 6317 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6318 RTCancelKind CancelKind = CancelNoreq; 6319 if (CancelRegion == OMPD_parallel) 6320 CancelKind = CancelParallel; 6321 else if (CancelRegion == OMPD_for) 6322 CancelKind = CancelLoop; 6323 else if (CancelRegion == OMPD_sections) 6324 CancelKind = CancelSections; 6325 else { 6326 assert(CancelRegion == OMPD_taskgroup); 6327 CancelKind = CancelTaskgroup; 6328 } 6329 return CancelKind; 6330 } 6331 6332 void CGOpenMPRuntime::emitCancellationPointCall( 6333 CodeGenFunction &CGF, SourceLocation Loc, 6334 OpenMPDirectiveKind CancelRegion) { 6335 if (!CGF.HaveInsertPoint()) 6336 return; 6337 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6338 // global_tid, kmp_int32 cncl_kind); 6339 if (auto *OMPRegionInfo = 6340 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6341 // For 'cancellation point taskgroup', the task region info may not have a 6342 // cancel. This may instead happen in another adjacent task. 
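    // For example (a sketch): a sibling task may execute '#pragma omp cancel
    // taskgroup' while this task contains only '#pragma omp cancellation
    // point taskgroup', so the runtime check is emitted even though this
    // region has no cancel of its own.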
6343 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6344 llvm::Value *Args[] = { 6345 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6346 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6347 // Ignore return result until untied tasks are supported. 6348 llvm::Value *Result = CGF.EmitRuntimeCall( 6349 OMPBuilder.getOrCreateRuntimeFunction( 6350 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6351 Args); 6352 // if (__kmpc_cancellationpoint()) { 6353 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6354 // exit from construct; 6355 // } 6356 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6357 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6358 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6359 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6360 CGF.EmitBlock(ExitBB); 6361 if (CancelRegion == OMPD_parallel) 6362 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6363 // exit from construct; 6364 CodeGenFunction::JumpDest CancelDest = 6365 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6366 CGF.EmitBranchThroughCleanup(CancelDest); 6367 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6368 } 6369 } 6370 } 6371 6372 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6373 const Expr *IfCond, 6374 OpenMPDirectiveKind CancelRegion) { 6375 if (!CGF.HaveInsertPoint()) 6376 return; 6377 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6378 // kmp_int32 cncl_kind); 6379 auto &M = CGM.getModule(); 6380 if (auto *OMPRegionInfo = 6381 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6382 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6383 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6384 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6385 llvm::Value *Args[] = { 6386 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6387 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6388 // Ignore return result until untied tasks are supported. 6389 llvm::Value *Result = CGF.EmitRuntimeCall( 6390 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6391 // if (__kmpc_cancel()) { 6392 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6393 // exit from construct; 6394 // } 6395 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6396 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6397 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6398 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6399 CGF.EmitBlock(ExitBB); 6400 if (CancelRegion == OMPD_parallel) 6401 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6402 // exit from construct; 6403 CodeGenFunction::JumpDest CancelDest = 6404 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6405 CGF.EmitBranchThroughCleanup(CancelDest); 6406 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6407 }; 6408 if (IfCond) { 6409 emitIfClause(CGF, IfCond, ThenGen, 6410 [](CodeGenFunction &, PrePostActionTy &) {}); 6411 } else { 6412 RegionCodeGenTy ThenRCG(ThenGen); 6413 ThenRCG(CGF); 6414 } 6415 } 6416 } 6417 6418 namespace { 6419 /// Cleanup action for uses_allocators support. 
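/// For a directive such as (illustrative names)
/// \code
/// #pragma omp target uses_allocators(omp_default_mem_alloc, my_alloc(my_traits))
/// \endcode
/// Enter() runs emitUsesAllocatorsInit() and Exit() runs
/// emitUsesAllocatorsFini() for each allocator declared with a traits
/// expression (here 'my_alloc'); traitless predefined allocators are filtered
/// out by the caller and need no init/fini.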
6420 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6421 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6422 6423 public: 6424 OMPUsesAllocatorsActionTy( 6425 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6426 : Allocators(Allocators) {} 6427 void Enter(CodeGenFunction &CGF) override { 6428 if (!CGF.HaveInsertPoint()) 6429 return; 6430 for (const auto &AllocatorData : Allocators) { 6431 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6432 CGF, AllocatorData.first, AllocatorData.second); 6433 } 6434 } 6435 void Exit(CodeGenFunction &CGF) override { 6436 if (!CGF.HaveInsertPoint()) 6437 return; 6438 for (const auto &AllocatorData : Allocators) { 6439 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6440 AllocatorData.first); 6441 } 6442 } 6443 }; 6444 } // namespace 6445 6446 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6447 const OMPExecutableDirective &D, StringRef ParentName, 6448 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6449 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6450 assert(!ParentName.empty() && "Invalid target region parent name!"); 6451 HasEmittedTargetRegion = true; 6452 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6453 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6454 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6455 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6456 if (!D.AllocatorTraits) 6457 continue; 6458 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6459 } 6460 } 6461 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6462 CodeGen.setAction(UsesAllocatorAction); 6463 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6464 IsOffloadEntry, CodeGen); 6465 } 6466 6467 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6468 const Expr *Allocator, 6469 const Expr *AllocatorTraits) { 6470 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6471 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6472 // Use default memspace handle. 6473 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6474 llvm::Value *NumTraits = llvm::ConstantInt::get( 6475 CGF.IntTy, cast<ConstantArrayType>( 6476 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6477 ->getSize() 6478 .getLimitedValue()); 6479 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6480 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6481 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6482 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6483 AllocatorTraitsLVal.getBaseInfo(), 6484 AllocatorTraitsLVal.getTBAAInfo()); 6485 llvm::Value *Traits = 6486 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6487 6488 llvm::Value *AllocatorVal = 6489 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6490 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6491 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6492 // Store to allocator. 
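  // Declare the allocator variable locally in the region, then store the
  // handle returned by __kmpc_init_allocator into it.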
6493   CGF.EmitVarDecl(*cast<VarDecl>(
6494       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6495   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6496   AllocatorVal =
6497       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6498                                Allocator->getType(), Allocator->getExprLoc());
6499   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6500 }
6501
6502 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6503                                              const Expr *Allocator) {
6504   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6505   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6506   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6507   llvm::Value *AllocatorVal =
6508       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6509   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6510                                           CGF.getContext().VoidPtrTy,
6511                                           Allocator->getExprLoc());
6512   (void)CGF.EmitRuntimeCall(
6513       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6514                                             OMPRTL___kmpc_destroy_allocator),
6515       {ThreadId, AllocatorVal});
6516 }
6517
6518 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6519     const OMPExecutableDirective &D, StringRef ParentName,
6520     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6521     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6522   // Create a unique name for the entry function using the source location
6523   // information of the current target region. The name will be something like:
6524   //
6525   // __omp_offloading_DD_FFFF_PP_lBB
6526   //
6527   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6528   // mangled name of the function that encloses the target region and BB is the
6529   // line number of the target region.
6530
6531   unsigned DeviceID;
6532   unsigned FileID;
6533   unsigned Line;
6534   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6535                            Line);
6536   SmallString<64> EntryFnName;
6537   {
6538     llvm::raw_svector_ostream OS(EntryFnName);
6539     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6540        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6541   }
6542
6543   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6544
6545   CodeGenFunction CGF(CGM, true);
6546   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6547   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6548
6549   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6550
6551   // If this target outline function is not an offload entry, we don't need to
6552   // register it.
6553   if (!IsOffloadEntry)
6554     return;
6555
6556   // The target region ID is used by the runtime library to identify the
6557   // current target region, so it only has to be unique and not necessarily
6558   // point to anything. It could be the pointer to the outlined function that
6559   // implements the target region, but we aren't using that, so the compiler
6560   // does not need to keep it alive and may inline the host function if that
6561   // proves worthwhile during optimization. On the other hand, when emitting
6562   // code for the device, the ID has to be the function address so that it can
6563   // be retrieved from the offloading entry and launched by the runtime library.
6564   // We also mark the outlined function to have external linkage when emitting
6565   // code for the device, because these functions will be entry points to the device.
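  // In short: on the device the ID is the (bitcast) outlined function itself;
  // on the host it is a uniquely named, zero-initialized constant byte whose
  // address stands in for the region.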
6566 6567 if (CGM.getLangOpts().OpenMPIsDevice) { 6568 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6569 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6570 OutlinedFn->setDSOLocal(false); 6571 if (CGM.getTriple().isAMDGCN()) 6572 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 6573 } else { 6574 std::string Name = getName({EntryFnName, "region_id"}); 6575 OutlinedFnID = new llvm::GlobalVariable( 6576 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6577 llvm::GlobalValue::WeakAnyLinkage, 6578 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6579 } 6580 6581 // Register the information for the entry associated with this target region. 6582 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6583 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6584 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6585 6586 // Add NumTeams and ThreadLimit attributes to the outlined GPU function 6587 int32_t DefaultValTeams = -1; 6588 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); 6589 if (DefaultValTeams > 0) { 6590 OutlinedFn->addFnAttr("omp_target_num_teams", 6591 std::to_string(DefaultValTeams)); 6592 } 6593 int32_t DefaultValThreads = -1; 6594 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); 6595 if (DefaultValThreads > 0) { 6596 OutlinedFn->addFnAttr("omp_target_thread_limit", 6597 std::to_string(DefaultValThreads)); 6598 } 6599 6600 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); 6601 } 6602 6603 /// Checks if the expression is constant or does not have non-trivial function 6604 /// calls. 6605 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6606 // We can skip constant expressions. 6607 // We can skip expressions with trivial calls or simple expressions. 6608 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6609 !E->hasNonTrivialCall(Ctx)) && 6610 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6611 } 6612 6613 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6614 const Stmt *Body) { 6615 const Stmt *Child = Body->IgnoreContainers(); 6616 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6617 Child = nullptr; 6618 for (const Stmt *S : C->body()) { 6619 if (const auto *E = dyn_cast<Expr>(S)) { 6620 if (isTrivial(Ctx, E)) 6621 continue; 6622 } 6623 // Some of the statements can be ignored. 6624 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6625 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6626 continue; 6627 // Analyze declarations. 6628 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6629 if (llvm::all_of(DS->decls(), [](const Decl *D) { 6630 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6631 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6632 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6633 isa<UsingDirectiveDecl>(D) || 6634 isa<OMPDeclareReductionDecl>(D) || 6635 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6636 return true; 6637 const auto *VD = dyn_cast<VarDecl>(D); 6638 if (!VD) 6639 return false; 6640 return VD->hasGlobalStorage() || !VD->isUsed(); 6641 })) 6642 continue; 6643 } 6644 // Found multiple children - cannot get the one child only. 
6645 if (Child) 6646 return nullptr; 6647 Child = S; 6648 } 6649 if (Child) 6650 Child = Child->IgnoreContainers(); 6651 } 6652 return Child; 6653 } 6654 6655 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( 6656 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6657 int32_t &DefaultVal) { 6658 6659 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6660 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6661 "Expected target-based executable directive."); 6662 switch (DirectiveKind) { 6663 case OMPD_target: { 6664 const auto *CS = D.getInnermostCapturedStmt(); 6665 const auto *Body = 6666 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6667 const Stmt *ChildStmt = 6668 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6669 if (const auto *NestedDir = 6670 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6671 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6672 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6673 const Expr *NumTeams = 6674 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6675 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6676 if (auto Constant = 6677 NumTeams->getIntegerConstantExpr(CGF.getContext())) 6678 DefaultVal = Constant->getExtValue(); 6679 return NumTeams; 6680 } 6681 DefaultVal = 0; 6682 return nullptr; 6683 } 6684 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6685 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { 6686 DefaultVal = 1; 6687 return nullptr; 6688 } 6689 DefaultVal = 1; 6690 return nullptr; 6691 } 6692 // A value of -1 is used to check if we need to emit no teams region 6693 DefaultVal = -1; 6694 return nullptr; 6695 } 6696 case OMPD_target_teams: 6697 case OMPD_target_teams_distribute: 6698 case OMPD_target_teams_distribute_simd: 6699 case OMPD_target_teams_distribute_parallel_for: 6700 case OMPD_target_teams_distribute_parallel_for_simd: { 6701 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6702 const Expr *NumTeams = 6703 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6704 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6705 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) 6706 DefaultVal = Constant->getExtValue(); 6707 return NumTeams; 6708 } 6709 DefaultVal = 0; 6710 return nullptr; 6711 } 6712 case OMPD_target_parallel: 6713 case OMPD_target_parallel_for: 6714 case OMPD_target_parallel_for_simd: 6715 case OMPD_target_simd: 6716 DefaultVal = 1; 6717 return nullptr; 6718 case OMPD_parallel: 6719 case OMPD_for: 6720 case OMPD_parallel_for: 6721 case OMPD_parallel_master: 6722 case OMPD_parallel_sections: 6723 case OMPD_for_simd: 6724 case OMPD_parallel_for_simd: 6725 case OMPD_cancel: 6726 case OMPD_cancellation_point: 6727 case OMPD_ordered: 6728 case OMPD_threadprivate: 6729 case OMPD_allocate: 6730 case OMPD_task: 6731 case OMPD_simd: 6732 case OMPD_tile: 6733 case OMPD_unroll: 6734 case OMPD_sections: 6735 case OMPD_section: 6736 case OMPD_single: 6737 case OMPD_master: 6738 case OMPD_critical: 6739 case OMPD_taskyield: 6740 case OMPD_barrier: 6741 case OMPD_taskwait: 6742 case OMPD_taskgroup: 6743 case OMPD_atomic: 6744 case OMPD_flush: 6745 case OMPD_depobj: 6746 case OMPD_scan: 6747 case OMPD_teams: 6748 case OMPD_target_data: 6749 case OMPD_target_exit_data: 6750 case OMPD_target_enter_data: 6751 case OMPD_distribute: 6752 case OMPD_distribute_simd: 6753 case OMPD_distribute_parallel_for: 6754 case OMPD_distribute_parallel_for_simd: 6755 case 
OMPD_teams_distribute:
6756   case OMPD_teams_distribute_simd:
6757   case OMPD_teams_distribute_parallel_for:
6758   case OMPD_teams_distribute_parallel_for_simd:
6759   case OMPD_target_update:
6760   case OMPD_declare_simd:
6761   case OMPD_declare_variant:
6762   case OMPD_begin_declare_variant:
6763   case OMPD_end_declare_variant:
6764   case OMPD_declare_target:
6765   case OMPD_end_declare_target:
6766   case OMPD_declare_reduction:
6767   case OMPD_declare_mapper:
6768   case OMPD_taskloop:
6769   case OMPD_taskloop_simd:
6770   case OMPD_master_taskloop:
6771   case OMPD_master_taskloop_simd:
6772   case OMPD_parallel_master_taskloop:
6773   case OMPD_parallel_master_taskloop_simd:
6774   case OMPD_requires:
6775   case OMPD_metadirective:
6776   case OMPD_unknown:
6777     break;
6778   default:
6779     break;
6780   }
6781   llvm_unreachable("Unexpected directive kind.");
6782 }
6783
6784 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6785     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6786   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6787          "Clauses associated with the teams directive expected to be emitted "
6788          "only for the host!");
6789   CGBuilderTy &Bld = CGF.Builder;
6790   int32_t DefaultNT = -1;
6791   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6792   if (NumTeams != nullptr) {
6793     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6794
6795     switch (DirectiveKind) {
6796     case OMPD_target: {
6797       const auto *CS = D.getInnermostCapturedStmt();
6798       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6799       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6800       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6801                                                     /*IgnoreResultAssign*/ true);
6802       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6803                                /*isSigned=*/true);
6804     }
6805     case OMPD_target_teams:
6806     case OMPD_target_teams_distribute:
6807     case OMPD_target_teams_distribute_simd:
6808     case OMPD_target_teams_distribute_parallel_for:
6809     case OMPD_target_teams_distribute_parallel_for_simd: {
6810       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6811       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6812                                                     /*IgnoreResultAssign*/ true);
6813       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6814                                /*isSigned=*/true);
6815     }
6816     default:
6817       break;
6818     }
6819   } else if (DefaultNT == -1) {
6820     return nullptr;
6821   }
6822
6823   return Bld.getInt32(DefaultNT);
6824 }
6825
6826 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6827                                   llvm::Value *DefaultThreadLimitVal) {
6828   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6829       CGF.getContext(), CS->getCapturedStmt());
6830   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6831     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6832       llvm::Value *NumThreads = nullptr;
6833       llvm::Value *CondVal = nullptr;
6834       // Handle the if clause. If the if clause is present, the number of
6835       // threads is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
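      // Sketch with illustrative names: for '#pragma omp parallel if(c)
      // num_threads(n)' found here, the value computed below is 'c ? n : 1',
      // with 'n' additionally clamped to DefaultThreadLimitVal if one is set.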
6836 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6837 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6838 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6839 const OMPIfClause *IfClause = nullptr; 6840 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6841 if (C->getNameModifier() == OMPD_unknown || 6842 C->getNameModifier() == OMPD_parallel) { 6843 IfClause = C; 6844 break; 6845 } 6846 } 6847 if (IfClause) { 6848 const Expr *Cond = IfClause->getCondition(); 6849 bool Result; 6850 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6851 if (!Result) 6852 return CGF.Builder.getInt32(1); 6853 } else { 6854 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6855 if (const auto *PreInit = 6856 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6857 for (const auto *I : PreInit->decls()) { 6858 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6859 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6860 } else { 6861 CodeGenFunction::AutoVarEmission Emission = 6862 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6863 CGF.EmitAutoVarCleanups(Emission); 6864 } 6865 } 6866 } 6867 CondVal = CGF.EvaluateExprAsBool(Cond); 6868 } 6869 } 6870 } 6871 // Check the value of num_threads clause iff if clause was not specified 6872 // or is not evaluated to false. 6873 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6874 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6875 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6876 const auto *NumThreadsClause = 6877 Dir->getSingleClause<OMPNumThreadsClause>(); 6878 CodeGenFunction::LexicalScope Scope( 6879 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6880 if (const auto *PreInit = 6881 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6882 for (const auto *I : PreInit->decls()) { 6883 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6884 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6885 } else { 6886 CodeGenFunction::AutoVarEmission Emission = 6887 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6888 CGF.EmitAutoVarCleanups(Emission); 6889 } 6890 } 6891 } 6892 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6893 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6894 /*isSigned=*/false); 6895 if (DefaultThreadLimitVal) 6896 NumThreads = CGF.Builder.CreateSelect( 6897 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6898 DefaultThreadLimitVal, NumThreads); 6899 } else { 6900 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6901 : CGF.Builder.getInt32(0); 6902 } 6903 // Process condition of the if clause. 6904 if (CondVal) { 6905 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6906 CGF.Builder.getInt32(1)); 6907 } 6908 return NumThreads; 6909 } 6910 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6911 return CGF.Builder.getInt32(1); 6912 return DefaultThreadLimitVal; 6913 } 6914 return DefaultThreadLimitVal ? 
DefaultThreadLimitVal
6915                               : CGF.Builder.getInt32(0);
6916 }
6917
6918 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6919     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6920     int32_t &DefaultVal) {
6921   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6922   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6923          "Expected target-based executable directive.");
6924
6925   switch (DirectiveKind) {
6926   case OMPD_target:
6927     // A plain 'target' construct has no thread_limit clause.
6928     return nullptr;
6929   case OMPD_target_teams:
6930   case OMPD_target_teams_distribute:
6931     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6932       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6933       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6934       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6935         if (auto Constant =
6936                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6937           DefaultVal = Constant->getExtValue();
6938       return ThreadLimit;
6939     }
6940     return nullptr;
6941   case OMPD_target_parallel:
6942   case OMPD_target_parallel_for:
6943   case OMPD_target_parallel_for_simd:
6944   case OMPD_target_teams_distribute_parallel_for:
6945   case OMPD_target_teams_distribute_parallel_for_simd: {
6946     Expr *ThreadLimit = nullptr;
6947     Expr *NumThreads = nullptr;
6948     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6949       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6950       ThreadLimit = ThreadLimitClause->getThreadLimit();
6951       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6952         if (auto Constant =
6953                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6954           DefaultVal = Constant->getExtValue();
6955     }
6956     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6957       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6958       NumThreads = NumThreadsClause->getNumThreads();
6959       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6960         if (auto Constant =
6961                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6962           if (Constant->getExtValue() < DefaultVal) {
6963             DefaultVal = Constant->getExtValue();
6964             ThreadLimit = NumThreads;
6965           }
6966         }
6967       }
6968     }
6969     return ThreadLimit;
6970   }
6971   case OMPD_target_teams_distribute_simd:
6972   case OMPD_target_simd:
6973     DefaultVal = 1;
6974     return nullptr;
6975   case OMPD_parallel:
6976   case OMPD_for:
6977   case OMPD_parallel_for:
6978   case OMPD_parallel_master:
6979   case OMPD_parallel_sections:
6980   case OMPD_for_simd:
6981   case OMPD_parallel_for_simd:
6982   case OMPD_cancel:
6983   case OMPD_cancellation_point:
6984   case OMPD_ordered:
6985   case OMPD_threadprivate:
6986   case OMPD_allocate:
6987   case OMPD_task:
6988   case OMPD_simd:
6989   case OMPD_tile:
6990   case OMPD_unroll:
6991   case OMPD_sections:
6992   case OMPD_section:
6993   case OMPD_single:
6994   case OMPD_master:
6995   case OMPD_critical:
6996   case OMPD_taskyield:
6997   case OMPD_barrier:
6998   case OMPD_taskwait:
6999   case OMPD_taskgroup:
7000   case OMPD_atomic:
7001   case OMPD_flush:
7002   case OMPD_depobj:
7003   case OMPD_scan:
7004   case OMPD_teams:
7005   case OMPD_target_data:
7006   case OMPD_target_exit_data:
7007   case OMPD_target_enter_data:
7008   case OMPD_distribute:
7009   case OMPD_distribute_simd:
7010   case OMPD_distribute_parallel_for:
7011   case OMPD_distribute_parallel_for_simd:
7012   case OMPD_teams_distribute:
7013   case OMPD_teams_distribute_simd:
7014   case OMPD_teams_distribute_parallel_for:
7015   case OMPD_teams_distribute_parallel_for_simd:
7016   case OMPD_target_update:
7017   case
OMPD_declare_simd: 7018 case OMPD_declare_variant: 7019 case OMPD_begin_declare_variant: 7020 case OMPD_end_declare_variant: 7021 case OMPD_declare_target: 7022 case OMPD_end_declare_target: 7023 case OMPD_declare_reduction: 7024 case OMPD_declare_mapper: 7025 case OMPD_taskloop: 7026 case OMPD_taskloop_simd: 7027 case OMPD_master_taskloop: 7028 case OMPD_master_taskloop_simd: 7029 case OMPD_parallel_master_taskloop: 7030 case OMPD_parallel_master_taskloop_simd: 7031 case OMPD_requires: 7032 case OMPD_unknown: 7033 break; 7034 default: 7035 break; 7036 } 7037 llvm_unreachable("Unsupported directive kind."); 7038 } 7039 7040 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( 7041 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 7042 assert(!CGF.getLangOpts().OpenMPIsDevice && 7043 "Clauses associated with the teams directive expected to be emitted " 7044 "only for the host!"); 7045 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7046 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7047 "Expected target-based executable directive."); 7048 CGBuilderTy &Bld = CGF.Builder; 7049 llvm::Value *ThreadLimitVal = nullptr; 7050 llvm::Value *NumThreadsVal = nullptr; 7051 switch (DirectiveKind) { 7052 case OMPD_target: { 7053 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7054 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7055 return NumThreads; 7056 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7057 CGF.getContext(), CS->getCapturedStmt()); 7058 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7059 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 7060 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7061 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7062 const auto *ThreadLimitClause = 7063 Dir->getSingleClause<OMPThreadLimitClause>(); 7064 CodeGenFunction::LexicalScope Scope( 7065 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 7066 if (const auto *PreInit = 7067 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 7068 for (const auto *I : PreInit->decls()) { 7069 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7070 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7071 } else { 7072 CodeGenFunction::AutoVarEmission Emission = 7073 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7074 CGF.EmitAutoVarCleanups(Emission); 7075 } 7076 } 7077 } 7078 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7079 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7080 ThreadLimitVal = 7081 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7082 } 7083 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 7084 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 7085 CS = Dir->getInnermostCapturedStmt(); 7086 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7087 CGF.getContext(), CS->getCapturedStmt()); 7088 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 7089 } 7090 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 7091 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 7092 CS = Dir->getInnermostCapturedStmt(); 7093 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7094 return NumThreads; 7095 } 7096 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 7097 return Bld.getInt32(1); 7098 } 7099 return ThreadLimitVal ? 
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle the if clause. If the if clause is present, the number of threads
    // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
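    // E.g. (a sketch, not the literal emitted IR): for
    //   #pragma omp target parallel if(use_dev) num_threads(8)
    // this lowers to a select roughly equivalent to: use_dev ? 8 : 1, where 0
    // would stand in for the bound if num_threads were absent.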
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have
    /// the use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    // Increment and decrement a separate reference counter so that the data
    // cannot be unmapped within the associated region. Thus, this flag is
    // intended to be used on 'target' and 'target data' directives because
    // they are inherently structured. It is not intended to be used on
    // 'target enter data' and 'target exit data' directives because they are
    // inherently dynamic.
    // This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in the target update directive.
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is a member of
    /// some struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
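  // Worked example (flag values from the enum above, for orientation only):
  // an explicit 'map(tofrom: x)' argument of a target region is encoded as
  //   OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_TARGET_PARAM == 0x23,
  // while the pointee half of a mapped struct-member pointer carries
  //   OMP_MAP_PTR_AND_OBJ plus a MEMBER_OF index in the 16 MSBs.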
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that holds debugging information for a data mapping to be passed
  /// to the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  /// Class that associates information with a base pointer to be passed to
  /// the runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
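  // For reference: with OMP_MAP_MEMBER_OF == 0xffff000000000000, the loop in
  // getFlagMemberOffset() above returns 48, so MEMBER_OF(n) is encoded as
  // ((uint64_t)n) << 48 (see getMemberOfFlag() further below).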
  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types,
  /// user-defined mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };
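  // The arrays in MapCombinedInfoTy are kept index-aligned: entry i of Exprs,
  // BasePointers, Pointers, Sizes, Types, and Mappers all describe the i-th
  // map entry handed to the runtime. An illustrative sketch for 'map(s.p)':
  //   BasePointers[i] = &s, Pointers[i] = &s.p, Sizes[i] = sizeof(double*),
  //   Types[i] = TARGET_PARAM | TO | FROM, Mappers[i] = nullptr.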
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Information gathered for a single mappable expression component list:
  /// the map type and modifiers, and how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for an array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }
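    // A sketch of the computation above: for 'target update to(([n][m])ptr)'
    // the emitted size is sizeof(*ptr) * n * m, built with NUW multiplies
    // since array dimensions are non-negative.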
    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of
    // relying on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, then we are using the whole length of
      // the base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizeof(base type) - lb * sizeof(element), clamped below to
      // zero.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these
      // two type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OMP_MAP_NON_CONTIG;
    return Bits;
  }
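  // E.g. (bit values from the enum above): 'map(always, close, tofrom: v)'
  // with AddPtrFlag and AddIsTargetParamFlag both false yields
  // OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE == 0x407.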
  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size of more than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
      bool IsFirstComponentList, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ |
    //   TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ |
    //   FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ |
    //   TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ |
    //   TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a
    // capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;
    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer; it
        // can be associated with the combined storage if shared memory mode
        // is active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF
    // some combined entry (for partial structs). Only the first PTR_AND_OBJ
    // entry in a component list should be marked as MEMBER_OF; all subsequent
    // entries do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)  (2)  (3)  (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct.
    // ps(3) is the pointee of ps(2), which is not a member of struct s, so it
    // should not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a
    // component in the component list which is a member expression. Useful
    // when we have a pointer or a final array section, in which case it is
    // the previous component in the list which tells us whether we have a
    // member expression. E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct),
      // mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as a complex pointer if this is actually not an
          // array-like expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer, or if it is an array
      // section whose length can't be proved to be one. If this is a pointer,
      // it becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array
      // section as a final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that; otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                                    CGF.getContext().getTypeAlignInChars(
                                        OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress(CGF);
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress(CGF);
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the
        // object it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle the base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers,
                             IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress(CGF);
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress(CGF);
                }
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.Int8Ty,
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
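        // A sketch of the overlap handling above: when a struct is mapped
        // together with finer-grained maps of some of its members, the loop
        // bit-copies only the regions between the overlapped members; the
        // members themselves get their own entries later, built from the
        // PreliminaryMapData swapped out above.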
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, MotionModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference ||
                  FirstPointerInComplexData || IsMemberReference,
              IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as
            // well, then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the
              // flag should be later updated with the correct value of
              // MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          CombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of
        // the mapped member. If the parent is "*this", then the value
        // declaration is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this
          // struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LowestElem};
          }
        }

        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;
        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran through the whole component list without encountering a
    // member expression, the whole record is mapped - allocate space for the
    // complete record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // To support strides in array sections, we initialize the first dimension
    // size as 1, the first offset as 0, and the first count as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect the size information for each dimension and get the element
    // size as the first stride. For example, for `int arr[10][10]`, the
    // DimSizes should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last one.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get the element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for the next iteration.
        if (ElementType) {
          // For the case of having a pointer as the base, we need to remove
          // one level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get the dimension value except for the last dimension, since we
      // don't need it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CAT->getSize().getZExtValue()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto DI = DimSizes.begin() + 1;
    // Running product of dimension sizes.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect the info for the non-contiguous case. Notice that offset,
    // count, and stride are only meaningful for array sections, so we insert
    // a null for anything other than an array section.
    // Also, the number of offsets, counts, and strides is not the same as
    // the number of pointers, base_pointers, sizes, or dims; instead, it
    // equals the number of non-contiguous declarations in the target update
    // to/from clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If the offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, all the
        // lower dimensions are also constructed as array sections; however,
        // for a case like arr[0:2][2], Clang constructs the inner dimension
        // as an array section even though it is not one in array section
        // form according to the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty,
                                        /*isSigned=*/false);
      CurCounts.push_back(Count);
      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //       Offset  Count  Stride
      //    D0    0      1      4    (int)    <- dummy dimension
      //    D1    0      2      8    (2 * (1) * 4)
      //    D2    1      2      20   (1 * (1 * 5) * 4)
      //    D3    0      2      200  (2 * (1 * 5 * 5) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // For map(to: lambda): use the user-specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Shift the position left by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }
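  // Usage sketch for the two helpers above: an entry whose parent struct is
  // the first argument passed to the runtime gets
  // getMemberOfFlag(/*Position=*/0), i.e. MEMBER_OF(1) == 1ULL << 48, and
  // setCorrectMemberOfFlag() swaps the 0xFFFF placeholder for that value,
  // leaving PTR_AND_OBJ entries without the placeholder untouched.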
RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8492 8493 unsigned NumElements = St->getNumElements(); 8494 llvm::SmallVector< 8495 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8496 RecordLayout(NumElements); 8497 8498 // Fill bases. 8499 for (const auto &I : RD->bases()) { 8500 if (I.isVirtual()) 8501 continue; 8502 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8503 // Ignore empty bases. 8504 if (Base->isEmpty() || CGF.getContext() 8505 .getASTRecordLayout(Base) 8506 .getNonVirtualSize() 8507 .isZero()) 8508 continue; 8509 8510 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8511 RecordLayout[FieldIndex] = Base; 8512 } 8513 // Fill in virtual bases. 8514 for (const auto &I : RD->vbases()) { 8515 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8516 // Ignore empty bases. 8517 if (Base->isEmpty()) 8518 continue; 8519 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8520 if (RecordLayout[FieldIndex]) 8521 continue; 8522 RecordLayout[FieldIndex] = Base; 8523 } 8524 // Fill in all the fields. 8525 assert(!RD->isUnion() && "Unexpected union."); 8526 for (const auto *Field : RD->fields()) { 8527 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8528 // will fill in later.) 8529 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8530 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8531 RecordLayout[FieldIndex] = Field; 8532 } 8533 } 8534 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8535 &Data : RecordLayout) { 8536 if (Data.isNull()) 8537 continue; 8538 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8539 getPlainLayout(Base, Layout, /*AsBase=*/true); 8540 else 8541 Layout.push_back(Data.get<const FieldDecl *>()); 8542 } 8543 } 8544 8545 /// Generate all the base pointers, section pointers, sizes, map types, and 8546 /// mappers for the extracted mappable expressions (all included in \a 8547 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8548 /// pair of the relevant declaration and index where it occurs is appended to 8549 /// the device pointers info array. 8550 void generateAllInfoForClauses( 8551 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 8552 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8553 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8554 // We have to process the component lists that relate with the same 8555 // declaration in a single chunk so that we can generate the map flags 8556 // correctly. Therefore, we organize all lists in a map. 8557 enum MapKind { Present, Allocs, Other, Total }; 8558 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8559 SmallVector<SmallVector<MapInfo, 8>, 4>> 8560 Info; 8561 8562 // Helper function to fill the information map for the different supported 8563 // clauses. 
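// For instance (an illustrative example, not taken from any particular
// test): given 'map(present, to: a) map(alloc: b)', InfoGen records a's
// component list under Info[a][Present] and b's under Info[b][Allocs], so
// that all lists relating to one declaration are processed as one chunk.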
8564 auto &&InfoGen =
8565 [&Info, &SkipVarSet](
8566 const ValueDecl *D, MapKind Kind,
8567 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8568 OpenMPMapClauseKind MapType,
8569 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8570 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8571 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8572 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8573 if (SkipVarSet.contains(D))
8574 return;
8575 auto It = Info.find(D);
8576 if (It == Info.end())
8577 It = Info
8578 .insert(std::make_pair(
8579 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8580 .first;
8581 It->second[Kind].emplace_back(
8582 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8583 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8584 };
8585
8586 for (const auto *Cl : Clauses) {
8587 const auto *C = dyn_cast<OMPMapClause>(Cl);
8588 if (!C)
8589 continue;
8590 MapKind Kind = Other;
8591 if (llvm::is_contained(C->getMapTypeModifiers(),
8592 OMPC_MAP_MODIFIER_present))
8593 Kind = Present;
8594 else if (C->getMapType() == OMPC_MAP_alloc)
8595 Kind = Allocs;
8596 const auto *EI = C->getVarRefs().begin();
8597 for (const auto L : C->component_lists()) {
8598 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8599 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8600 C->getMapTypeModifiers(), llvm::None,
8601 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8602 E);
8603 ++EI;
8604 }
8605 }
8606 for (const auto *Cl : Clauses) {
8607 const auto *C = dyn_cast<OMPToClause>(Cl);
8608 if (!C)
8609 continue;
8610 MapKind Kind = Other;
8611 if (llvm::is_contained(C->getMotionModifiers(),
8612 OMPC_MOTION_MODIFIER_present))
8613 Kind = Present;
8614 const auto *EI = C->getVarRefs().begin();
8615 for (const auto L : C->component_lists()) {
8616 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8617 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8618 C->isImplicit(), std::get<2>(L), *EI);
8619 ++EI;
8620 }
8621 }
8622 for (const auto *Cl : Clauses) {
8623 const auto *C = dyn_cast<OMPFromClause>(Cl);
8624 if (!C)
8625 continue;
8626 MapKind Kind = Other;
8627 if (llvm::is_contained(C->getMotionModifiers(),
8628 OMPC_MOTION_MODIFIER_present))
8629 Kind = Present;
8630 const auto *EI = C->getVarRefs().begin();
8631 for (const auto L : C->component_lists()) {
8632 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8633 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8634 C->isImplicit(), std::get<2>(L), *EI);
8635 ++EI;
8636 }
8637 }
8638
8639 // Look at the use_device_ptr clause information and mark the existing map
8640 // entries as such. If there is no map information for an entry in the
8641 // use_device_ptr list, we create one with map type 'alloc' and a zero-size
8642 // section. It is the user's fault if that was not mapped before. If there is
8643 // no map information and the pointer is a struct member, then we defer the
8644 // emission of that entry until the whole struct has been processed.
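// For instance (illustrative, assuming a local 'int *p'): with
// 'map(tofrom: p[0:n]) use_device_ptr(p)' the existing entry for 'p' is
// simply marked to return the device pointer, while a bare
// 'use_device_ptr(p)' with no map of 'p' produces the zero-size entry
// described above.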
8645 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8646 SmallVector<DeferredDevicePtrEntryTy, 4>>
8647 DeferredInfo;
8648 MapCombinedInfoTy UseDevicePtrCombinedInfo;
8649
8650 for (const auto *Cl : Clauses) {
8651 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8652 if (!C)
8653 continue;
8654 for (const auto L : C->component_lists()) {
8655 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8656 std::get<1>(L);
8657 assert(!Components.empty() &&
8658 "Not expecting empty list of components!");
8659 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8660 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8661 const Expr *IE = Components.back().getAssociatedExpression();
8662 // If the first component is a member expression, we have to look into
8663 // 'this', which maps to null in the map of map information. Otherwise
8664 // look directly for the information.
8665 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8666
8667 // We potentially have map information for this declaration already.
8668 // Look for the first set of components that refer to it.
8669 if (It != Info.end()) {
8670 bool Found = false;
8671 for (auto &Data : It->second) {
8672 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8673 return MI.Components.back().getAssociatedDeclaration() == VD;
8674 });
8675 // If we found a map entry, signal that the pointer has to be
8676 // returned and move on to the next declaration. Exclude cases where
8677 // the base pointer is mapped as an array subscript, array section,
8678 // or array shaping. The base address is passed as a pointer to the
8679 // base in this case and cannot be used as the base of a
8680 // use_device_ptr list item.
8681 if (CI != Data.end()) {
8682 auto PrevCI = std::next(CI->Components.rbegin());
8683 const auto *VarD = dyn_cast<VarDecl>(VD);
8684 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8685 isa<MemberExpr>(IE) ||
8686 !VD->getType().getNonReferenceType()->isPointerType() ||
8687 PrevCI == CI->Components.rend() ||
8688 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8689 VarD->hasLocalStorage()) {
8690 CI->ReturnDevicePointer = true;
8691 Found = true;
8692 break;
8693 }
8694 }
8695 }
8696 if (Found)
8697 continue;
8698 }
8699
8700 // We didn't find any match in our map information; generate a zero-size
8701 // array section. If the pointer is a struct member, we defer this
8702 // action until the whole struct has been processed.
8703 if (isa<MemberExpr>(IE)) {
8704 // Insert the pointer into Info to be processed by
8705 // generateInfoForComponentList. Because it is a member pointer
8706 // without a pointee, no entry will be generated for it, therefore
8707 // we need to generate one after the whole struct has been processed.
8708 // Nonetheless, generateInfoForComponentList must be called to take
8709 // the pointer into account for the calculation of the range of the
8710 // partial struct.
8711 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8712 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8713 nullptr);
8714 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8715 } else {
8716 llvm::Value *Ptr =
8717 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8718 UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8719 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8720 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8721 UseDevicePtrCombinedInfo.Sizes.push_back(
8722 llvm::Constant::getNullValue(CGF.Int64Ty));
8723 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8724 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8725 }
8726 }
8727 }
8728
8729 // Look at the use_device_addr clause information and mark the existing map
8730 // entries as such. If there is no map information for an entry in the
8731 // use_device_addr list, we create one with map type 'alloc' and a zero-size
8732 // section. It is the user's fault if that was not mapped before. If there is
8733 // no map information and the pointer is a struct member, then we defer the
8734 // emission of that entry until the whole struct has been processed.
8735 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8736 for (const auto *Cl : Clauses) {
8737 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8738 if (!C)
8739 continue;
8740 for (const auto L : C->component_lists()) {
8741 assert(!std::get<1>(L).empty() &&
8742 "Not expecting empty list of components!");
8743 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8744 if (!Processed.insert(VD).second)
8745 continue;
8746 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8747 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8748 // If the first component is a member expression, we have to look into
8749 // 'this', which maps to null in the map of map information. Otherwise
8750 // look directly for the information.
8751 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8752
8753 // We potentially have map information for this declaration already.
8754 // Look for the first set of components that refer to it.
8755 if (It != Info.end()) {
8756 bool Found = false;
8757 for (auto &Data : It->second) {
8758 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8759 return MI.Components.back().getAssociatedDeclaration() == VD;
8760 });
8761 // If we found a map entry, signal that the pointer has to be
8762 // returned and move on to the next declaration.
8763 if (CI != Data.end()) {
8764 CI->ReturnDevicePointer = true;
8765 Found = true;
8766 break;
8767 }
8768 }
8769 if (Found)
8770 continue;
8771 }
8772
8773 // We didn't find any match in our map information; generate a zero-size
8774 // array section. If the pointer is a struct member, we defer this
8775 // action until the whole struct has been processed.
8776 if (isa<MemberExpr>(IE)) {
8777 // Insert the pointer into Info to be processed by
8778 // generateInfoForComponentList. Because it is a member pointer
8779 // without a pointee, no entry will be generated for it, therefore
8780 // we need to generate one after the whole struct has been processed.
8781 // Nonetheless, generateInfoForComponentList must be called to take
8782 // the pointer into account for the calculation of the range of the
8783 // partial struct.
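// For instance (illustrative): for 'use_device_addr(s.ptr)' where 's.ptr'
// appears in no map clause, the zero-size entry for '&s.ptr' cannot be
// emitted yet, so it is recorded in DeferredInfo and materialized as a
// RETURN_PARAM entry once the enclosing struct 's' has been processed.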
8784 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8785 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8786 nullptr, nullptr, /*ForDeviceAddr=*/true); 8787 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8788 } else { 8789 llvm::Value *Ptr; 8790 if (IE->isGLValue()) 8791 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8792 else 8793 Ptr = CGF.EmitScalarExpr(IE); 8794 CombinedInfo.Exprs.push_back(VD); 8795 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8796 CombinedInfo.Pointers.push_back(Ptr); 8797 CombinedInfo.Sizes.push_back( 8798 llvm::Constant::getNullValue(CGF.Int64Ty)); 8799 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8800 CombinedInfo.Mappers.push_back(nullptr); 8801 } 8802 } 8803 } 8804 8805 for (const auto &Data : Info) { 8806 StructRangeInfoTy PartialStruct; 8807 // Temporary generated information. 8808 MapCombinedInfoTy CurInfo; 8809 const Decl *D = Data.first; 8810 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8811 for (const auto &M : Data.second) { 8812 for (const MapInfo &L : M) { 8813 assert(!L.Components.empty() && 8814 "Not expecting declaration with no component lists."); 8815 8816 // Remember the current base pointer index. 8817 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8818 CurInfo.NonContigInfo.IsNonContiguous = 8819 L.Components.back().isNonContiguous(); 8820 generateInfoForComponentList( 8821 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8822 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8823 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8824 8825 // If this entry relates with a device pointer, set the relevant 8826 // declaration and add the 'return pointer' flag. 8827 if (L.ReturnDevicePointer) { 8828 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8829 "Unexpected number of mapped base pointers."); 8830 8831 const ValueDecl *RelevantVD = 8832 L.Components.back().getAssociatedDeclaration(); 8833 assert(RelevantVD && 8834 "No relevant declaration related with device pointer??"); 8835 8836 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8837 RelevantVD); 8838 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8839 } 8840 } 8841 } 8842 8843 // Append any pending zero-length pointers which are struct members and 8844 // used with use_device_ptr or use_device_addr. 8845 auto CI = DeferredInfo.find(Data.first); 8846 if (CI != DeferredInfo.end()) { 8847 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8848 llvm::Value *BasePtr; 8849 llvm::Value *Ptr; 8850 if (L.ForDeviceAddr) { 8851 if (L.IE->isGLValue()) 8852 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8853 else 8854 Ptr = this->CGF.EmitScalarExpr(L.IE); 8855 BasePtr = Ptr; 8856 // Entry is RETURN_PARAM. Also, set the placeholder value 8857 // MEMBER_OF=FFFF so that the entry is later updated with the 8858 // correct value of MEMBER_OF. 8859 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8860 } else { 8861 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8862 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8863 L.IE->getExprLoc()); 8864 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8865 // placeholder value MEMBER_OF=FFFF so that the entry is later 8866 // updated with the correct value of MEMBER_OF. 
8867 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8868 OMP_MAP_MEMBER_OF); 8869 } 8870 CurInfo.Exprs.push_back(L.VD); 8871 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8872 CurInfo.Pointers.push_back(Ptr); 8873 CurInfo.Sizes.push_back( 8874 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8875 CurInfo.Mappers.push_back(nullptr); 8876 } 8877 } 8878 // If there is an entry in PartialStruct it means we have a struct with 8879 // individual members mapped. Emit an extra combined entry. 8880 if (PartialStruct.Base.isValid()) { 8881 CurInfo.NonContigInfo.Dims.push_back(0); 8882 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); 8883 } 8884 8885 // We need to append the results of this capture to what we already 8886 // have. 8887 CombinedInfo.append(CurInfo); 8888 } 8889 // Append data for use_device_ptr clauses. 8890 CombinedInfo.append(UseDevicePtrCombinedInfo); 8891 } 8892 8893 public: 8894 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8895 : CurDir(&Dir), CGF(CGF) { 8896 // Extract firstprivate clause information. 8897 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8898 for (const auto *D : C->varlists()) 8899 FirstPrivateDecls.try_emplace( 8900 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8901 // Extract implicit firstprivates from uses_allocators clauses. 8902 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8903 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8904 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8905 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8906 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8907 /*Implicit=*/true); 8908 else if (const auto *VD = dyn_cast<VarDecl>( 8909 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8910 ->getDecl())) 8911 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8912 } 8913 } 8914 // Extract device pointer clause information. 8915 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8916 for (auto L : C->component_lists()) 8917 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8918 // Extract map information. 8919 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { 8920 if (C->getMapType() != OMPC_MAP_to) 8921 continue; 8922 for (auto L : C->component_lists()) { 8923 const ValueDecl *VD = std::get<0>(L); 8924 const auto *RD = VD ? VD->getType() 8925 .getCanonicalType() 8926 .getNonReferenceType() 8927 ->getAsCXXRecordDecl() 8928 : nullptr; 8929 if (RD && RD->isLambda()) 8930 LambdasMap.try_emplace(std::get<0>(L), C); 8931 } 8932 } 8933 } 8934 8935 /// Constructor for the declare mapper directive. 8936 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8937 : CurDir(&Dir), CGF(CGF) {} 8938 8939 /// Generate code for the combined entry if we have a partially mapped struct 8940 /// and take care of the mapping flags of the arguments corresponding to 8941 /// individual struct members. 
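/// For example (illustrative): for
/// \code
///   struct S { int a; double b; } s;
///   #pragma omp target map(to: s.a) map(from: s.b)
/// \endcode
/// a single combined entry covering the range [&s.a, &s.b + 1) is emitted
/// for 's', and the member entries are rewritten to be MEMBER_OF it.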
8942 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8943 MapFlagsArrayTy &CurTypes,
8944 const StructRangeInfoTy &PartialStruct,
8945 const ValueDecl *VD = nullptr,
8946 bool NotTargetParams = true) const {
8947 if (CurTypes.size() == 1 &&
8948 ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8949 !PartialStruct.IsArraySection)
8950 return;
8951 Address LBAddr = PartialStruct.LowestElem.second;
8952 Address HBAddr = PartialStruct.HighestElem.second;
8953 if (PartialStruct.HasCompleteRecord) {
8954 LBAddr = PartialStruct.LB;
8955 HBAddr = PartialStruct.LB;
8956 }
8957 CombinedInfo.Exprs.push_back(VD);
8958 // Base is the base of the struct
8959 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8960 // Pointer is the address of the lowest element
8961 llvm::Value *LB = LBAddr.getPointer();
8962 CombinedInfo.Pointers.push_back(LB);
8963 // There should not be a mapper for a combined entry.
8964 CombinedInfo.Mappers.push_back(nullptr);
8965 // Size is (addr of {highest+1} element) - (addr of lowest element)
8966 llvm::Value *HB = HBAddr.getPointer();
8967 llvm::Value *HAddr =
8968 CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8969 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8970 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8971 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8972 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8973 /*isSigned=*/false);
8974 CombinedInfo.Sizes.push_back(Size);
8975 // The map type is always TARGET_PARAM when generating info for captures.
8976 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8977 : OMP_MAP_TARGET_PARAM);
8978 // If any element has the present modifier, then make sure the runtime
8979 // doesn't attempt to allocate the struct.
8980 if (CurTypes.end() !=
8981 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8982 return Type & OMP_MAP_PRESENT;
8983 }))
8984 CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8985 // Remove TARGET_PARAM flag from the first element
8986 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8987 // If any element has the ompx_hold modifier, then make sure the runtime
8988 // uses the hold reference count for the struct as a whole so that it won't
8989 // be unmapped by an extra dynamic reference count decrement. Add it to all
8990 // elements as well so the runtime knows which reference count to check
8991 // when determining whether it's time for device-to-host transfers of
8992 // individual elements.
8993 if (CurTypes.end() !=
8994 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8995 return Type & OMP_MAP_OMPX_HOLD;
8996 })) {
8997 CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
8998 for (auto &M : CurTypes)
8999 M |= OMP_MAP_OMPX_HOLD;
9000 }
9001
9002 // All other current entries will be MEMBER_OF the combined entry
9003 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9004 // 0xFFFF in the MEMBER_OF field).
9005 OpenMPOffloadMappingFlags MemberOfFlag =
9006 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
9007 for (auto &M : CurTypes)
9008 setCorrectMemberOfFlag(M, MemberOfFlag);
9009 }
9010
9011 /// Generate all the base pointers, section pointers, sizes, map types, and
9012 /// mappers for the extracted mappable expressions (all included in \a
9013 /// CombinedInfo).
Also, for each item that relates with a device pointer, a
9014 /// pair of the relevant declaration and index where it occurs is appended to
9015 /// the device pointers info array.
9016 void generateAllInfo(
9017 MapCombinedInfoTy &CombinedInfo,
9018 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9019 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9020 assert(CurDir.is<const OMPExecutableDirective *>() &&
9021 "Expect an executable directive");
9022 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9023 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9024 }
9025
9026 /// Generate all the base pointers, section pointers, sizes, map types, and
9027 /// mappers for the extracted map clauses of a user-defined mapper (all
9028 /// included in \a CombinedInfo).
9029 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9030 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9031 "Expect a declare mapper directive");
9032 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9033 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9034 }
9035
9036 /// Emit capture info for lambdas for variables captured by reference.
9037 void generateInfoForLambdaCaptures(
9038 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9039 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9040 const auto *RD = VD->getType()
9041 .getCanonicalType()
9042 .getNonReferenceType()
9043 ->getAsCXXRecordDecl();
9044 if (!RD || !RD->isLambda())
9045 return;
9046 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
9047 LValue VDLVal = CGF.MakeAddrLValue(
9048 VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
9049 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
9050 FieldDecl *ThisCapture = nullptr;
9051 RD->getCaptureFields(Captures, ThisCapture);
9052 if (ThisCapture) {
9053 LValue ThisLVal =
9054 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9055 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9056 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9057 VDLVal.getPointer(CGF));
9058 CombinedInfo.Exprs.push_back(VD);
9059 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9060 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9061 CombinedInfo.Sizes.push_back(
9062 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9063 CGF.Int64Ty, /*isSigned=*/true));
9064 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9065 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9066 CombinedInfo.Mappers.push_back(nullptr);
9067 }
9068 for (const LambdaCapture &LC : RD->captures()) {
9069 if (!LC.capturesVariable())
9070 continue;
9071 const VarDecl *VD = LC.getCapturedVar();
9072 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9073 continue;
9074 auto It = Captures.find(VD);
9075 assert(It != Captures.end() && "Found lambda capture without field.");
9076 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9077 if (LC.getCaptureKind() == LCK_ByRef) {
9078 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9079 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9080 VDLVal.getPointer(CGF));
9081 CombinedInfo.Exprs.push_back(VD);
9082 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9083 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9084
CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9085 CGF.getTypeSize(
9086 VD->getType().getCanonicalType().getNonReferenceType()),
9087 CGF.Int64Ty, /*isSigned=*/true));
9088 } else {
9089 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9090 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9091 VDLVal.getPointer(CGF));
9092 CombinedInfo.Exprs.push_back(VD);
9093 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9094 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9095 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9096 }
9097 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9098 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9099 CombinedInfo.Mappers.push_back(nullptr);
9100 }
9101 }
9102
9103 /// Set correct indices for lambda captures.
9104 void adjustMemberOfForLambdaCaptures(
9105 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9106 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9107 MapFlagsArrayTy &Types) const {
9108 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9109 // Set the correct member_of idx for all implicit lambda captures.
9110 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9111 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9112 continue;
9113 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9114 assert(BasePtr && "Unable to find base lambda address.");
9115 int TgtIdx = -1;
9116 for (unsigned J = I; J > 0; --J) {
9117 unsigned Idx = J - 1;
9118 if (Pointers[Idx] != BasePtr)
9119 continue;
9120 TgtIdx = Idx;
9121 break;
9122 }
9123 assert(TgtIdx != -1 && "Unable to find parent lambda.");
9124 // All other current entries will be MEMBER_OF the combined entry
9125 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9126 // 0xFFFF in the MEMBER_OF field).
9127 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9128 setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9129 }
9130 }
9131
9132 /// Generate the base pointers, section pointers, sizes, map types, and
9133 /// mappers associated with a given capture (all included in \a CombinedInfo).
9134 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9135 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9136 StructRangeInfoTy &PartialStruct) const {
9137 assert(!Cap->capturesVariableArrayType() &&
9138 "Not expecting to generate map info for a variable array type!");
9139
9140 // We need to know when we are generating information for the first component.
9141 const ValueDecl *VD = Cap->capturesThis()
9142 ? nullptr
9143 : Cap->getCapturedVar()->getCanonicalDecl();
9144
9145 // For map(to: lambda): skip it here; it is processed in
9146 // generateDefaultMapInfo.
9147 if (LambdasMap.count(VD))
9148 return;
9149
9150 // If this declaration appears in an is_device_ptr clause we just have to
9151 // pass the pointer by value. If it is a reference to a declaration, we just
9152 // pass its value.
9153 if (DevPointersMap.count(VD)) {
9154 CombinedInfo.Exprs.push_back(VD);
9155 CombinedInfo.BasePointers.emplace_back(Arg, VD);
9156 CombinedInfo.Pointers.push_back(Arg);
9157 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9158 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9159 /*isSigned=*/true));
9160 CombinedInfo.Types.push_back(
9161 (Cap->capturesVariable() ?
OMP_MAP_TO : OMP_MAP_LITERAL) |
9162 OMP_MAP_TARGET_PARAM);
9163 CombinedInfo.Mappers.push_back(nullptr);
9164 return;
9165 }
9166
9167 using MapData =
9168 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9169 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9170 const ValueDecl *, const Expr *>;
9171 SmallVector<MapData, 4> DeclComponentLists;
9172 assert(CurDir.is<const OMPExecutableDirective *>() &&
9173 "Expect an executable directive");
9174 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9175 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9176 const auto *EI = C->getVarRefs().begin();
9177 for (const auto L : C->decl_component_lists(VD)) {
9178 const ValueDecl *VDecl, *Mapper;
9179 // The expression is not correct if the mapping is implicit.
9180 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9181 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9182 std::tie(VDecl, Components, Mapper) = L;
9183 assert(VDecl == VD && "We got information for the wrong declaration??");
9184 assert(!Components.empty() &&
9185 "Not expecting declaration with no component lists.");
9186 DeclComponentLists.emplace_back(Components, C->getMapType(),
9187 C->getMapTypeModifiers(),
9188 C->isImplicit(), Mapper, E);
9189 ++EI;
9190 }
9191 }
9192 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9193 const MapData &RHS) {
9194 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9195 OpenMPMapClauseKind MapType = std::get<1>(RHS);
9196 bool HasPresent =
9197 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9198 bool HasAllocs = MapType == OMPC_MAP_alloc;
9199 MapModifiers = std::get<2>(RHS);
9200 MapType = std::get<1>(LHS);
9201 bool HasPresentR =
9202 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9203 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9204 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9205 });
9206
9207 // Find overlapping elements (including the offset from the base element).
9208 llvm::SmallDenseMap<
9209 const MapData *,
9210 llvm::SmallVector<
9211 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9212 4>
9213 OverlappedData;
9214 size_t Count = 0;
9215 for (const MapData &L : DeclComponentLists) {
9216 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9217 OpenMPMapClauseKind MapType;
9218 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9219 bool IsImplicit;
9220 const ValueDecl *Mapper;
9221 const Expr *VarRef;
9222 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9223 L;
9224 ++Count;
9225 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9226 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9227 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9228 VarRef) = L1;
9229 auto CI = Components.rbegin();
9230 auto CE = Components.rend();
9231 auto SI = Components1.rbegin();
9232 auto SE = Components1.rend();
9233 for (; CI != CE && SI != SE; ++CI, ++SI) {
9234 if (CI->getAssociatedExpression()->getStmtClass() !=
9235 SI->getAssociatedExpression()->getStmtClass())
9236 break;
9237 // Are we dealing with different variables/fields?
9238 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9239 break;
9240 }
9241 // We have found an overlap if, for at least one of the lists, we
9242 // reached the head of the components list.
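// For instance (illustrative): for 'map(to: s) map(from: s.b)' the
// reversed lists are walked base-first; the list of 's' is exhausted
// first, so the component list of 's.b' is recorded below as overlapping
// the mapping of the whole of 's'.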
9243 if (CI == CE || SI == SE) {
9244 // Ignore it if it is the same component.
9245 if (CI == CE && SI == SE)
9246 continue;
9247 const auto It = (SI == SE) ? CI : SI;
9248 // If one component is a pointer and another one is some kind of
9249 // dereference of this pointer (array subscript, section, dereference,
9250 // etc.), it is not an overlap.
9251 // The same holds if one component is a base and the other component is
9252 // a dereferenced pointer MemberExpr with the same base.
9253 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9254 (std::prev(It)->getAssociatedDeclaration() &&
9255 std::prev(It)
9256 ->getAssociatedDeclaration()
9257 ->getType()
9258 ->isPointerType()) ||
9259 (It->getAssociatedDeclaration() &&
9260 It->getAssociatedDeclaration()->getType()->isPointerType() &&
9261 std::next(It) != CE && std::next(It) != SE))
9262 continue;
9263 const MapData &BaseData = CI == CE ? L : L1;
9264 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9265 SI == SE ? Components : Components1;
9266 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9267 OverlappedElements.getSecond().push_back(SubData);
9268 }
9269 }
9270 }
9271 // Sort the overlapped elements for each item.
9272 llvm::SmallVector<const FieldDecl *, 4> Layout;
9273 if (!OverlappedData.empty()) {
9274 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9275 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9276 while (BaseType != OrigType) {
9277 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9278 OrigType = BaseType->getPointeeOrArrayElementType();
9279 }
9280
9281 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9282 getPlainLayout(CRD, Layout, /*AsBase=*/false);
9283 else {
9284 const auto *RD = BaseType->getAsRecordDecl();
9285 Layout.append(RD->field_begin(), RD->field_end());
9286 }
9287 }
9288 for (auto &Pair : OverlappedData) {
9289 llvm::stable_sort(
9290 Pair.getSecond(),
9291 [&Layout](
9292 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9293 OMPClauseMappableExprCommon::MappableExprComponentListRef
9294 Second) {
9295 auto CI = First.rbegin();
9296 auto CE = First.rend();
9297 auto SI = Second.rbegin();
9298 auto SE = Second.rend();
9299 for (; CI != CE && SI != SE; ++CI, ++SI) {
9300 if (CI->getAssociatedExpression()->getStmtClass() !=
9301 SI->getAssociatedExpression()->getStmtClass())
9302 break;
9303 // Are we dealing with different variables/fields?
9304 if (CI->getAssociatedDeclaration() !=
9305 SI->getAssociatedDeclaration())
9306 break;
9307 }
9308
9309 // Lists contain the same elements.
9310 if (CI == CE && SI == SE)
9311 return false;
9312
9313 // A list with fewer elements is less than a list with more elements.
9314 if (CI == CE || SI == SE)
9315 return CI == CE;
9316
9317 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9318 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9319 if (FD1->getParent() == FD2->getParent())
9320 return FD1->getFieldIndex() < FD2->getFieldIndex();
9321 const auto *It =
9322 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9323 return FD == FD1 || FD == FD2;
9324 });
9325 return *It == FD1;
9326 });
9327 }
9328
9329 // The mapping flags are associated with a capture, because they depend on
9330 // it. Go through all of the elements with overlapped elements first.
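// Continuing the illustrative example above, 'map(to: s)' is emitted here
// with the region occupied by 's.b' carved out, and 's.b' is then emitted
// with its own 'from' flags by the loop over the remaining lists below.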
9331 bool IsFirstComponentList = true;
9332 for (const auto &Pair : OverlappedData) {
9333 const MapData &L = *Pair.getFirst();
9334 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9335 OpenMPMapClauseKind MapType;
9336 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9337 bool IsImplicit;
9338 const ValueDecl *Mapper;
9339 const Expr *VarRef;
9340 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9341 L;
9342 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9343 OverlappedComponents = Pair.getSecond();
9344 generateInfoForComponentList(
9345 MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9346 PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9347 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9348 IsFirstComponentList = false;
9349 }
9350 // Go through other elements without overlapped elements.
9351 for (const MapData &L : DeclComponentLists) {
9352 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9353 OpenMPMapClauseKind MapType;
9354 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9355 bool IsImplicit;
9356 const ValueDecl *Mapper;
9357 const Expr *VarRef;
9358 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9359 L;
9360 auto It = OverlappedData.find(&L);
9361 if (It == OverlappedData.end())
9362 generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9363 Components, CombinedInfo, PartialStruct,
9364 IsFirstComponentList, IsImplicit, Mapper,
9365 /*ForDeviceAddr=*/false, VD, VarRef);
9366 IsFirstComponentList = false;
9367 }
9368 }
9369
9370 /// Generate the default map information for a given capture \a CI,
9371 /// record field declaration \a RI and captured value \a CV.
9372 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9373 const FieldDecl &RI, llvm::Value *CV,
9374 MapCombinedInfoTy &CombinedInfo) const {
9375 bool IsImplicit = true;
9376 // Do the default mapping.
9377 if (CI.capturesThis()) {
9378 CombinedInfo.Exprs.push_back(nullptr);
9379 CombinedInfo.BasePointers.push_back(CV);
9380 CombinedInfo.Pointers.push_back(CV);
9381 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9382 CombinedInfo.Sizes.push_back(
9383 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9384 CGF.Int64Ty, /*isSigned=*/true));
9385 // Default map type.
9386 CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9387 } else if (CI.capturesVariableByCopy()) {
9388 const VarDecl *VD = CI.getCapturedVar();
9389 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9390 CombinedInfo.BasePointers.push_back(CV);
9391 CombinedInfo.Pointers.push_back(CV);
9392 if (!RI.getType()->isAnyPointerType()) {
9393 // We have to signal to the runtime those captures that are passed by
9394 // value and are not pointers.
9395 CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9396 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9397 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9398 } else {
9399 // Pointers are implicitly mapped with a zero size and no flags
9400 // (other than first map that is added for all implicit maps).
9401 CombinedInfo.Types.push_back(OMP_MAP_NONE);
9402 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9403 }
9404 auto I = FirstPrivateDecls.find(VD);
9405 if (I != FirstPrivateDecls.end())
9406 IsImplicit = I->getSecond();
9407 } else {
9408 assert(CI.capturesVariable() && "Expected captured reference.");
9409 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9410 QualType ElementType = PtrTy->getPointeeType();
9411 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9412 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9413 // The default map type for a scalar/complex type is 'to' because by
9414 // default the value doesn't have to be retrieved. For an aggregate
9415 // type, the default is 'tofrom'.
9416 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9417 const VarDecl *VD = CI.getCapturedVar();
9418 auto I = FirstPrivateDecls.find(VD);
9419 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9420 CombinedInfo.BasePointers.push_back(CV);
9421 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9422 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9423 CV, ElementType, CGF.getContext().getDeclAlign(VD),
9424 AlignmentSource::Decl));
9425 CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9426 } else {
9427 CombinedInfo.Pointers.push_back(CV);
9428 }
9429 if (I != FirstPrivateDecls.end())
9430 IsImplicit = I->getSecond();
9431 }
9432 // Every default map produces a single argument which is a target parameter.
9433 CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9434
9435 // Add a flag stating this is an implicit map.
9436 if (IsImplicit)
9437 CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9438
9439 // No user-defined mapper for default mapping.
9440 CombinedInfo.Mappers.push_back(nullptr);
9441 }
9442 };
9443 } // anonymous namespace
9444
9445 static void emitNonContiguousDescriptor(
9446 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9447 CGOpenMPRuntime::TargetDataInfo &Info) {
9448 CodeGenModule &CGM = CGF.CGM;
9449 MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9450 &NonContigInfo = CombinedInfo.NonContigInfo;
9451
9452 // Build an array of struct descriptor_dim and then assign it to
9453 // offload_args.
9454 //
9455 // struct descriptor_dim {
9456 // uint64_t offset;
9457 // uint64_t count;
9458 // uint64_t stride;
9459 // };
9460 ASTContext &C = CGF.getContext();
9461 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9462 RecordDecl *RD;
9463 RD = C.buildImplicitRecord("descriptor_dim");
9464 RD->startDefinition();
9465 addFieldToRecordDecl(C, RD, Int64Ty);
9466 addFieldToRecordDecl(C, RD, Int64Ty);
9467 addFieldToRecordDecl(C, RD, Int64Ty);
9468 RD->completeDefinition();
9469 QualType DimTy = C.getRecordType(RD);
9470
9471 enum { OffsetFD = 0, CountFD, StrideFD };
9472 // We need two index variables here since the size of "Dims" is the same as
9473 // the size of Components; however, the sizes of offset, count, and stride
9474 // equal the number of base declarations that are non-contiguous.
9475 for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9476 // Skip emitting IR if the dimension size is 1 since it cannot be
9477 // non-contiguous.
9478 if (NonContigInfo.Dims[I] == 1) 9479 continue; 9480 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]); 9481 QualType ArrayTy = 9482 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); 9483 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 9484 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { 9485 unsigned RevIdx = EE - II - 1; 9486 LValue DimsLVal = CGF.MakeAddrLValue( 9487 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); 9488 // Offset 9489 LValue OffsetLVal = CGF.EmitLValueForField( 9490 DimsLVal, *std::next(RD->field_begin(), OffsetFD)); 9491 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal); 9492 // Count 9493 LValue CountLVal = CGF.EmitLValueForField( 9494 DimsLVal, *std::next(RD->field_begin(), CountFD)); 9495 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal); 9496 // Stride 9497 LValue StrideLVal = CGF.EmitLValueForField( 9498 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 9499 CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); 9500 } 9501 // args[I] = &dims 9502 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9503 DimsAddr, CGM.Int8PtrTy); 9504 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9505 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9506 Info.PointersArray, 0, I); 9507 Address PAddr(P, CGF.getPointerAlign()); 9508 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); 9509 ++L; 9510 } 9511 } 9512 9513 // Try to extract the base declaration from a `this->x` expression if possible. 9514 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9515 if (!E) 9516 return nullptr; 9517 9518 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9519 if (const MemberExpr *ME = 9520 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9521 return ME->getMemberDecl(); 9522 return nullptr; 9523 } 9524 9525 /// Emit a string constant containing the names of the values mapped to the 9526 /// offloading runtime library. 9527 llvm::Constant * 9528 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9529 MappableExprsHandler::MappingExprInfo &MapExprs) { 9530 9531 uint32_t SrcLocStrSize; 9532 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9533 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 9534 9535 SourceLocation Loc; 9536 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9537 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9538 Loc = VD->getLocation(); 9539 else 9540 Loc = MapExprs.getMapExpr()->getExprLoc(); 9541 } else { 9542 Loc = MapExprs.getMapDecl()->getLocation(); 9543 } 9544 9545 std::string ExprName; 9546 if (MapExprs.getMapExpr()) { 9547 PrintingPolicy P(CGF.getContext().getLangOpts()); 9548 llvm::raw_string_ostream OS(ExprName); 9549 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9550 OS.flush(); 9551 } else { 9552 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9553 } 9554 9555 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9556 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, 9557 PLoc.getLine(), PLoc.getColumn(), 9558 SrcLocStrSize); 9559 } 9560 9561 /// Emit the arrays used to pass the captures and map information to the 9562 /// offloading runtime library. If there is no map or capture information, 9563 /// return nullptr by reference. 
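/// A sketch of what gets created for N mapped items (using the names given
/// below; the sizes array becomes a constant global when every size is a
/// compile-time constant):
/// \code
///   void *.offload_baseptrs[N], *.offload_ptrs[N], *.offload_mappers[N];
///   int64_t .offload_sizes[N];
///   // plus the constant globals @.offload_maptypes (and, when debug
///   // info is enabled, @.offload_mapnames).
/// \endcode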
9564 static void emitOffloadingArrays( 9565 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9566 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 9567 bool IsNonContiguous = false) { 9568 CodeGenModule &CGM = CGF.CGM; 9569 ASTContext &Ctx = CGF.getContext(); 9570 9571 // Reset the array information. 9572 Info.clearArrayInfo(); 9573 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 9574 9575 if (Info.NumberOfPtrs) { 9576 // Detect if we have any capture size requiring runtime evaluation of the 9577 // size so that a constant array could be eventually used. 9578 bool hasRuntimeEvaluationCaptureSize = false; 9579 for (llvm::Value *S : CombinedInfo.Sizes) 9580 if (!isa<llvm::Constant>(S)) { 9581 hasRuntimeEvaluationCaptureSize = true; 9582 break; 9583 } 9584 9585 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9586 QualType PointerArrayType = Ctx.getConstantArrayType( 9587 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9588 /*IndexTypeQuals=*/0); 9589 9590 Info.BasePointersArray = 9591 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9592 Info.PointersArray = 9593 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9594 Address MappersArray = 9595 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 9596 Info.MappersArray = MappersArray.getPointer(); 9597 9598 // If we don't have any VLA types or other types that require runtime 9599 // evaluation, we can use a constant array for the map sizes, otherwise we 9600 // need to fill up the arrays as we do for the pointers. 9601 QualType Int64Ty = 9602 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9603 if (hasRuntimeEvaluationCaptureSize) { 9604 QualType SizeArrayType = Ctx.getConstantArrayType( 9605 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9606 /*IndexTypeQuals=*/0); 9607 Info.SizesArray = 9608 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9609 } else { 9610 // We expect all the sizes to be constant, so we collect them to create 9611 // a constant array. 9612 SmallVector<llvm::Constant *, 16> ConstSizes; 9613 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { 9614 if (IsNonContiguous && 9615 (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) { 9616 ConstSizes.push_back(llvm::ConstantInt::get( 9617 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I])); 9618 } else { 9619 ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I])); 9620 } 9621 } 9622 9623 auto *SizesArrayInit = llvm::ConstantArray::get( 9624 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9625 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9626 auto *SizesArrayGbl = new llvm::GlobalVariable( 9627 CGM.getModule(), SizesArrayInit->getType(), 9628 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9629 SizesArrayInit, Name); 9630 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9631 Info.SizesArray = SizesArrayGbl; 9632 } 9633 9634 // The map types are always constant so we don't need to generate code to 9635 // fill arrays. Instead, we create an array constant. 
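// For instance (illustrative): two explicit 'to' captures that are also
// target parameters (OMP_MAP_TO | OMP_MAP_TARGET_PARAM = 0x21) yield
//   @.offload_maptypes = private unnamed_addr constant [2 x i64] [i64 33, i64 33]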
9636 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9637 llvm::copy(CombinedInfo.Types, Mapping.begin());
9638 std::string MaptypesName =
9639 CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9640 auto *MapTypesArrayGbl =
9641 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9642 Info.MapTypesArray = MapTypesArrayGbl;
9643
9644 // The map names are only built if there is debug information
9645 // requested.
9646 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9647 Info.MapNamesArray = llvm::Constant::getNullValue(
9648 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9649 } else {
9650 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9651 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9652 };
9653 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9654 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9655 std::string MapnamesName =
9656 CGM.getOpenMPRuntime().getName({"offload_mapnames"});
9657 auto *MapNamesArrayGbl =
9658 OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
9659 Info.MapNamesArray = MapNamesArrayGbl;
9660 }
9661
9662 // If there's a present map type modifier, it must not be applied to the end
9663 // of a region, so generate a separate map type array in that case.
9664 if (Info.separateBeginEndCalls()) {
9665 bool EndMapTypesDiffer = false;
9666 for (uint64_t &Type : Mapping) {
9667 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9668 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9669 EndMapTypesDiffer = true;
9670 }
9671 }
9672 if (EndMapTypesDiffer) {
9673 MapTypesArrayGbl =
9674 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9675 Info.MapTypesArrayEnd = MapTypesArrayGbl;
9676 }
9677 }
9678
9679 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9680 llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9681 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9682 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9683 Info.BasePointersArray, 0, I);
9684 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9685 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9686 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9687 CGF.Builder.CreateStore(BPVal, BPAddr);
9688
9689 if (Info.requiresDevicePointerInfo())
9690 if (const ValueDecl *DevVD =
9691 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9692 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9693
9694 llvm::Value *PVal = CombinedInfo.Pointers[I];
9695 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9696 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9697 Info.PointersArray, 0, I);
9698 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9699 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9700 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9701 CGF.Builder.CreateStore(PVal, PAddr);
9702
9703 if (hasRuntimeEvaluationCaptureSize) {
9704 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9705 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9706 Info.SizesArray,
9707 /*Idx0=*/0,
9708 /*Idx1=*/I);
9709 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9710 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9711 CGM.Int64Ty,
9712 /*isSigned=*/true),
9713 SAddr);
9714 }
9715
9716 // Fill up the mapper array.
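// Each slot holds either null or, when a user-defined mapper from a
// '#pragma omp declare mapper' applies, the address of its generated
// '.omp_mapper.<type_name>.<mapper_id>.' function (see
// emitUserDefinedMapper below).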
9717 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9718 if (CombinedInfo.Mappers[I]) {
9719 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9720 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9721 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9722 Info.HasMapper = true;
9723 }
9724 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9725 CGF.Builder.CreateStore(MFunc, MAddr);
9726 }
9727 }
9728
9729 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9730 Info.NumberOfPtrs == 0)
9731 return;
9732
9733 emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9734 }
9735
9736 namespace {
9737 /// Additional arguments for the emitOffloadingArraysArgument function.
9738 struct ArgumentsOptions {
9739 bool ForEndCall = false;
9740 ArgumentsOptions() = default;
9741 ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
9742 };
9743 } // namespace
9744
9745 /// Emit the arguments to be passed to the runtime library based on the
9746 /// arrays of base pointers, pointers, sizes, map types, and mappers. If
9747 /// ForEndCall is true, emit the map types to be passed for the end of the
9748 /// region instead of the beginning.
9749 static void emitOffloadingArraysArgument(
9750 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9751 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9752 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9753 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9754 const ArgumentsOptions &Options = ArgumentsOptions()) {
9755 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9756 "expected region end call to runtime only when end call is separate");
9757 CodeGenModule &CGM = CGF.CGM;
9758 if (Info.NumberOfPtrs) {
9759 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9760 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9761 Info.BasePointersArray,
9762 /*Idx0=*/0, /*Idx1=*/0);
9763 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9764 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9765 Info.PointersArray,
9766 /*Idx0=*/0,
9767 /*Idx1=*/0);
9768 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9769 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9770 /*Idx0=*/0, /*Idx1=*/0);
9771 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9772 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9773 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9774 : Info.MapTypesArray,
9775 /*Idx0=*/0,
9776 /*Idx1=*/0);
9777
9778 // Only emit the map names array if debug information is
9779 // requested.
9780 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9781 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9782 else 9783 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9784 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9785 Info.MapNamesArray, 9786 /*Idx0=*/0, 9787 /*Idx1=*/0); 9788 // If there is no user-defined mapper, set the mapper array to nullptr to 9789 // avoid an unnecessary data privatization 9790 if (!Info.HasMapper) 9791 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9792 else 9793 MappersArrayArg = 9794 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9795 } else { 9796 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9797 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9798 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9799 MapTypesArrayArg = 9800 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9801 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9802 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9803 } 9804 } 9805 9806 /// Check for inner distribute directive. 9807 static const OMPExecutableDirective * 9808 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9809 const auto *CS = D.getInnermostCapturedStmt(); 9810 const auto *Body = 9811 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9812 const Stmt *ChildStmt = 9813 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9814 9815 if (const auto *NestedDir = 9816 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9817 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9818 switch (D.getDirectiveKind()) { 9819 case OMPD_target: 9820 if (isOpenMPDistributeDirective(DKind)) 9821 return NestedDir; 9822 if (DKind == OMPD_teams) { 9823 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9824 /*IgnoreCaptured=*/true); 9825 if (!Body) 9826 return nullptr; 9827 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9828 if (const auto *NND = 9829 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9830 DKind = NND->getDirectiveKind(); 9831 if (isOpenMPDistributeDirective(DKind)) 9832 return NND; 9833 } 9834 } 9835 return nullptr; 9836 case OMPD_target_teams: 9837 if (isOpenMPDistributeDirective(DKind)) 9838 return NestedDir; 9839 return nullptr; 9840 case OMPD_target_parallel: 9841 case OMPD_target_simd: 9842 case OMPD_target_parallel_for: 9843 case OMPD_target_parallel_for_simd: 9844 return nullptr; 9845 case OMPD_target_teams_distribute: 9846 case OMPD_target_teams_distribute_simd: 9847 case OMPD_target_teams_distribute_parallel_for: 9848 case OMPD_target_teams_distribute_parallel_for_simd: 9849 case OMPD_parallel: 9850 case OMPD_for: 9851 case OMPD_parallel_for: 9852 case OMPD_parallel_master: 9853 case OMPD_parallel_sections: 9854 case OMPD_for_simd: 9855 case OMPD_parallel_for_simd: 9856 case OMPD_cancel: 9857 case OMPD_cancellation_point: 9858 case OMPD_ordered: 9859 case OMPD_threadprivate: 9860 case OMPD_allocate: 9861 case OMPD_task: 9862 case OMPD_simd: 9863 case OMPD_tile: 9864 case OMPD_unroll: 9865 case OMPD_sections: 9866 case OMPD_section: 9867 case OMPD_single: 9868 case OMPD_master: 9869 case OMPD_critical: 9870 case OMPD_taskyield: 9871 case OMPD_barrier: 9872 case OMPD_taskwait: 9873 case OMPD_taskgroup: 9874 case OMPD_atomic: 9875 case OMPD_flush: 9876 case OMPD_depobj: 9877 case 
OMPD_scan: 9878 case OMPD_teams: 9879 case OMPD_target_data: 9880 case OMPD_target_exit_data: 9881 case OMPD_target_enter_data: 9882 case OMPD_distribute: 9883 case OMPD_distribute_simd: 9884 case OMPD_distribute_parallel_for: 9885 case OMPD_distribute_parallel_for_simd: 9886 case OMPD_teams_distribute: 9887 case OMPD_teams_distribute_simd: 9888 case OMPD_teams_distribute_parallel_for: 9889 case OMPD_teams_distribute_parallel_for_simd: 9890 case OMPD_target_update: 9891 case OMPD_declare_simd: 9892 case OMPD_declare_variant: 9893 case OMPD_begin_declare_variant: 9894 case OMPD_end_declare_variant: 9895 case OMPD_declare_target: 9896 case OMPD_end_declare_target: 9897 case OMPD_declare_reduction: 9898 case OMPD_declare_mapper: 9899 case OMPD_taskloop: 9900 case OMPD_taskloop_simd: 9901 case OMPD_master_taskloop: 9902 case OMPD_master_taskloop_simd: 9903 case OMPD_parallel_master_taskloop: 9904 case OMPD_parallel_master_taskloop_simd: 9905 case OMPD_requires: 9906 case OMPD_metadirective: 9907 case OMPD_unknown: 9908 default: 9909 llvm_unreachable("Unexpected directive."); 9910 } 9911 } 9912 9913 return nullptr; 9914 } 9915 9916 /// Emit the user-defined mapper function. The code generation follows the 9917 /// pattern in the example below. 9918 /// \code 9919 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9920 /// void *base, void *begin, 9921 /// int64_t size, int64_t type, 9922 /// void *name = nullptr) { 9923 /// // Allocate space for an array section first or add a base/begin for 9924 /// // pointer dereference. 9925 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) && 9926 /// !maptype.IsDelete) 9927 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9928 /// size*sizeof(Ty), clearToFromMember(type)); 9929 /// // Map members. 9930 /// for (unsigned i = 0; i < size; i++) { 9931 /// // For each component specified by this mapper: 9932 /// for (auto c : begin[i]->all_components) { 9933 /// if (c.hasMapper()) 9934 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 9935 /// c.arg_type, c.arg_name); 9936 /// else 9937 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 9938 /// c.arg_begin, c.arg_size, c.arg_type, 9939 /// c.arg_name); 9940 /// } 9941 /// } 9942 /// // Delete the array section. 9943 /// if (size > 1 && maptype.IsDelete) 9944 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9945 /// size*sizeof(Ty), clearToFromMember(type)); 9946 /// } 9947 /// \endcode 9948 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 9949 CodeGenFunction *CGF) { 9950 if (UDMMap.count(D) > 0) 9951 return; 9952 ASTContext &C = CGM.getContext(); 9953 QualType Ty = D->getType(); 9954 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 9955 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 9956 auto *MapperVarDecl = 9957 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 9958 SourceLocation Loc = D->getLocation(); 9959 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 9960 9961 // Prepare mapper function arguments and attributes. 
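  // For illustration only: a user-defined mapper such as
  // \code
  //   struct S { int len; double *data; }; // example type, not from this file
  //   #pragma omp declare mapper(id : S s) map(s, s.data[0 : s.len])
  // \endcode
  // is emitted as a function named from getName({"omp_mapper", <mangled type>,
  // <mapper id>}) with the signature documented above; the implicit parameters
  // declared next correspond to that signature.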
9962 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9963 C.VoidPtrTy, ImplicitParamDecl::Other); 9964 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9965 ImplicitParamDecl::Other); 9966 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9967 C.VoidPtrTy, ImplicitParamDecl::Other); 9968 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9969 ImplicitParamDecl::Other); 9970 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9971 ImplicitParamDecl::Other); 9972 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9973 ImplicitParamDecl::Other); 9974 FunctionArgList Args; 9975 Args.push_back(&HandleArg); 9976 Args.push_back(&BaseArg); 9977 Args.push_back(&BeginArg); 9978 Args.push_back(&SizeArg); 9979 Args.push_back(&TypeArg); 9980 Args.push_back(&NameArg); 9981 const CGFunctionInfo &FnInfo = 9982 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 9983 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 9984 SmallString<64> TyStr; 9985 llvm::raw_svector_ostream Out(TyStr); 9986 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 9987 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 9988 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 9989 Name, &CGM.getModule()); 9990 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 9991 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 9992 // Start the mapper function code generation. 9993 CodeGenFunction MapperCGF(CGM); 9994 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 9995 // Compute the starting and end addresses of array elements. 9996 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 9997 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 9998 C.getPointerType(Int64Ty), Loc); 9999 // Prepare common arguments for array initiation and deletion. 10000 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 10001 MapperCGF.GetAddrOfLocalVar(&HandleArg), 10002 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 10003 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 10004 MapperCGF.GetAddrOfLocalVar(&BaseArg), 10005 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 10006 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 10007 MapperCGF.GetAddrOfLocalVar(&BeginArg), 10008 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 10009 // Convert the size in bytes into the number of array elements. 10010 Size = MapperCGF.Builder.CreateExactUDiv( 10011 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 10012 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 10013 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy)); 10014 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP( 10015 PtrBegin->getType()->getPointerElementType(), PtrBegin, Size); 10016 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 10017 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 10018 C.getPointerType(Int64Ty), Loc); 10019 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar( 10020 MapperCGF.GetAddrOfLocalVar(&NameArg), 10021 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 10022 10023 // Emit array initiation if this is an array section and \p MapType indicates 10024 // that memory allocation is required. 
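  // For example (illustrative): mapping a 'double a[10]' section passes
  // size = 80 bytes; with ElementSize = 8 the exact division above yields
  // Size = 10 elements, and PtrEnd = PtrBegin + 10 bounds the mapping loop
  // emitted below.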
10025 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 10026 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 10027 MapName, ElementSize, HeadBB, /*IsInit=*/true); 10028 10029 // Emit a for loop to iterate through SizeArg of elements and map all of them. 10030 10031 // Emit the loop header block. 10032 MapperCGF.EmitBlock(HeadBB); 10033 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 10034 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 10035 // Evaluate whether the initial condition is satisfied. 10036 llvm::Value *IsEmpty = 10037 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 10038 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 10039 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 10040 10041 // Emit the loop body block. 10042 MapperCGF.EmitBlock(BodyBB); 10043 llvm::BasicBlock *LastBB = BodyBB; 10044 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 10045 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 10046 PtrPHI->addIncoming(PtrBegin, EntryBB); 10047 Address PtrCurrent = 10048 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 10049 .getAlignment() 10050 .alignmentOfArrayElement(ElementSize)); 10051 // Privatize the declared variable of mapper to be the current array element. 10052 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 10053 Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; }); 10054 (void)Scope.Privatize(); 10055 10056 // Get map clause information. Fill up the arrays with all mapped variables. 10057 MappableExprsHandler::MapCombinedInfoTy Info; 10058 MappableExprsHandler MEHandler(*D, MapperCGF); 10059 MEHandler.generateAllInfoForMapper(Info); 10060 10061 // Call the runtime API __tgt_mapper_num_components to get the number of 10062 // pre-existing components. 10063 llvm::Value *OffloadingArgs[] = {Handle}; 10064 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 10065 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 10066 OMPRTL___tgt_mapper_num_components), 10067 OffloadingArgs); 10068 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 10069 PreviousSize, 10070 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 10071 10072 // Fill up the runtime mapper handle for all components. 10073 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) { 10074 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 10075 *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 10076 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 10077 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 10078 llvm::Value *CurSizeArg = Info.Sizes[I]; 10079 llvm::Value *CurNameArg = 10080 (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 10081 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy) 10082 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]); 10083 10084 // Extract the MEMBER_OF field from the map type. 10085 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]); 10086 llvm::Value *MemberMapType = 10087 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 10088 10089 // Combine the map type inherited from user-defined mapper with that 10090 // specified in the program. 
According to the OMP_MAP_TO and OMP_MAP_FROM 10091 // bits of the \a MapType, which is the input argument of the mapper 10092 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 10093 // bits of MemberMapType. 10094 // [OpenMP 5.0], 1.2.6. map-type decay. 10095 // | alloc | to | from | tofrom | release | delete 10096 // ---------------------------------------------------------- 10097 // alloc | alloc | alloc | alloc | alloc | release | delete 10098 // to | alloc | to | alloc | to | release | delete 10099 // from | alloc | alloc | from | from | release | delete 10100 // tofrom | alloc | to | from | tofrom | release | delete 10101 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 10102 MapType, 10103 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 10104 MappableExprsHandler::OMP_MAP_FROM)); 10105 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 10106 llvm::BasicBlock *AllocElseBB = 10107 MapperCGF.createBasicBlock("omp.type.alloc.else"); 10108 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 10109 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 10110 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 10111 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 10112 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 10113 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 10114 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 10115 MapperCGF.EmitBlock(AllocBB); 10116 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 10117 MemberMapType, 10118 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 10119 MappableExprsHandler::OMP_MAP_FROM))); 10120 MapperCGF.Builder.CreateBr(EndBB); 10121 MapperCGF.EmitBlock(AllocElseBB); 10122 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 10123 LeftToFrom, 10124 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 10125 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 10126 // In case of to, clear OMP_MAP_FROM. 10127 MapperCGF.EmitBlock(ToBB); 10128 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 10129 MemberMapType, 10130 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 10131 MapperCGF.Builder.CreateBr(EndBB); 10132 MapperCGF.EmitBlock(ToElseBB); 10133 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 10134 LeftToFrom, 10135 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 10136 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 10137 // In case of from, clear OMP_MAP_TO. 10138 MapperCGF.EmitBlock(FromBB); 10139 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 10140 MemberMapType, 10141 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 10142 // In case of tofrom, do nothing. 10143 MapperCGF.EmitBlock(EndBB); 10144 LastBB = EndBB; 10145 llvm::PHINode *CurMapType = 10146 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 10147 CurMapType->addIncoming(AllocMapType, AllocBB); 10148 CurMapType->addIncoming(ToMapType, ToBB); 10149 CurMapType->addIncoming(FromMapType, FromBB); 10150 CurMapType->addIncoming(MemberMapType, ToElseBB); 10151 10152 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 10153 CurSizeArg, CurMapType, CurNameArg}; 10154 if (Info.Mappers[I]) { 10155 // Call the corresponding mapper function. 
10156       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
10157           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
10158       assert(MapperFunc && "Expected a valid mapper function to be available.");
10159       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
10160     } else {
10161       // Call the runtime API __tgt_push_mapper_component to fill up the
10162       // runtime data structure.
10163       MapperCGF.EmitRuntimeCall(
10164           OMPBuilder.getOrCreateRuntimeFunction(
10165               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
10166           OffloadingArgs);
10167     }
10168   }
10169 
10170   // Update the pointer to point to the next element that needs to be mapped,
10171   // and check whether we have mapped all elements.
10172   llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
10173   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
10174       ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
10175   PtrPHI->addIncoming(PtrNext, LastBB);
10176   llvm::Value *IsDone =
10177       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
10178   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
10179   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10180 
10181   MapperCGF.EmitBlock(ExitBB);
10182   // Emit array deletion if this is an array section and \p MapType indicates
10183   // that deletion is required.
10184   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10185                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
10186 
10187   // Emit the function exit block.
10188   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
10189   MapperCGF.FinishFunction();
10190   UDMMap.try_emplace(D, Fn);
10191   if (CGF) {
10192     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
10193     Decls.second.push_back(D);
10194   }
10195 }
10196 
10197 /// Emit the array initialization or deletion portion for user-defined mapper
10198 /// code generation. First, it evaluates whether an array section is mapped and
10199 /// whether the \a MapType instructs to delete this section. If \a IsInit is
10200 /// true, and \a MapType indicates to not delete this array, array
10201 /// initialization code is generated. If \a IsInit is false, and \a MapType
10202 /// indicates to delete this array, array deletion code is generated.
10203 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
10204     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
10205     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
10206     llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
10207     bool IsInit) {
10208   StringRef Prefix = IsInit ? ".init" : ".del";
10209 
10210   // Evaluate if this is an array section.
10211   llvm::BasicBlock *BodyBB =
10212       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
10213   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
10214       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
10215   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
10216       MapType,
10217       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
10218   llvm::Value *DeleteCond;
10219   llvm::Value *Cond;
10220   if (IsInit) {
10221     // base != begin?
10222     llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
10223     // IsPtrAndObj?
10224 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd( 10225 MapType, 10226 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ)); 10227 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit); 10228 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit); 10229 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin); 10230 DeleteCond = MapperCGF.Builder.CreateIsNull( 10231 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10232 } else { 10233 Cond = IsArray; 10234 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 10235 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10236 } 10237 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond); 10238 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB); 10239 10240 MapperCGF.EmitBlock(BodyBB); 10241 // Get the array size by multiplying element size and element number (i.e., \p 10242 // Size). 10243 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 10244 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 10245 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 10246 // memory allocation/deletion purpose only. 10247 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 10248 MapType, 10249 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 10250 MappableExprsHandler::OMP_MAP_FROM))); 10251 MapTypeArg = MapperCGF.Builder.CreateOr( 10252 MapTypeArg, 10253 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT)); 10254 10255 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 10256 // data structure. 10257 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, 10258 ArraySize, MapTypeArg, MapName}; 10259 MapperCGF.EmitRuntimeCall( 10260 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 10261 OMPRTL___tgt_push_mapper_component), 10262 OffloadingArgs); 10263 } 10264 10265 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( 10266 const OMPDeclareMapperDecl *D) { 10267 auto I = UDMMap.find(D); 10268 if (I != UDMMap.end()) 10269 return I->second; 10270 emitUserDefinedMapper(D); 10271 return UDMMap.lookup(D); 10272 } 10273 10274 void CGOpenMPRuntime::emitTargetNumIterationsCall( 10275 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10276 llvm::Value *DeviceID, 10277 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10278 const OMPLoopDirective &D)> 10279 SizeEmitter) { 10280 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 10281 const OMPExecutableDirective *TD = &D; 10282 // Get nested teams distribute kind directive, if any. 
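  // E.g. (illustrative), for
  // \code
  //   #pragma omp target
  //   #pragma omp teams
  //   #pragma omp distribute parallel for
  //   for (int i = 0; i < N; ++i) ...
  // \endcode
  // the nested 'distribute parallel for' is the loop directive whose trip
  // count is pushed to the runtime below.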
10283 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 10284 TD = getNestedDistributeDirective(CGM.getContext(), D); 10285 if (!TD) 10286 return; 10287 const auto *LD = cast<OMPLoopDirective>(TD); 10288 auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF, 10289 PrePostActionTy &) { 10290 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 10291 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10292 llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; 10293 CGF.EmitRuntimeCall( 10294 OMPBuilder.getOrCreateRuntimeFunction( 10295 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper), 10296 Args); 10297 } 10298 }; 10299 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 10300 } 10301 10302 void CGOpenMPRuntime::emitTargetCall( 10303 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10304 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 10305 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 10306 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10307 const OMPLoopDirective &D)> 10308 SizeEmitter) { 10309 if (!CGF.HaveInsertPoint()) 10310 return; 10311 10312 assert(OutlinedFn && "Invalid outlined function!"); 10313 10314 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 10315 D.hasClausesOfKind<OMPNowaitClause>(); 10316 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 10317 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 10318 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 10319 PrePostActionTy &) { 10320 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10321 }; 10322 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 10323 10324 CodeGenFunction::OMPTargetDataInfo InputInfo; 10325 llvm::Value *MapTypesArray = nullptr; 10326 llvm::Value *MapNamesArray = nullptr; 10327 // Fill up the pointer arrays and transfer execution to the device. 10328 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 10329 &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask, 10330 &CapturedVars, 10331 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 10332 if (Device.getInt() == OMPC_DEVICE_ancestor) { 10333 // Reverse offloading is not supported, so just execute on the host. 10334 if (RequiresOuterTask) { 10335 CapturedVars.clear(); 10336 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10337 } 10338 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10339 return; 10340 } 10341 10342 // On top of the arrays that were filled up, the target offloading call 10343 // takes as arguments the device id as well as the host pointer. The host 10344 // pointer is used by the runtime library to identify the current target 10345 // region, so it only has to be unique and not necessarily point to 10346 // anything. It could be the pointer to the outlined function that 10347 // implements the target region, but we aren't using that so that the 10348 // compiler doesn't need to keep that, and could therefore inline the host 10349 // function if proven worthwhile during optimization. 10350 10351 // From this point on, we need to have an ID of the target region defined. 10352 assert(OutlinedFnID && "Invalid outlined function ID!"); 10353 10354 // Emit device ID if any. 
10355     llvm::Value *DeviceID;
10356     if (Device.getPointer()) {
10357       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10358               Device.getInt() == OMPC_DEVICE_device_num) &&
10359              "Expected device_num modifier.");
10360       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10361       DeviceID =
10362           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10363     } else {
10364       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10365     }
10366 
10367     // Emit the number of elements in the offloading arrays.
10368     llvm::Value *PointerNum =
10369         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10370 
10371     // Return value of the runtime offloading call.
10372     llvm::Value *Return;
10373 
10374     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10375     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10376 
10377     // Source location for the ident struct
10378     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10379 
10380     // Emit tripcount for the target loop-based directive.
10381     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10382 
10383     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10384     // The target region is an outlined function launched by the runtime
10385     // via calls to __tgt_target() or __tgt_target_teams().
10386     //
10387     // __tgt_target() launches a target region with one team and one thread,
10388     // executing a serial region. This master thread may in turn launch
10389     // more threads within its team upon encountering a parallel region;
10390     // however, no additional teams can be launched on the device.
10391     //
10392     // __tgt_target_teams() launches a target region with one or more teams,
10393     // each with one or more threads. This call is required for target
10394     // constructs such as:
10395     //   'target teams'
10396     //   'target' / 'teams'
10397     //   'target teams distribute parallel for'
10398     //   'target parallel'
10399     // and so on.
10400     //
10401     // Note that on the host and CPU targets, the runtime implementations of
10402     // these calls simply invoke the outlined function without forking threads.
10403     // The outlined functions themselves have runtime calls to
10404     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10405     // the compiler in emitTeamsCall() and emitParallelCall().
10406     //
10407     // In contrast, on the NVPTX target, the implementation of
10408     // __tgt_target_teams() launches a GPU kernel with the requested number
10409     // of teams and threads so no additional calls to the runtime are required.
10410     if (NumTeams) {
10411       // If we have NumTeams defined this means that we have an enclosed teams
10412       // region. Therefore we also expect to have NumThreads defined. These two
10413       // values should be defined in the presence of a teams directive,
10414       // regardless of having any clauses associated. If the user is using teams
10415       // but no clauses, these two values will be the default that should be
10416       // passed to the runtime library - a 32-bit integer with the value zero.
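      // The emitted call then has roughly the following shape (sketch; the
      // parameter names are illustrative, the order matches OffloadingArgs
      // below):
      // \code
      //   err = __tgt_target_teams_mapper(loc, device_id, outlined_fn_id, n,
      //                                   base_ptrs, ptrs, sizes, map_types,
      //                                   names, mappers, num_teams,
      //                                   num_threads);
      // \endcode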
10417 assert(NumThreads && "Thread limit expression should be available along " 10418 "with number of teams."); 10419 SmallVector<llvm::Value *> OffloadingArgs = { 10420 RTLoc, 10421 DeviceID, 10422 OutlinedFnID, 10423 PointerNum, 10424 InputInfo.BasePointersArray.getPointer(), 10425 InputInfo.PointersArray.getPointer(), 10426 InputInfo.SizesArray.getPointer(), 10427 MapTypesArray, 10428 MapNamesArray, 10429 InputInfo.MappersArray.getPointer(), 10430 NumTeams, 10431 NumThreads}; 10432 if (HasNowait) { 10433 // Add int32_t depNum = 0, void *depList = nullptr, int32_t 10434 // noAliasDepNum = 0, void *noAliasDepList = nullptr. 10435 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10436 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10437 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10438 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10439 } 10440 Return = CGF.EmitRuntimeCall( 10441 OMPBuilder.getOrCreateRuntimeFunction( 10442 CGM.getModule(), HasNowait 10443 ? OMPRTL___tgt_target_teams_nowait_mapper 10444 : OMPRTL___tgt_target_teams_mapper), 10445 OffloadingArgs); 10446 } else { 10447 SmallVector<llvm::Value *> OffloadingArgs = { 10448 RTLoc, 10449 DeviceID, 10450 OutlinedFnID, 10451 PointerNum, 10452 InputInfo.BasePointersArray.getPointer(), 10453 InputInfo.PointersArray.getPointer(), 10454 InputInfo.SizesArray.getPointer(), 10455 MapTypesArray, 10456 MapNamesArray, 10457 InputInfo.MappersArray.getPointer()}; 10458 if (HasNowait) { 10459 // Add int32_t depNum = 0, void *depList = nullptr, int32_t 10460 // noAliasDepNum = 0, void *noAliasDepList = nullptr. 10461 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10462 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10463 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10464 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10465 } 10466 Return = CGF.EmitRuntimeCall( 10467 OMPBuilder.getOrCreateRuntimeFunction( 10468 CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper 10469 : OMPRTL___tgt_target_mapper), 10470 OffloadingArgs); 10471 } 10472 10473 // Check the error code and execute the host version if required. 10474 llvm::BasicBlock *OffloadFailedBlock = 10475 CGF.createBasicBlock("omp_offload.failed"); 10476 llvm::BasicBlock *OffloadContBlock = 10477 CGF.createBasicBlock("omp_offload.cont"); 10478 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 10479 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 10480 10481 CGF.EmitBlock(OffloadFailedBlock); 10482 if (RequiresOuterTask) { 10483 CapturedVars.clear(); 10484 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10485 } 10486 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10487 CGF.EmitBranch(OffloadContBlock); 10488 10489 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 10490 }; 10491 10492 // Notify that the host version must be executed. 
10493 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 10494 RequiresOuterTask](CodeGenFunction &CGF, 10495 PrePostActionTy &) { 10496 if (RequiresOuterTask) { 10497 CapturedVars.clear(); 10498 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10499 } 10500 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10501 }; 10502 10503 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10504 &MapNamesArray, &CapturedVars, RequiresOuterTask, 10505 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 10506 // Fill up the arrays with all the captured variables. 10507 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10508 10509 // Get mappable expression information. 10510 MappableExprsHandler MEHandler(D, CGF); 10511 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 10512 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 10513 10514 auto RI = CS.getCapturedRecordDecl()->field_begin(); 10515 auto *CV = CapturedVars.begin(); 10516 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 10517 CE = CS.capture_end(); 10518 CI != CE; ++CI, ++RI, ++CV) { 10519 MappableExprsHandler::MapCombinedInfoTy CurInfo; 10520 MappableExprsHandler::StructRangeInfoTy PartialStruct; 10521 10522 // VLA sizes are passed to the outlined region by copy and do not have map 10523 // information associated. 10524 if (CI->capturesVariableArrayType()) { 10525 CurInfo.Exprs.push_back(nullptr); 10526 CurInfo.BasePointers.push_back(*CV); 10527 CurInfo.Pointers.push_back(*CV); 10528 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 10529 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 10530 // Copy to the device as an argument. No need to retrieve it. 10531 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 10532 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 10533 MappableExprsHandler::OMP_MAP_IMPLICIT); 10534 CurInfo.Mappers.push_back(nullptr); 10535 } else { 10536 // If we have any information in the map clause, we use it, otherwise we 10537 // just do a default mapping. 10538 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); 10539 if (!CI->capturesThis()) 10540 MappedVarSet.insert(CI->getCapturedVar()); 10541 else 10542 MappedVarSet.insert(nullptr); 10543 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) 10544 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); 10545 // Generate correct mapping for variables captured by reference in 10546 // lambdas. 10547 if (CI->capturesVariable()) 10548 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, 10549 CurInfo, LambdaPointers); 10550 } 10551 // We expect to have at least an element of information for this capture. 10552 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && 10553 "Non-existing map pointer for capture!"); 10554 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 10555 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 10556 CurInfo.BasePointers.size() == CurInfo.Types.size() && 10557 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 10558 "Inconsistent map information sizes!"); 10559 10560 // If there is an entry in PartialStruct it means we have a struct with 10561 // individual members mapped. Emit an extra combined entry. 
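      // E.g. (illustrative), 'map(to : s.a, s.b)' yields one combined entry
      // covering the storage of 's' plus per-member entries carrying MEMBER_OF
      // flags that refer back to it.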
10562     if (PartialStruct.Base.isValid()) {
10563       CombinedInfo.append(PartialStruct.PreliminaryMapData);
10564       MEHandler.emitCombinedEntry(
10565           CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10566           !PartialStruct.PreliminaryMapData.BasePointers.empty());
10567     }
10568 
10569     // We need to append the results of this capture to what we already have.
10570     CombinedInfo.append(CurInfo);
10571   }
10572   // Adjust MEMBER_OF flags for the lambda captures.
10573   MEHandler.adjustMemberOfForLambdaCaptures(
10574       LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10575       CombinedInfo.Types);
10576   // Map any list items in a map clause that were not captured because they
10577   // weren't referenced within the construct.
10578   MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10579 
10580   TargetDataInfo Info;
10581   // Fill up the arrays and create the arguments.
10582   emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10583   emitOffloadingArraysArgument(
10584       CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10585       Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10586       {/*ForEndCall=*/false});
10587 
10588   InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10589   InputInfo.BasePointersArray =
10590       Address(Info.BasePointersArray, CGM.getPointerAlign());
10591   InputInfo.PointersArray =
10592       Address(Info.PointersArray, CGM.getPointerAlign());
10593   InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10594   InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10595   MapTypesArray = Info.MapTypesArray;
10596   MapNamesArray = Info.MapNamesArray;
10597   if (RequiresOuterTask)
10598     CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10599   else
10600     emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10601 };
10602 
10603   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10604                              CodeGenFunction &CGF, PrePostActionTy &) {
10605     if (RequiresOuterTask) {
10606       CodeGenFunction::OMPTargetDataInfo InputInfo;
10607       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10608     } else {
10609       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10610     }
10611   };
10612 
10613   // If we have a target function ID it means that we need to support
10614   // offloading; otherwise, just execute on the host. We need to execute on the
10615   // host regardless of the conditional in the if clause if, e.g., the user
10616   // does not specify target triples.
10617   if (OutlinedFnID) {
10618     if (IfCond) {
10619       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10620     } else {
10621       RegionCodeGenTy ThenRCG(TargetThenGen);
10622       ThenRCG(CGF);
10623     }
10624   } else {
10625     RegionCodeGenTy ElseRCG(TargetElseGen);
10626     ElseRCG(CGF);
10627   }
10628 }
10629 
10630 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10631                                                     StringRef ParentName) {
10632   if (!S)
10633     return;
10634 
10635   // Codegen OMP target directives that offload compute to the device.
10636   bool RequiresDeviceCodegen =
10637       isa<OMPExecutableDirective>(S) &&
10638       isOpenMPTargetExecutionDirective(
10639           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10640 
10641   if (RequiresDeviceCodegen) {
10642     const auto &E = *cast<OMPExecutableDirective>(S);
10643     unsigned DeviceID;
10644     unsigned FileID;
10645     unsigned Line;
10646     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10647                              FileID, Line);
10648 
10649     // Is this a target region that should not be emitted as an entry point?
If 10650 // so just signal we are done with this target region. 10651 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 10652 ParentName, Line)) 10653 return; 10654 10655 switch (E.getDirectiveKind()) { 10656 case OMPD_target: 10657 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 10658 cast<OMPTargetDirective>(E)); 10659 break; 10660 case OMPD_target_parallel: 10661 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 10662 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 10663 break; 10664 case OMPD_target_teams: 10665 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 10666 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 10667 break; 10668 case OMPD_target_teams_distribute: 10669 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 10670 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 10671 break; 10672 case OMPD_target_teams_distribute_simd: 10673 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 10674 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 10675 break; 10676 case OMPD_target_parallel_for: 10677 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 10678 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 10679 break; 10680 case OMPD_target_parallel_for_simd: 10681 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 10682 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 10683 break; 10684 case OMPD_target_simd: 10685 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 10686 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 10687 break; 10688 case OMPD_target_teams_distribute_parallel_for: 10689 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 10690 CGM, ParentName, 10691 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 10692 break; 10693 case OMPD_target_teams_distribute_parallel_for_simd: 10694 CodeGenFunction:: 10695 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 10696 CGM, ParentName, 10697 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 10698 break; 10699 case OMPD_parallel: 10700 case OMPD_for: 10701 case OMPD_parallel_for: 10702 case OMPD_parallel_master: 10703 case OMPD_parallel_sections: 10704 case OMPD_for_simd: 10705 case OMPD_parallel_for_simd: 10706 case OMPD_cancel: 10707 case OMPD_cancellation_point: 10708 case OMPD_ordered: 10709 case OMPD_threadprivate: 10710 case OMPD_allocate: 10711 case OMPD_task: 10712 case OMPD_simd: 10713 case OMPD_tile: 10714 case OMPD_unroll: 10715 case OMPD_sections: 10716 case OMPD_section: 10717 case OMPD_single: 10718 case OMPD_master: 10719 case OMPD_critical: 10720 case OMPD_taskyield: 10721 case OMPD_barrier: 10722 case OMPD_taskwait: 10723 case OMPD_taskgroup: 10724 case OMPD_atomic: 10725 case OMPD_flush: 10726 case OMPD_depobj: 10727 case OMPD_scan: 10728 case OMPD_teams: 10729 case OMPD_target_data: 10730 case OMPD_target_exit_data: 10731 case OMPD_target_enter_data: 10732 case OMPD_distribute: 10733 case OMPD_distribute_simd: 10734 case OMPD_distribute_parallel_for: 10735 case OMPD_distribute_parallel_for_simd: 10736 case OMPD_teams_distribute: 10737 case OMPD_teams_distribute_simd: 10738 case OMPD_teams_distribute_parallel_for: 10739 case OMPD_teams_distribute_parallel_for_simd: 10740 case OMPD_target_update: 10741 case OMPD_declare_simd: 10742 case OMPD_declare_variant: 10743 case OMPD_begin_declare_variant: 10744 case OMPD_end_declare_variant: 10745 case OMPD_declare_target: 10746 case OMPD_end_declare_target: 10747 case 
OMPD_declare_reduction:
10748     case OMPD_declare_mapper:
10749     case OMPD_taskloop:
10750     case OMPD_taskloop_simd:
10751     case OMPD_master_taskloop:
10752     case OMPD_master_taskloop_simd:
10753     case OMPD_parallel_master_taskloop:
10754     case OMPD_parallel_master_taskloop_simd:
10755     case OMPD_requires:
10756     case OMPD_metadirective:
10757     case OMPD_unknown:
10758     default:
10759       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10760     }
10761     return;
10762   }
10763 
10764   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10765     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10766       return;
10767 
10768     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10769     return;
10770   }
10771 
10772   // If this is a lambda function, look into its body.
10773   if (const auto *L = dyn_cast<LambdaExpr>(S))
10774     S = L->getBody();
10775 
10776   // Keep looking for target regions recursively.
10777   for (const Stmt *II : S->children())
10778     scanForTargetRegionsFunctions(II, ParentName);
10779 }
10780 
10781 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10782   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10783       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10784   if (!DevTy)
10785     return false;
10786   // Do not emit device_type(nohost) functions for the host.
10787   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10788     return true;
10789   // Do not emit device_type(host) functions for the device.
10790   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10791     return true;
10792   return false;
10793 }
10794 
10795 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10796   // If emitting code for the host, we do not process FD here. Instead we do
10797   // the normal code generation.
10798   if (!CGM.getLangOpts().OpenMPIsDevice) {
10799     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10800       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10801                                   CGM.getLangOpts().OpenMPIsDevice))
10802         return true;
10803     return false;
10804   }
10805 
10806   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10807   // Try to detect target regions in the function.
10808   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10809     StringRef Name = CGM.getMangledName(GD);
10810     scanForTargetRegionsFunctions(FD->getBody(), Name);
10811     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10812                                 CGM.getLangOpts().OpenMPIsDevice))
10813       return true;
10814   }
10815 
10816   // Do not emit the function if it is not marked as declare target.
10817   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10818          AlreadyEmittedTargetDecls.count(VD) == 0;
10819 }
10820 
10821 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10822   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10823                               CGM.getLangOpts().OpenMPIsDevice))
10824     return true;
10825 
10826   if (!CGM.getLangOpts().OpenMPIsDevice)
10827     return false;
10828 
10829   // Check if there are Ctors/Dtors in this declaration and look for target
10830   // regions in it. We use the complete variant to produce the kernel name
10831   // mangling.
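  // For instance (illustrative), given
  // \code
  //   #pragma omp declare target
  //   S GlobalVar; // S has a constructor and a destructor.
  //   #pragma omp end declare target
  // \endcode
  // target regions inside S::S() and S::~S() are scanned below using the
  // mangled names of the complete constructor/destructor as parent names.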
10832   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10833   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10834     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10835       StringRef ParentName =
10836           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10837       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10838     }
10839     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10840       StringRef ParentName =
10841           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10842       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10843     }
10844   }
10845 
10846   // Do not emit the variable if it is not marked as declare target.
10847   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10848       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10849           cast<VarDecl>(GD.getDecl()));
10850   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10851       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10852        HasRequiresUnifiedSharedMemory)) {
10853     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10854     return true;
10855   }
10856   return false;
10857 }
10858 
10859 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10860                                                    llvm::Constant *Addr) {
10861   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10862       !CGM.getLangOpts().OpenMPIsDevice)
10863     return;
10864 
10865   // If we have host/nohost variables, they do not need to be registered.
10866   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10867       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10868   if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10869     return;
10870 
10871   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10872       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10873   if (!Res) {
10874     if (CGM.getLangOpts().OpenMPIsDevice) {
10875       // Register non-target variables being emitted in device code (debug info
10876       // may cause this).
10877       StringRef VarName = CGM.getMangledName(VD);
10878       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10879     }
10880     return;
10881   }
10882   // Register declare target variables.
10883   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10884   StringRef VarName;
10885   CharUnits VarSize;
10886   llvm::GlobalValue::LinkageTypes Linkage;
10887 
10888   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10889       !HasRequiresUnifiedSharedMemory) {
10890     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10891     VarName = CGM.getMangledName(VD);
10892     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10893       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10894       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10895     } else {
10896       VarSize = CharUnits::Zero();
10897     }
10898     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10899     // Temporary solution to prevent optimizations of the internal variables.
10900     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10901       // Do not create a "ref-variable" if the original is not also available
10902       // on the host.
10903 if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName)) 10904 return; 10905 std::string RefName = getName({VarName, "ref"}); 10906 if (!CGM.GetGlobalValue(RefName)) { 10907 llvm::Constant *AddrRef = 10908 getOrCreateInternalVariable(Addr->getType(), RefName); 10909 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10910 GVAddrRef->setConstant(/*Val=*/true); 10911 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10912 GVAddrRef->setInitializer(Addr); 10913 CGM.addCompilerUsedGlobal(GVAddrRef); 10914 } 10915 } 10916 } else { 10917 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10918 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10919 HasRequiresUnifiedSharedMemory)) && 10920 "Declare target attribute must link or to with unified memory."); 10921 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10922 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10923 else 10924 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10925 10926 if (CGM.getLangOpts().OpenMPIsDevice) { 10927 VarName = Addr->getName(); 10928 Addr = nullptr; 10929 } else { 10930 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10931 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10932 } 10933 VarSize = CGM.getPointerSize(); 10934 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10935 } 10936 10937 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10938 VarName, Addr, VarSize, Flags, Linkage); 10939 } 10940 10941 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10942 if (isa<FunctionDecl>(GD.getDecl()) || 10943 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10944 return emitTargetFunctions(GD); 10945 10946 return emitTargetGlobalVariable(GD); 10947 } 10948 10949 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10950 for (const VarDecl *VD : DeferredGlobalVariables) { 10951 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10952 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10953 if (!Res) 10954 continue; 10955 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10956 !HasRequiresUnifiedSharedMemory) { 10957 CGM.EmitGlobal(VD); 10958 } else { 10959 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10960 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10961 HasRequiresUnifiedSharedMemory)) && 10962 "Expected link clause or to clause with unified memory."); 10963 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10964 } 10965 } 10966 } 10967 10968 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10969 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10970 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10971 " Expected target-based directive."); 10972 } 10973 10974 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10975 for (const OMPClause *Clause : D->clauselists()) { 10976 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10977 HasRequiresUnifiedSharedMemory = true; 10978 } else if (const auto *AC = 10979 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10980 switch (AC->getAtomicDefaultMemOrderKind()) { 10981 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10982 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10983 break; 10984 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10985 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10986 break; 10987 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10988 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10989 break; 10990 case 
OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10991         break;
10992       }
10993     }
10994   }
10995 }
10996 
10997 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10998   return RequiresAtomicOrdering;
10999 }
11000 
11001 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11002                                                        LangAS &AS) {
11003   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11004     return false;
11005   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11006   switch (A->getAllocatorType()) {
11007   case OMPAllocateDeclAttr::OMPNullMemAlloc:
11008   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11009     // Not supported, fall back to the default memory space.
11010   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11011   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11012   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11013   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11014   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11015   case OMPAllocateDeclAttr::OMPConstMemAlloc:
11016   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11017     AS = LangAS::Default;
11018     return true;
11019   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11020     llvm_unreachable("Expected a predefined allocator for variables with "
11021                      "static storage.");
11022   }
11023   return false;
11024 }
11025 
11026 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
11027   return HasRequiresUnifiedSharedMemory;
11028 }
11029 
11030 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11031     CodeGenModule &CGM)
11032     : CGM(CGM) {
11033   if (CGM.getLangOpts().OpenMPIsDevice) {
11034     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11035     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11036   }
11037 }
11038 
11039 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11040   if (CGM.getLangOpts().OpenMPIsDevice)
11041     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11042 }
11043 
11044 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11045   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
11046     return true;
11047 
11048   const auto *D = cast<FunctionDecl>(GD.getDecl());
11049   // Do not emit the function if it is marked as declare target, as it was
11050   // already emitted.
11051   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11052     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11053       if (auto *F = dyn_cast_or_null<llvm::Function>(
11054               CGM.GetGlobalValue(CGM.getMangledName(GD))))
11055         return !F->isDeclaration();
11056       return false;
11057     }
11058     return true;
11059   }
11060 
11061   return !AlreadyEmittedTargetDecls.insert(D).second;
11062 }
11063 
11064 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
11065   // If we don't have entries or if we are emitting code for the device, we
11066   // don't need to do anything.
11067   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
11068       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
11069       (OffloadEntriesInfoManager.empty() &&
11070        !HasEmittedDeclareTargetRegion &&
11071        !HasEmittedTargetRegion))
11072     return nullptr;
11073 
11074   // Create and register the function that handles the requires directives.
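  // The generated function is roughly (sketch; the actual symbol name is
  // produced by getName below):
  // \code
  //   void omp_offloading.requires_reg() {
  //     __tgt_register_requires(flags);
  //   }
  // \endcode
  // It is registered as a global initializer, so the flags reach the runtime
  // before any offloading entry point is used.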
11075   ASTContext &C = CGM.getContext();
11076 
11077   llvm::Function *RequiresRegFn;
11078   {
11079     CodeGenFunction CGF(CGM);
11080     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
11081     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
11082     std::string ReqName = getName({"omp_offloading", "requires_reg"});
11083     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
11084     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
11085     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
11086     // TODO: check for other requires clauses.
11087     // The requires directive takes effect only when a target region is
11088     // present in the compilation unit. Otherwise it is ignored and not
11089     // passed to the runtime. This prevents the runtime from raising an error
11090     // for mismatching requires clauses across compilation units that don't
11091     // contain at least one target region.
11092     assert((HasEmittedTargetRegion ||
11093             HasEmittedDeclareTargetRegion ||
11094             !OffloadEntriesInfoManager.empty()) &&
11095            "Target or declare target region expected.");
11096     if (HasRequiresUnifiedSharedMemory)
11097       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
11098     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11099                             CGM.getModule(), OMPRTL___tgt_register_requires),
11100                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
11101     CGF.FinishFunction();
11102   }
11103   return RequiresRegFn;
11104 }
11105 
11106 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11107                                     const OMPExecutableDirective &D,
11108                                     SourceLocation Loc,
11109                                     llvm::Function *OutlinedFn,
11110                                     ArrayRef<llvm::Value *> CapturedVars) {
11111   if (!CGF.HaveInsertPoint())
11112     return;
11113 
11114   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11115   CodeGenFunction::RunCleanupsScope Scope(CGF);
11116 
11117   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11118   llvm::Value *Args[] = {
11119       RTLoc,
11120       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11121       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11122   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11123   RealArgs.append(std::begin(Args), std::end(Args));
11124   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11125 
11126   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11127       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11128   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11129 }
11130 
11131 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11132                                          const Expr *NumTeams,
11133                                          const Expr *ThreadLimit,
11134                                          SourceLocation Loc) {
11135   if (!CGF.HaveInsertPoint())
11136     return;
11137 
11138   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11139 
11140   llvm::Value *NumTeamsVal =
11141       NumTeams
11142           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11143                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11144           : CGF.Builder.getInt32(0);
11145 
11146   llvm::Value *ThreadLimitVal =
11147       ThreadLimit
11148           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11149                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11150           : CGF.Builder.getInt32(0);
11151 
11152   // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
11153   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11154                                      ThreadLimitVal};
11155   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11156                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11157                       PushNumTeamsArgs);
11158 }
11159 
11160 void CGOpenMPRuntime::emitTargetDataCalls(
11161     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11162     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11163   if (!CGF.HaveInsertPoint())
11164     return;
11165 
11166   // Action used to replace the default codegen action and turn privatization
11167   // off.
11168   PrePostActionTy NoPrivAction;
11169 
11170   // Generate the code for the opening of the data environment. Capture all the
11171   // arguments of the runtime call by reference because they are used in the
11172   // closing of the region.
11173   auto &&BeginThenGen = [this, &D, Device, &Info,
11174                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11175     // Fill up the arrays with all the mapped variables.
11176     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11177 
11178     // Get map clause information.
11179     MappableExprsHandler MEHandler(D, CGF);
11180     MEHandler.generateAllInfo(CombinedInfo);
11181 
11182     // Fill up the arrays and create the arguments.
11183     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11184                          /*IsNonContiguous=*/true);
11185 
11186     llvm::Value *BasePointersArrayArg = nullptr;
11187     llvm::Value *PointersArrayArg = nullptr;
11188     llvm::Value *SizesArrayArg = nullptr;
11189     llvm::Value *MapTypesArrayArg = nullptr;
11190     llvm::Value *MapNamesArrayArg = nullptr;
11191     llvm::Value *MappersArrayArg = nullptr;
11192     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11193                                  SizesArrayArg, MapTypesArrayArg,
11194                                  MapNamesArrayArg, MappersArrayArg, Info);
11195 
11196     // Emit device ID if any.
11197     llvm::Value *DeviceID = nullptr;
11198     if (Device) {
11199       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11200                                            CGF.Int64Ty, /*isSigned=*/true);
11201     } else {
11202       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11203     }
11204 
11205     // Emit the number of elements in the offloading arrays.
11206     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11207 
11208     // Source location for the ident struct
11209     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11210 
11211     llvm::Value *OffloadingArgs[] = {RTLoc,
11212                                      DeviceID,
11213                                      PointerNum,
11214                                      BasePointersArrayArg,
11215                                      PointersArrayArg,
11216                                      SizesArrayArg,
11217                                      MapTypesArrayArg,
11218                                      MapNamesArrayArg,
11219                                      MappersArrayArg};
11220     CGF.EmitRuntimeCall(
11221         OMPBuilder.getOrCreateRuntimeFunction(
11222             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11223         OffloadingArgs);
11224 
11225     // If device pointer privatization is required, emit the body of the region
11226     // here. It will have to be duplicated: with and without privatization.
11227     if (!Info.CaptureDeviceAddrMap.empty())
11228       CodeGen(CGF);
11229   };
11230 
11231   // Generate code for the closing of the data region.
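  // Together with BeginThenGen above, this brackets the region body (sketch;
  // argument names abbreviated):
  // \code
  //   __tgt_target_data_begin_mapper(loc, dev, n, bases, ptrs, sizes, types,
  //                                  names, mappers);
  //   ... // body of the 'target data' region
  //   __tgt_target_data_end_mapper(loc, dev, n, bases, ptrs, sizes, types,
  //                                names, mappers);
  // \endcode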
11232 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 11233 PrePostActionTy &) { 11234 assert(Info.isValid() && "Invalid data environment closing arguments."); 11235 11236 llvm::Value *BasePointersArrayArg = nullptr; 11237 llvm::Value *PointersArrayArg = nullptr; 11238 llvm::Value *SizesArrayArg = nullptr; 11239 llvm::Value *MapTypesArrayArg = nullptr; 11240 llvm::Value *MapNamesArrayArg = nullptr; 11241 llvm::Value *MappersArrayArg = nullptr; 11242 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11243 SizesArrayArg, MapTypesArrayArg, 11244 MapNamesArrayArg, MappersArrayArg, Info, 11245 {/*ForEndCall=*/true}); 11246 11247 // Emit device ID if any. 11248 llvm::Value *DeviceID = nullptr; 11249 if (Device) { 11250 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11251 CGF.Int64Ty, /*isSigned=*/true); 11252 } else { 11253 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11254 } 11255 11256 // Emit the number of elements in the offloading arrays. 11257 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11258 11259 // Source location for the ident struct 11260 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11261 11262 llvm::Value *OffloadingArgs[] = {RTLoc, 11263 DeviceID, 11264 PointerNum, 11265 BasePointersArrayArg, 11266 PointersArrayArg, 11267 SizesArrayArg, 11268 MapTypesArrayArg, 11269 MapNamesArrayArg, 11270 MappersArrayArg}; 11271 CGF.EmitRuntimeCall( 11272 OMPBuilder.getOrCreateRuntimeFunction( 11273 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 11274 OffloadingArgs); 11275 }; 11276 11277 // If we need device pointer privatization, we need to emit the body of the 11278 // region with no privatization in the 'else' branch of the conditional. 11279 // Otherwise, we don't have to do anything. 11280 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 11281 PrePostActionTy &) { 11282 if (!Info.CaptureDeviceAddrMap.empty()) { 11283 CodeGen.setAction(NoPrivAction); 11284 CodeGen(CGF); 11285 } 11286 }; 11287 11288 // We don't have to do anything to close the region if the if clause evaluates 11289 // to false. 11290 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 11291 11292 if (IfCond) { 11293 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 11294 } else { 11295 RegionCodeGenTy RCG(BeginThenGen); 11296 RCG(CGF); 11297 } 11298 11299 // If we don't require privatization of device pointers, we emit the body in 11300 // between the runtime calls. This avoids duplicating the body code. 11301 if (Info.CaptureDeviceAddrMap.empty()) { 11302 CodeGen.setAction(NoPrivAction); 11303 CodeGen(CGF); 11304 } 11305 11306 if (IfCond) { 11307 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 11308 } else { 11309 RegionCodeGenTy RCG(EndThenGen); 11310 RCG(CGF); 11311 } 11312 } 11313 11314 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 11315 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11316 const Expr *Device) { 11317 if (!CGF.HaveInsertPoint()) 11318 return; 11319 11320 assert((isa<OMPTargetEnterDataDirective>(D) || 11321 isa<OMPTargetExitDataDirective>(D) || 11322 isa<OMPTargetUpdateDirective>(D)) && 11323 "Expecting either target enter, exit data, or update directives."); 11324 11325 CodeGenFunction::OMPTargetDataInfo InputInfo; 11326 llvm::Value *MapTypesArray = nullptr; 11327 llvm::Value *MapNamesArray = nullptr; 11328 // Generate the code for the opening of the data environment. 
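// As an illustrative sketch (not emitted verbatim), a directive such as
//   #pragma omp target enter data map(to: a)
// lowers through ThenGen below to a single runtime call along the lines of
//   __tgt_target_data_begin_mapper(&loc, /*DeviceId=*/-1, /*ArgNum=*/1,
//                                  &a_base, &a_begin, &a_size, &a_type,
//                                  &a_name, &a_mapper);
// with the nowait variant chosen when a 'nowait' clause is present.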
11329 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 11330 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 11331 // Emit device ID if any. 11332 llvm::Value *DeviceID = nullptr; 11333 if (Device) { 11334 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11335 CGF.Int64Ty, /*isSigned=*/true); 11336 } else { 11337 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11338 } 11339 11340 // Emit the number of elements in the offloading arrays. 11341 llvm::Constant *PointerNum = 11342 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 11343 11344 // Source location for the ident struct 11345 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11346 11347 llvm::Value *OffloadingArgs[] = {RTLoc, 11348 DeviceID, 11349 PointerNum, 11350 InputInfo.BasePointersArray.getPointer(), 11351 InputInfo.PointersArray.getPointer(), 11352 InputInfo.SizesArray.getPointer(), 11353 MapTypesArray, 11354 MapNamesArray, 11355 InputInfo.MappersArray.getPointer()}; 11356 11357 // Select the right runtime function call for each standalone 11358 // directive. 11359 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11360 RuntimeFunction RTLFn; 11361 switch (D.getDirectiveKind()) { 11362 case OMPD_target_enter_data: 11363 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11364 : OMPRTL___tgt_target_data_begin_mapper; 11365 break; 11366 case OMPD_target_exit_data: 11367 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11368 : OMPRTL___tgt_target_data_end_mapper; 11369 break; 11370 case OMPD_target_update: 11371 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper 11372 : OMPRTL___tgt_target_data_update_mapper; 11373 break; 11374 case OMPD_parallel: 11375 case OMPD_for: 11376 case OMPD_parallel_for: 11377 case OMPD_parallel_master: 11378 case OMPD_parallel_sections: 11379 case OMPD_for_simd: 11380 case OMPD_parallel_for_simd: 11381 case OMPD_cancel: 11382 case OMPD_cancellation_point: 11383 case OMPD_ordered: 11384 case OMPD_threadprivate: 11385 case OMPD_allocate: 11386 case OMPD_task: 11387 case OMPD_simd: 11388 case OMPD_tile: 11389 case OMPD_unroll: 11390 case OMPD_sections: 11391 case OMPD_section: 11392 case OMPD_single: 11393 case OMPD_master: 11394 case OMPD_critical: 11395 case OMPD_taskyield: 11396 case OMPD_barrier: 11397 case OMPD_taskwait: 11398 case OMPD_taskgroup: 11399 case OMPD_atomic: 11400 case OMPD_flush: 11401 case OMPD_depobj: 11402 case OMPD_scan: 11403 case OMPD_teams: 11404 case OMPD_target_data: 11405 case OMPD_distribute: 11406 case OMPD_distribute_simd: 11407 case OMPD_distribute_parallel_for: 11408 case OMPD_distribute_parallel_for_simd: 11409 case OMPD_teams_distribute: 11410 case OMPD_teams_distribute_simd: 11411 case OMPD_teams_distribute_parallel_for: 11412 case OMPD_teams_distribute_parallel_for_simd: 11413 case OMPD_declare_simd: 11414 case OMPD_declare_variant: 11415 case OMPD_begin_declare_variant: 11416 case OMPD_end_declare_variant: 11417 case OMPD_declare_target: 11418 case OMPD_end_declare_target: 11419 case OMPD_declare_reduction: 11420 case OMPD_declare_mapper: 11421 case OMPD_taskloop: 11422 case OMPD_taskloop_simd: 11423 case OMPD_master_taskloop: 11424 case OMPD_master_taskloop_simd: 11425 case OMPD_parallel_master_taskloop: 11426 case OMPD_parallel_master_taskloop_simd: 11427 case OMPD_target: 11428 case OMPD_target_simd: 11429 case OMPD_target_teams_distribute: 11430 case OMPD_target_teams_distribute_simd: 11431 case OMPD_target_teams_distribute_parallel_for: 11432 
case OMPD_target_teams_distribute_parallel_for_simd:
11433 case OMPD_target_teams:
11434 case OMPD_target_parallel:
11435 case OMPD_target_parallel_for:
11436 case OMPD_target_parallel_for_simd:
11437 case OMPD_requires:
11438 case OMPD_metadirective:
11439 case OMPD_unknown:
11440 default:
11441 llvm_unreachable("Unexpected standalone target data directive.");
11442 break;
11443 }
11444 CGF.EmitRuntimeCall(
11445 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11446 OffloadingArgs);
11447 };
11448
11449 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11450 &MapNamesArray](CodeGenFunction &CGF,
11451 PrePostActionTy &) {
11452 // Fill up the arrays with all the mapped variables.
11453 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11454
11455 // Get map clause information.
11456 MappableExprsHandler MEHandler(D, CGF);
11457 MEHandler.generateAllInfo(CombinedInfo);
11458
11459 TargetDataInfo Info;
11460 // Fill up the arrays and create the arguments.
11461 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11462 /*IsNonContiguous=*/true);
11463 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11464 D.hasClausesOfKind<OMPNowaitClause>();
11465 emitOffloadingArraysArgument(
11466 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11467 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11468 {/*ForEndCall=*/false});
11469 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11470 InputInfo.BasePointersArray =
11471 Address(Info.BasePointersArray, CGM.getPointerAlign());
11472 InputInfo.PointersArray =
11473 Address(Info.PointersArray, CGM.getPointerAlign());
11474 InputInfo.SizesArray =
11475 Address(Info.SizesArray, CGM.getPointerAlign());
11476 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
11477 MapTypesArray = Info.MapTypesArray;
11478 MapNamesArray = Info.MapNamesArray;
11479 if (RequiresOuterTask)
11480 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11481 else
11482 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11483 };
11484
11485 if (IfCond) {
11486 emitIfClause(CGF, IfCond, TargetThenGen,
11487 [](CodeGenFunction &CGF, PrePostActionTy &) {});
11488 } else {
11489 RegionCodeGenTy ThenRCG(TargetThenGen);
11490 ThenRCG(CGF);
11491 }
11492 }
11493
11494 namespace {
11495 /// Kind of parameter in a function with 'declare simd' directive.
11496 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
11497 /// Attribute set of the parameter.
11498 struct ParamAttrTy {
11499 ParamKindTy Kind = Vector;
11500 llvm::APSInt StrideOrArg;
11501 llvm::APSInt Alignment;
11502 };
11503 } // namespace
11504
11505 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11506 ArrayRef<ParamAttrTy> ParamAttrs) {
11507 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11508 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11509 // of that clause. The VLEN value must be a power of 2.
11510 // Otherwise the notion of the function's "characteristic data type" (CDT)
11511 // is used to compute the vector length.
11512 // CDT is defined in the following order:
11513 // a) For a non-void function, the CDT is the return type.
11514 // b) If the function has any non-uniform, non-linear parameters, then the
11515 // CDT is the type of the first such parameter.
11516 // c) If the CDT determined by a) or b) above is a struct, union, or class
11517 // type which is pass-by-value (except for the type that maps to the
11518 // built-in complex data type), the characteristic data type is int.
11519 // d) If none of the above three cases is applicable, the CDT is int.
11520 // The VLEN is then determined based on the CDT and the size of the vector
11521 // register of the ISA for which the current vector version is generated. The
11522 // VLEN is computed using the formula below:
11523 // VLEN = sizeof(vector_register) / sizeof(CDT),
11524 // where the vector register size is specified in section 3.2.1 Registers and
11525 // the Stack Frame of the original AMD64 ABI document.
11526 QualType RetType = FD->getReturnType();
11527 if (RetType.isNull())
11528 return 0;
11529 ASTContext &C = FD->getASTContext();
11530 QualType CDT;
11531 if (!RetType.isNull() && !RetType->isVoidType()) {
11532 CDT = RetType;
11533 } else {
11534 unsigned Offset = 0;
11535 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11536 if (ParamAttrs[Offset].Kind == Vector)
11537 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11538 ++Offset;
11539 }
11540 if (CDT.isNull()) {
11541 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11542 if (ParamAttrs[I + Offset].Kind == Vector) {
11543 CDT = FD->getParamDecl(I)->getType();
11544 break;
11545 }
11546 }
11547 }
11548 }
11549 if (CDT.isNull())
11550 CDT = C.IntTy;
11551 CDT = CDT->getCanonicalTypeUnqualified();
11552 if (CDT->isRecordType() || CDT->isUnionType())
11553 CDT = C.IntTy;
11554 return C.getTypeSize(CDT);
11555 }
11556
11557 static void
11558 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11559 const llvm::APSInt &VLENVal,
11560 ArrayRef<ParamAttrTy> ParamAttrs,
11561 OMPDeclareSimdDeclAttr::BranchStateTy State) {
11562 struct ISADataTy {
11563 char ISA;
11564 unsigned VecRegSize;
11565 };
11566 ISADataTy ISAData[] = {
11567 {
11568 'b', 128
11569 }, // SSE
11570 {
11571 'c', 256
11572 }, // AVX
11573 {
11574 'd', 256
11575 }, // AVX2
11576 {
11577 'e', 512
11578 }, // AVX512
11579 };
11580 llvm::SmallVector<char, 2> Masked;
11581 switch (State) {
11582 case OMPDeclareSimdDeclAttr::BS_Undefined:
11583 Masked.push_back('N');
11584 Masked.push_back('M');
11585 break;
11586 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11587 Masked.push_back('N');
11588 break;
11589 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11590 Masked.push_back('M');
11591 break;
11592 }
11593 for (char Mask : Masked) {
11594 for (const ISADataTy &Data : ISAData) {
11595 SmallString<256> Buffer;
11596 llvm::raw_svector_ostream Out(Buffer);
11597 Out << "_ZGV" << Data.ISA << Mask;
11598 if (!VLENVal) {
11599 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11600 assert(NumElts && "Non-zero simdlen/cdtsize expected");
11601 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11602 } else {
11603 Out << VLENVal;
11604 }
11605 for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11606 switch (ParamAttr.Kind) {
11607 case LinearWithVarStride:
11608 Out << 's' << ParamAttr.StrideOrArg;
11609 break;
11610 case Linear:
11611 Out << 'l';
11612 if (ParamAttr.StrideOrArg != 1)
11613 Out << ParamAttr.StrideOrArg;
11614 break;
11615 case Uniform:
11616 Out << 'u';
11617 break;
11618 case Vector:
11619 Out << 'v';
11620 break;
11621 }
11622 if (!!ParamAttr.Alignment)
11623 Out << 'a' << ParamAttr.Alignment;
11624 }
11625 Out << '_' << Fn->getName();
11626 Fn->addFnAttr(Out.str());
11627 }
11628 }
11629 }
11630
11631 // These are the functions
that are needed to mangle the names of the
11632 // vector functions generated by the compiler, according to the rules
11633 // defined in the "Vector Function ABI specifications for AArch64",
11634 // available at
11635 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11636
11637 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11638 ///
11639 /// TODO: Need to implement the behavior for references marked with a
11640 /// var or no linear modifiers (1.b in the section). For this, we
11641 /// need to extend ParamKindTy to support the linear modifiers.
11642 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11643 QT = QT.getCanonicalType();
11644
11645 if (QT->isVoidType())
11646 return false;
11647
11648 if (Kind == ParamKindTy::Uniform)
11649 return false;
11650
11651 if (Kind == ParamKindTy::Linear)
11652 return false;
11653
11654 // TODO: Handle linear references with modifiers
11655
11656 if (Kind == ParamKindTy::LinearWithVarStride)
11657 return false;
11658
11659 return true;
11660 }
11661
11662 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11663 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11664 QT = QT.getCanonicalType();
11665 unsigned Size = C.getTypeSize(QT);
11666
11667 // Only scalar and complex types at most 16 bytes wide set PBV to true.
11668 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11669 return false;
11670
11671 if (QT->isFloatingType())
11672 return true;
11673
11674 if (QT->isIntegerType())
11675 return true;
11676
11677 if (QT->isPointerType())
11678 return true;
11679
11680 // TODO: Add support for complex types (section 3.1.2, item 2).
11681
11682 return false;
11683 }
11684
11685 /// Computes the lane size (LS) of a return type or of an input parameter,
11686 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11687 /// TODO: Add support for references, section 3.2.1, item 1.
11688 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11689 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11690 QualType PTy = QT.getCanonicalType()->getPointeeType();
11691 if (getAArch64PBV(PTy, C))
11692 return C.getTypeSize(PTy);
11693 }
11694 if (getAArch64PBV(QT, C))
11695 return C.getTypeSize(QT);
11696
11697 return C.getTypeSize(C.getUIntPtrType());
11698 }
11699
11700 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11701 // signature of the scalar function, as defined in 3.2.2 of the
11702 // AAVFABI.
11703 static std::tuple<unsigned, unsigned, bool>
11704 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11705 QualType RetType = FD->getReturnType().getCanonicalType();
11706
11707 ASTContext &C = FD->getASTContext();
11708
11709 bool OutputBecomesInput = false;
11710
11711 llvm::SmallVector<unsigned, 8> Sizes;
11712 if (!RetType->isVoidType()) {
11713 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11714 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11715 OutputBecomesInput = true;
11716 }
11717 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11718 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11719 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11720 }
11721
11722 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11723 // The LS of a function parameter / return value can only be a power
11724 // of 2, starting from 8 bits, up to 128.
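// For example (illustrative only): for a function 'double foo(float x)' the
// collected lane sizes are {64, 32}, giving NDS = 32 and WDS = 64.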
11725 assert(llvm::all_of(Sizes, 11726 [](unsigned Size) { 11727 return Size == 8 || Size == 16 || Size == 32 || 11728 Size == 64 || Size == 128; 11729 }) && 11730 "Invalid size"); 11731 11732 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11733 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11734 OutputBecomesInput); 11735 } 11736 11737 /// Mangle the parameter part of the vector function name according to 11738 /// their OpenMP classification. The mangling function is defined in 11739 /// section 3.5 of the AAVFABI. 11740 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11741 SmallString<256> Buffer; 11742 llvm::raw_svector_ostream Out(Buffer); 11743 for (const auto &ParamAttr : ParamAttrs) { 11744 switch (ParamAttr.Kind) { 11745 case LinearWithVarStride: 11746 Out << "ls" << ParamAttr.StrideOrArg; 11747 break; 11748 case Linear: 11749 Out << 'l'; 11750 // Don't print the step value if it is not present or if it is 11751 // equal to 1. 11752 if (ParamAttr.StrideOrArg != 1) 11753 Out << ParamAttr.StrideOrArg; 11754 break; 11755 case Uniform: 11756 Out << 'u'; 11757 break; 11758 case Vector: 11759 Out << 'v'; 11760 break; 11761 } 11762 11763 if (!!ParamAttr.Alignment) 11764 Out << 'a' << ParamAttr.Alignment; 11765 } 11766 11767 return std::string(Out.str()); 11768 } 11769 11770 // Function used to add the attribute. The parameter `VLEN` is 11771 // templated to allow the use of "x" when targeting scalable functions 11772 // for SVE. 11773 template <typename T> 11774 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11775 char ISA, StringRef ParSeq, 11776 StringRef MangledName, bool OutputBecomesInput, 11777 llvm::Function *Fn) { 11778 SmallString<256> Buffer; 11779 llvm::raw_svector_ostream Out(Buffer); 11780 Out << Prefix << ISA << LMask << VLEN; 11781 if (OutputBecomesInput) 11782 Out << "v"; 11783 Out << ParSeq << "_" << MangledName; 11784 Fn->addFnAttr(Out.str()); 11785 } 11786 11787 // Helper function to generate the Advanced SIMD names depending on 11788 // the value of the NDS when simdlen is not present. 11789 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11790 StringRef Prefix, char ISA, 11791 StringRef ParSeq, StringRef MangledName, 11792 bool OutputBecomesInput, 11793 llvm::Function *Fn) { 11794 switch (NDS) { 11795 case 8: 11796 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11797 OutputBecomesInput, Fn); 11798 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11799 OutputBecomesInput, Fn); 11800 break; 11801 case 16: 11802 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11803 OutputBecomesInput, Fn); 11804 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11805 OutputBecomesInput, Fn); 11806 break; 11807 case 32: 11808 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11809 OutputBecomesInput, Fn); 11810 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11811 OutputBecomesInput, Fn); 11812 break; 11813 case 64: 11814 case 128: 11815 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11816 OutputBecomesInput, Fn); 11817 break; 11818 default: 11819 llvm_unreachable("Scalar type is too wide."); 11820 } 11821 } 11822 11823 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
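/// For instance (illustrative only, not emitted verbatim by this file): a
/// 'declare simd' on 'double foo(double x)' would yield the Advanced SIMD
/// variants '_ZGVnN2v_foo' and '_ZGVnM2v_foo', and the SVE variant
/// '_ZGVsMxv_foo'.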
11824 static void emitAArch64DeclareSimdFunction(
11825 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11826 ArrayRef<ParamAttrTy> ParamAttrs,
11827 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11828 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11829
11830 // Get basic data for building the vector signature.
11831 const auto Data = getNDSWDS(FD, ParamAttrs);
11832 const unsigned NDS = std::get<0>(Data);
11833 const unsigned WDS = std::get<1>(Data);
11834 const bool OutputBecomesInput = std::get<2>(Data);
11835
11836 // Check the values provided via `simdlen` by the user.
11837 // 1. A `simdlen(1)` doesn't produce vector signatures.
11838 if (UserVLEN == 1) {
11839 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11840 DiagnosticsEngine::Warning,
11841 "The clause simdlen(1) has no effect when targeting aarch64.");
11842 CGM.getDiags().Report(SLoc, DiagID);
11843 return;
11844 }
11845
11846 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11847 // Advanced SIMD output.
11848 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11849 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11850 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11851 "power of 2 when targeting Advanced SIMD.");
11852 CGM.getDiags().Report(SLoc, DiagID);
11853 return;
11854 }
11855
11856 // 3. Section 3.4.1: SVE fixed lengths must obey the architectural
11857 // limits.
11858 if (ISA == 's' && UserVLEN != 0) {
11859 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11860 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11861 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11862 "lanes in the architectural constraints "
11863 "for SVE (min is 128-bit, max is "
11864 "2048-bit, by steps of 128-bit)");
11865 CGM.getDiags().Report(SLoc, DiagID) << WDS;
11866 return;
11867 }
11868 }
11869
11870 // Sort out parameter sequence.
11871 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11872 StringRef Prefix = "_ZGV";
11873 // Generate simdlen from user input (if any).
11874 if (UserVLEN) {
11875 if (ISA == 's') {
11876 // SVE generates only a masked function.
11877 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11878 OutputBecomesInput, Fn);
11879 } else {
11880 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11881 // Advanced SIMD generates one or two functions, depending on
11882 // the `[not]inbranch` clause.
11883 switch (State) {
11884 case OMPDeclareSimdDeclAttr::BS_Undefined:
11885 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11886 OutputBecomesInput, Fn);
11887 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11888 OutputBecomesInput, Fn);
11889 break;
11890 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11891 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11892 OutputBecomesInput, Fn);
11893 break;
11894 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11895 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11896 OutputBecomesInput, Fn);
11897 break;
11898 }
11899 }
11900 } else {
11901 // If no user simdlen is provided, follow the AAVFABI rules for
11902 // generating the vector length.
11903 if (ISA == 's') {
11904 // SVE, section 3.4.1, item 1.
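// The "x" token passed below requests a vector-length-agnostic (scalable)
// variant, as described for SVE in the AAVFABI.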
11905 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11906 OutputBecomesInput, Fn); 11907 } else { 11908 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11909 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11910 // two vector names depending on the use of the clause 11911 // `[not]inbranch`. 11912 switch (State) { 11913 case OMPDeclareSimdDeclAttr::BS_Undefined: 11914 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11915 OutputBecomesInput, Fn); 11916 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11917 OutputBecomesInput, Fn); 11918 break; 11919 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11920 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11921 OutputBecomesInput, Fn); 11922 break; 11923 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11924 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11925 OutputBecomesInput, Fn); 11926 break; 11927 } 11928 } 11929 } 11930 } 11931 11932 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11933 llvm::Function *Fn) { 11934 ASTContext &C = CGM.getContext(); 11935 FD = FD->getMostRecentDecl(); 11936 // Map params to their positions in function decl. 11937 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11938 if (isa<CXXMethodDecl>(FD)) 11939 ParamPositions.try_emplace(FD, 0); 11940 unsigned ParamPos = ParamPositions.size(); 11941 for (const ParmVarDecl *P : FD->parameters()) { 11942 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11943 ++ParamPos; 11944 } 11945 while (FD) { 11946 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11947 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11948 // Mark uniform parameters. 11949 for (const Expr *E : Attr->uniforms()) { 11950 E = E->IgnoreParenImpCasts(); 11951 unsigned Pos; 11952 if (isa<CXXThisExpr>(E)) { 11953 Pos = ParamPositions[FD]; 11954 } else { 11955 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11956 ->getCanonicalDecl(); 11957 Pos = ParamPositions[PVD]; 11958 } 11959 ParamAttrs[Pos].Kind = Uniform; 11960 } 11961 // Get alignment info. 11962 auto NI = Attr->alignments_begin(); 11963 for (const Expr *E : Attr->aligneds()) { 11964 E = E->IgnoreParenImpCasts(); 11965 unsigned Pos; 11966 QualType ParmTy; 11967 if (isa<CXXThisExpr>(E)) { 11968 Pos = ParamPositions[FD]; 11969 ParmTy = E->getType(); 11970 } else { 11971 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11972 ->getCanonicalDecl(); 11973 Pos = ParamPositions[PVD]; 11974 ParmTy = PVD->getType(); 11975 } 11976 ParamAttrs[Pos].Alignment = 11977 (*NI) 11978 ? (*NI)->EvaluateKnownConstInt(C) 11979 : llvm::APSInt::getUnsigned( 11980 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11981 .getQuantity()); 11982 ++NI; 11983 } 11984 // Mark linear parameters. 11985 auto SI = Attr->steps_begin(); 11986 auto MI = Attr->modifiers_begin(); 11987 for (const Expr *E : Attr->linears()) { 11988 E = E->IgnoreParenImpCasts(); 11989 unsigned Pos; 11990 // Rescaling factor needed to compute the linear parameter 11991 // value in the mangled name. 
unsigned PtrRescalingFactor = 1;
11993 if (isa<CXXThisExpr>(E)) {
11994 Pos = ParamPositions[FD];
11995 } else {
11996 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11997 ->getCanonicalDecl();
11998 Pos = ParamPositions[PVD];
11999 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12000 PtrRescalingFactor = CGM.getContext()
12001 .getTypeSizeInChars(P->getPointeeType())
12002 .getQuantity();
12003 }
12004 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12005 ParamAttr.Kind = Linear;
12006 // Assuming a stride of 1, for `linear` without modifiers.
12007 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12008 if (*SI) {
12009 Expr::EvalResult Result;
12010 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12011 if (const auto *DRE =
12012 dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12013 if (const auto *StridePVD = dyn_cast<ParmVarDecl>(DRE->getDecl())) {
12014 ParamAttr.Kind = LinearWithVarStride;
12015 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
12016 ParamPositions[StridePVD->getCanonicalDecl()]);
12017 }
12018 }
12019 } else {
12020 ParamAttr.StrideOrArg = Result.Val.getInt();
12021 }
12022 }
12023 // If we are using a linear clause on a pointer, we need to
12024 // rescale the value of linear_step with the byte size of the
12025 // pointee type.
12026 if (Linear == ParamAttr.Kind)
12027 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12028 ++SI;
12029 ++MI;
12030 }
12031 llvm::APSInt VLENVal;
12032 SourceLocation ExprLoc;
12033 const Expr *VLENExpr = Attr->getSimdlen();
12034 if (VLENExpr) {
12035 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12036 ExprLoc = VLENExpr->getExprLoc();
12037 }
12038 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12039 if (CGM.getTriple().isX86()) {
12040 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12041 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12042 unsigned VLEN = VLENVal.getExtValue();
12043 StringRef MangledName = Fn->getName();
12044 if (CGM.getTarget().hasFeature("sve"))
12045 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12046 MangledName, 's', 128, Fn, ExprLoc);
12047 if (CGM.getTarget().hasFeature("neon"))
12048 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12049 MangledName, 'n', 128, Fn, ExprLoc);
12050 }
12051 }
12052 FD = FD->getPreviousDecl();
12053 }
12054 }
12055
12056 namespace {
12057 /// Cleanup action for doacross support.
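/// Ensures that the matching __kmpc_doacross_fini call is emitted on every
/// exit path (normal and EH) out of the loop region.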
12058 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12059 public:
12060 static const int DoacrossFinArgs = 2;
12061
12062 private:
12063 llvm::FunctionCallee RTLFn;
12064 llvm::Value *Args[DoacrossFinArgs];
12065
12066 public:
12067 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12068 ArrayRef<llvm::Value *> CallArgs)
12069 : RTLFn(RTLFn) {
12070 assert(CallArgs.size() == DoacrossFinArgs);
12071 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12072 }
12073 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12074 if (!CGF.HaveInsertPoint())
12075 return;
12076 CGF.EmitRuntimeCall(RTLFn, Args);
12077 }
12078 };
12079 } // namespace
12080
12081 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12082 const OMPLoopDirective &D,
12083 ArrayRef<Expr *> NumIterations) {
12084 if (!CGF.HaveInsertPoint())
12085 return;
12086
12087 ASTContext &C = CGM.getContext();
12088 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12089 RecordDecl *RD;
12090 if (KmpDimTy.isNull()) {
12091 // Build struct kmp_dim { // loop bounds info cast to kmp_int64
12092 // kmp_int64 lo; // lower
12093 // kmp_int64 up; // upper
12094 // kmp_int64 st; // stride
12095 // };
12096 RD = C.buildImplicitRecord("kmp_dim");
12097 RD->startDefinition();
12098 addFieldToRecordDecl(C, RD, Int64Ty);
12099 addFieldToRecordDecl(C, RD, Int64Ty);
12100 addFieldToRecordDecl(C, RD, Int64Ty);
12101 RD->completeDefinition();
12102 KmpDimTy = C.getRecordType(RD);
12103 } else {
12104 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
12105 }
12106 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12107 QualType ArrayTy =
12108 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
12109
12110 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12111 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12112 enum { LowerFD = 0, UpperFD, StrideFD };
12113 // Fill dims with data.
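// Only the 'up' and 'st' fields are set below; 'lo' keeps the zero written
// by EmitNullInitialization above.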
12114 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 12115 LValue DimsLVal = CGF.MakeAddrLValue( 12116 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 12117 // dims.upper = num_iterations; 12118 LValue UpperLVal = CGF.EmitLValueForField( 12119 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 12120 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 12121 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 12122 Int64Ty, NumIterations[I]->getExprLoc()); 12123 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 12124 // dims.stride = 1; 12125 LValue StrideLVal = CGF.EmitLValueForField( 12126 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 12127 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 12128 StrideLVal); 12129 } 12130 12131 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 12132 // kmp_int32 num_dims, struct kmp_dim * dims); 12133 llvm::Value *Args[] = { 12134 emitUpdateLocation(CGF, D.getBeginLoc()), 12135 getThreadID(CGF, D.getBeginLoc()), 12136 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 12137 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12138 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 12139 CGM.VoidPtrTy)}; 12140 12141 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12142 CGM.getModule(), OMPRTL___kmpc_doacross_init); 12143 CGF.EmitRuntimeCall(RTLFn, Args); 12144 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 12145 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 12146 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12147 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 12148 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 12149 llvm::makeArrayRef(FiniArgs)); 12150 } 12151 12152 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12153 const OMPDependClause *C) { 12154 QualType Int64Ty = 12155 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 12156 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 12157 QualType ArrayTy = CGM.getContext().getConstantArrayType( 12158 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 12159 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 12160 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 12161 const Expr *CounterVal = C->getLoopData(I); 12162 assert(CounterVal); 12163 llvm::Value *CntVal = CGF.EmitScalarConversion( 12164 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 12165 CounterVal->getExprLoc()); 12166 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 12167 /*Volatile=*/false, Int64Ty); 12168 } 12169 llvm::Value *Args[] = { 12170 emitUpdateLocation(CGF, C->getBeginLoc()), 12171 getThreadID(CGF, C->getBeginLoc()), 12172 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 12173 llvm::FunctionCallee RTLFn; 12174 if (C->getDependencyKind() == OMPC_DEPEND_source) { 12175 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12176 OMPRTL___kmpc_doacross_post); 12177 } else { 12178 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 12179 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12180 OMPRTL___kmpc_doacross_wait); 12181 } 12182 CGF.EmitRuntimeCall(RTLFn, Args); 12183 } 12184 12185 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 12186 llvm::FunctionCallee Callee, 12187 ArrayRef<llvm::Value *> Args) const { 12188 assert(Loc.isValid() && "Outlined function call location 
must be valid."); 12189 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 12190 12191 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 12192 if (Fn->doesNotThrow()) { 12193 CGF.EmitNounwindRuntimeCall(Fn, Args); 12194 return; 12195 } 12196 } 12197 CGF.EmitRuntimeCall(Callee, Args); 12198 } 12199 12200 void CGOpenMPRuntime::emitOutlinedFunctionCall( 12201 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 12202 ArrayRef<llvm::Value *> Args) const { 12203 emitCall(CGF, Loc, OutlinedFn, Args); 12204 } 12205 12206 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 12207 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 12208 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 12209 HasEmittedDeclareTargetRegion = true; 12210 } 12211 12212 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 12213 const VarDecl *NativeParam, 12214 const VarDecl *TargetParam) const { 12215 return CGF.GetAddrOfLocalVar(NativeParam); 12216 } 12217 12218 /// Return allocator value from expression, or return a null allocator (default 12219 /// when no allocator specified). 12220 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF, 12221 const Expr *Allocator) { 12222 llvm::Value *AllocVal; 12223 if (Allocator) { 12224 AllocVal = CGF.EmitScalarExpr(Allocator); 12225 // According to the standard, the original allocator type is a enum 12226 // (integer). Convert to pointer type, if required. 12227 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), 12228 CGF.getContext().VoidPtrTy, 12229 Allocator->getExprLoc()); 12230 } else { 12231 // If no allocator specified, it defaults to the null allocator. 12232 AllocVal = llvm::Constant::getNullValue( 12233 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy)); 12234 } 12235 return AllocVal; 12236 } 12237 12238 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 12239 const VarDecl *VD) { 12240 if (!VD) 12241 return Address::invalid(); 12242 Address UntiedAddr = Address::invalid(); 12243 Address UntiedRealAddr = Address::invalid(); 12244 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12245 if (It != FunctionToUntiedTaskStackMap.end()) { 12246 const UntiedLocalVarsAddressesMap &UntiedData = 12247 UntiedLocalVarsStack[It->second]; 12248 auto I = UntiedData.find(VD); 12249 if (I != UntiedData.end()) { 12250 UntiedAddr = I->second.first; 12251 UntiedRealAddr = I->second.second; 12252 } 12253 } 12254 const VarDecl *CVD = VD->getCanonicalDecl(); 12255 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 12256 // Use the default allocation. 
12257 if (!isAllocatableDecl(VD)) 12258 return UntiedAddr; 12259 llvm::Value *Size; 12260 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 12261 if (CVD->getType()->isVariablyModifiedType()) { 12262 Size = CGF.getTypeSize(CVD->getType()); 12263 // Align the size: ((size + align - 1) / align) * align 12264 Size = CGF.Builder.CreateNUWAdd( 12265 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 12266 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 12267 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 12268 } else { 12269 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 12270 Size = CGM.getSize(Sz.alignTo(Align)); 12271 } 12272 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 12273 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 12274 const Expr *Allocator = AA->getAllocator(); 12275 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator); 12276 llvm::Value *Alignment = 12277 AA->getAlignment() 12278 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()), 12279 CGM.SizeTy, /*isSigned=*/false) 12280 : nullptr; 12281 SmallVector<llvm::Value *, 4> Args; 12282 Args.push_back(ThreadID); 12283 if (Alignment) 12284 Args.push_back(Alignment); 12285 Args.push_back(Size); 12286 Args.push_back(AllocVal); 12287 llvm::omp::RuntimeFunction FnID = 12288 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc; 12289 llvm::Value *Addr = CGF.EmitRuntimeCall( 12290 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args, 12291 getName({CVD->getName(), ".void.addr"})); 12292 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12293 CGM.getModule(), OMPRTL___kmpc_free); 12294 QualType Ty = CGM.getContext().getPointerType(CVD->getType()); 12295 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12296 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); 12297 if (UntiedAddr.isValid()) 12298 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); 12299 12300 // Cleanup action for allocate support. 12301 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 12302 llvm::FunctionCallee RTLFn; 12303 SourceLocation::UIntTy LocEncoding; 12304 Address Addr; 12305 const Expr *AllocExpr; 12306 12307 public: 12308 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 12309 SourceLocation::UIntTy LocEncoding, Address Addr, 12310 const Expr *AllocExpr) 12311 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), 12312 AllocExpr(AllocExpr) {} 12313 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12314 if (!CGF.HaveInsertPoint()) 12315 return; 12316 llvm::Value *Args[3]; 12317 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID( 12318 CGF, SourceLocation::getFromRawEncoding(LocEncoding)); 12319 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12320 Addr.getPointer(), CGF.VoidPtrTy); 12321 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr); 12322 Args[2] = AllocVal; 12323 CGF.EmitRuntimeCall(RTLFn, Args); 12324 } 12325 }; 12326 Address VDAddr = 12327 UntiedRealAddr.isValid() ? 
UntiedRealAddr : Address(Addr, Align); 12328 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( 12329 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), 12330 VDAddr, Allocator); 12331 if (UntiedRealAddr.isValid()) 12332 if (auto *Region = 12333 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 12334 Region->emitUntiedSwitch(CGF); 12335 return VDAddr; 12336 } 12337 return UntiedAddr; 12338 } 12339 12340 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, 12341 const VarDecl *VD) const { 12342 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12343 if (It == FunctionToUntiedTaskStackMap.end()) 12344 return false; 12345 return UntiedLocalVarsStack[It->second].count(VD) > 0; 12346 } 12347 12348 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 12349 CodeGenModule &CGM, const OMPLoopDirective &S) 12350 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 12351 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12352 if (!NeedToPush) 12353 return; 12354 NontemporalDeclsSet &DS = 12355 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 12356 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 12357 for (const Stmt *Ref : C->private_refs()) { 12358 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 12359 const ValueDecl *VD; 12360 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 12361 VD = DRE->getDecl(); 12362 } else { 12363 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 12364 assert((ME->isImplicitCXXThis() || 12365 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 12366 "Expected member of current class."); 12367 VD = ME->getMemberDecl(); 12368 } 12369 DS.insert(VD); 12370 } 12371 } 12372 } 12373 12374 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 12375 if (!NeedToPush) 12376 return; 12377 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 12378 } 12379 12380 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12381 CodeGenFunction &CGF, 12382 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>, 12383 std::pair<Address, Address>> &LocalVars) 12384 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12385 if (!NeedToPush) 12386 return; 12387 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12388 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12389 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12390 } 12391 12392 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12393 if (!NeedToPush) 12394 return; 12395 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12396 } 12397 12398 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12399 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12400 12401 return llvm::any_of( 12402 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12403 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); }); 12404 } 12405 12406 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12407 const OMPExecutableDirective &S, 12408 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12409 const { 12410 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12411 // Vars in target/task regions must be excluded completely. 
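// Candidates are first collected into NeedToCheckForLPCs; any candidate
// already tracked by an enclosing region is then recorded in
// NeedToAddForLPCsAsDisabled at the end of this function.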
12412 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12413 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12414 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12415 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12416 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12417 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12418 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12419 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12420 } 12421 } 12422 // Exclude vars in private clauses. 12423 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12424 for (const Expr *Ref : C->varlists()) { 12425 if (!Ref->getType()->isScalarType()) 12426 continue; 12427 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12428 if (!DRE) 12429 continue; 12430 NeedToCheckForLPCs.insert(DRE->getDecl()); 12431 } 12432 } 12433 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12434 for (const Expr *Ref : C->varlists()) { 12435 if (!Ref->getType()->isScalarType()) 12436 continue; 12437 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12438 if (!DRE) 12439 continue; 12440 NeedToCheckForLPCs.insert(DRE->getDecl()); 12441 } 12442 } 12443 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12444 for (const Expr *Ref : C->varlists()) { 12445 if (!Ref->getType()->isScalarType()) 12446 continue; 12447 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12448 if (!DRE) 12449 continue; 12450 NeedToCheckForLPCs.insert(DRE->getDecl()); 12451 } 12452 } 12453 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12454 for (const Expr *Ref : C->varlists()) { 12455 if (!Ref->getType()->isScalarType()) 12456 continue; 12457 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12458 if (!DRE) 12459 continue; 12460 NeedToCheckForLPCs.insert(DRE->getDecl()); 12461 } 12462 } 12463 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 12464 for (const Expr *Ref : C->varlists()) { 12465 if (!Ref->getType()->isScalarType()) 12466 continue; 12467 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12468 if (!DRE) 12469 continue; 12470 NeedToCheckForLPCs.insert(DRE->getDecl()); 12471 } 12472 } 12473 for (const Decl *VD : NeedToCheckForLPCs) { 12474 for (const LastprivateConditionalData &Data : 12475 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12476 if (Data.DeclToUniqueName.count(VD) > 0) { 12477 if (!Data.Disabled) 12478 NeedToAddForLPCsAsDisabled.insert(VD); 12479 break; 12480 } 12481 } 12482 } 12483 } 12484 12485 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12486 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12487 : CGM(CGF.CGM), 12488 Action((CGM.getLangOpts().OpenMP >= 50 && 12489 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12490 [](const OMPLastprivateClause *C) { 12491 return C->getKind() == 12492 OMPC_LASTPRIVATE_conditional; 12493 })) 12494 ? 
ActionToDo::PushAsLastprivateConditional
12495 : ActionToDo::DoNotPush) {
12496 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12497 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12498 return;
12499 assert(Action == ActionToDo::PushAsLastprivateConditional &&
12500 "Expected a push action.");
12501 LastprivateConditionalData &Data =
12502 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12503 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12504 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12505 continue;
12506
12507 for (const Expr *Ref : C->varlists()) {
12508 Data.DeclToUniqueName.insert(std::make_pair(
12509 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12510 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12511 }
12512 }
12513 Data.IVLVal = IVLVal;
12514 Data.Fn = CGF.CurFn;
12515 }
12516
12517 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12518 CodeGenFunction &CGF, const OMPExecutableDirective &S)
12519 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12520 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12521 if (CGM.getLangOpts().OpenMP < 50)
12522 return;
12523 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12524 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12525 if (!NeedToAddForLPCsAsDisabled.empty()) {
12526 Action = ActionToDo::DisableLastprivateConditional;
12527 LastprivateConditionalData &Data =
12528 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12529 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12530 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12531 Data.Fn = CGF.CurFn;
12532 Data.Disabled = true;
12533 }
12534 }
12535
12536 CGOpenMPRuntime::LastprivateConditionalRAII
12537 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12538 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12539 return LastprivateConditionalRAII(CGF, S);
12540 }
12541
12542 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12543 if (CGM.getLangOpts().OpenMP < 50)
12544 return;
12545 if (Action == ActionToDo::DisableLastprivateConditional) {
12546 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12547 "Expected list of disabled private vars.");
12548 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12549 }
12550 if (Action == ActionToDo::PushAsLastprivateConditional) {
12551 assert(
12552 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12553 "Expected list of lastprivate conditional vars.");
12554 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12555 }
12556 }
12557
12558 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12559 const VarDecl *VD) {
12560 ASTContext &C = CGM.getContext();
12561 auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12562 if (I == LastprivateConditionalToTypes.end())
12563 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12564 QualType NewType;
12565 const FieldDecl *VDField;
12566 const FieldDecl *FiredField;
12567 LValue BaseLVal;
12568 auto VI = I->getSecond().find(VD);
12569 if (VI == I->getSecond().end()) {
12570 RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
12571 RD->startDefinition();
12572 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12573 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12574 RD->completeDefinition();
12575
NewType = C.getRecordType(RD); 12576 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12577 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12578 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12579 } else { 12580 NewType = std::get<0>(VI->getSecond()); 12581 VDField = std::get<1>(VI->getSecond()); 12582 FiredField = std::get<2>(VI->getSecond()); 12583 BaseLVal = std::get<3>(VI->getSecond()); 12584 } 12585 LValue FiredLVal = 12586 CGF.EmitLValueForField(BaseLVal, FiredField); 12587 CGF.EmitStoreOfScalar( 12588 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12589 FiredLVal); 12590 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12591 } 12592 12593 namespace { 12594 /// Checks if the lastprivate conditional variable is referenced in LHS. 12595 class LastprivateConditionalRefChecker final 12596 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12597 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12598 const Expr *FoundE = nullptr; 12599 const Decl *FoundD = nullptr; 12600 StringRef UniqueDeclName; 12601 LValue IVLVal; 12602 llvm::Function *FoundFn = nullptr; 12603 SourceLocation Loc; 12604 12605 public: 12606 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12607 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12608 llvm::reverse(LPM)) { 12609 auto It = D.DeclToUniqueName.find(E->getDecl()); 12610 if (It == D.DeclToUniqueName.end()) 12611 continue; 12612 if (D.Disabled) 12613 return false; 12614 FoundE = E; 12615 FoundD = E->getDecl()->getCanonicalDecl(); 12616 UniqueDeclName = It->second; 12617 IVLVal = D.IVLVal; 12618 FoundFn = D.Fn; 12619 break; 12620 } 12621 return FoundE == E; 12622 } 12623 bool VisitMemberExpr(const MemberExpr *E) { 12624 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12625 return false; 12626 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12627 llvm::reverse(LPM)) { 12628 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12629 if (It == D.DeclToUniqueName.end()) 12630 continue; 12631 if (D.Disabled) 12632 return false; 12633 FoundE = E; 12634 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12635 UniqueDeclName = It->second; 12636 IVLVal = D.IVLVal; 12637 FoundFn = D.Fn; 12638 break; 12639 } 12640 return FoundE == E; 12641 } 12642 bool VisitStmt(const Stmt *S) { 12643 for (const Stmt *Child : S->children()) { 12644 if (!Child) 12645 continue; 12646 if (const auto *E = dyn_cast<Expr>(Child)) 12647 if (!E->isGLValue()) 12648 continue; 12649 if (Visit(Child)) 12650 return true; 12651 } 12652 return false; 12653 } 12654 explicit LastprivateConditionalRefChecker( 12655 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12656 : LPM(LPM) {} 12657 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12658 getFoundData() const { 12659 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12660 } 12661 }; 12662 } // namespace 12663 12664 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12665 LValue IVLVal, 12666 StringRef UniqueDeclName, 12667 LValue LVal, 12668 SourceLocation Loc) { 12669 // Last updated loop counter for the lastprivate conditional var. 
12670 // int<xx> last_iv = 0; 12671 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12672 llvm::Constant *LastIV = 12673 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12674 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12675 IVLVal.getAlignment().getAsAlign()); 12676 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12677 12678 // Last value of the lastprivate conditional. 12679 // decltype(priv_a) last_a; 12680 llvm::GlobalVariable *Last = getOrCreateInternalVariable( 12681 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12682 Last->setAlignment(LVal.getAlignment().getAsAlign()); 12683 LValue LastLVal = CGF.MakeAddrLValue( 12684 Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType()); 12685 12686 // Global loop counter. Required to handle inner parallel-for regions. 12687 // iv 12688 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12689 12690 // #pragma omp critical(a) 12691 // if (last_iv <= iv) { 12692 // last_iv = iv; 12693 // last_a = priv_a; 12694 // } 12695 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12696 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12697 Action.Enter(CGF); 12698 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12699 // (last_iv <= iv) ? Check if the variable is updated and store new 12700 // value in global var. 12701 llvm::Value *CmpRes; 12702 if (IVLVal.getType()->isSignedIntegerType()) { 12703 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12704 } else { 12705 assert(IVLVal.getType()->isUnsignedIntegerType() && 12706 "Loop iteration variable must be integer."); 12707 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12708 } 12709 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12710 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12711 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12712 // { 12713 CGF.EmitBlock(ThenBB); 12714 12715 // last_iv = iv; 12716 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12717 12718 // last_a = priv_a; 12719 switch (CGF.getEvaluationKind(LVal.getType())) { 12720 case TEK_Scalar: { 12721 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12722 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12723 break; 12724 } 12725 case TEK_Complex: { 12726 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12727 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 12728 break; 12729 } 12730 case TEK_Aggregate: 12731 llvm_unreachable( 12732 "Aggregates are not supported in lastprivate conditional."); 12733 } 12734 // } 12735 CGF.EmitBranch(ExitBB); 12736 // There is no need to emit line number for unconditional branch. 12737 (void)ApplyDebugLocation::CreateEmpty(CGF); 12738 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12739 }; 12740 12741 if (CGM.getLangOpts().OpenMPSimd) { 12742 // Do not emit as a critical region as no parallel region could be emitted. 
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
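
// For reference (editor's sketch, not authoritative): when a lastprivate
// conditional variable is captured by an inner parallel region, it is stored
// inside a wrapper record so the assignment above can raise the 'Fired' flag
// seen by the enclosing region. Conceptually, the wrapper looks like:
//
//   struct lastprivate.conditional {
//     decltype(priv_a) a; // the privatized value itself
//     char Fired;         // nonzero once the inner region assigned to 'a'
//   };
//
// The outer region later tests 'Fired' (see
// checkAndEmitSharedLastprivateConditional below) and only then runs the
// critical-section update for the value written by the inner region.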
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}
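
// A note on the overall flow (editor's addition): once the region finishes,
// emitLastprivateConditionalFinalUpdate below copies the winning value from
// the internal global back into the variable's private storage. In
// pseudocode, for a variable 'a' with unique global slot 'last_a':
//
//   if (<'last_a' was ever created>) // i.e. some iteration fired
//     priv_a = last_a;
//
// The existence check is just a lookup of the global by its unique name: if
// no update was ever emitted for 'a', the global was never created.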
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
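
// The CGOpenMPSIMDRuntime overrides below implement SIMD-only mode
// (-fopenmp-simd), in which only 'simd'-related directives are honored and
// nothing that requires the OpenMP runtime library is ever emitted, so these
// entry points should never be reached. Illustrative source (editor's
// example, assuming compilation with -fopenmp-simd):
//
//   #pragma omp simd               // vectorization hints are still applied
//   for (int i = 0; i < n; ++i)
//     x[i] += y[i];
//
//   #pragma omp parallel           // ignored: no runtime calls are emitted
//   { /* body runs sequentially */ }
//
// Hence nearly all of the overrides below trap with llvm_unreachable.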
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
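
// Unlike the stubs above, reductions do have meaning in SIMD-only mode: a
// 'simd' reduction needs no runtime support, so emitReduction asserts the
// simple case and delegates to the base class's serial reduction codegen.
// Illustrative source (editor's example):
//
//   #pragma omp simd reduction(+ : sum)
//   for (int i = 0; i < n; ++i)
//     sum += x[i];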
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}