//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
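  /// In outlined parallel regions this loads through the 'kmp_int32 *'
  /// global thread id parameter of the helper function; task regions
  /// override it because their thread id is passed by value.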
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
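        // An untied task may be suspended and resumed later, re-entering the
        // task entry function from the top. The part id records how far the
        // task has progressed: load it and switch to the matching
        // continuation (case 0 branches to the start of the task body).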
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
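  // Inlined regions have no captured-statement context of their own; these
  // queries are forwarded to the enclosing outlined region, if there is one.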
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
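  /// For target regions this is the unique name supplied by the client when
  /// the region info was created (see the class comment above).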
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
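    // Install the inlined region info; if the region must not inherit the
    // enclosing lambda/block capture state, stash that state away here and
    // restore it in the destructor.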
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use C-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags used to mark which 'requires' clauses have been seen.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// Flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// No requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID used if the device was not defined; the runtime should get it
  /// from environment variables, as described in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
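/// The cleanup runs the wrapped action's Exit() hook when the region's scope
/// unwinds, on both normal and exceptional paths (it is pushed as a
/// NormalAndEHCleanup in RegionCodeGenTy::operator() below).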
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
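/// Conceptually, the emitted IR behaves like this C loop (a sketch, not the
/// literal output):
/// \code
///   for (T *D = Dest, *S = Src; D != Dest + NumElements; ++D, ++S)
///     init(*D, *S); // UDR initializer; the default init ignores *S.
/// \endcode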
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit Whether the elements are initialized with
/// the user-defined reduction initializer.
/// \param Init Initial expression of array.
/// \param DRD 'declare reduction' construct used for the reduction item, or
/// null.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
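    // The UB lvalue points at the last element of the section, so the
    // pointer difference is Length - 1; add 1 to get the element count.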
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean up non-target variable declarations possibly used only in debug
  // info.
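  // A declaration that is still unused at this point was emitted solely for
  // debug info; erase it so it does not survive into the final module.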
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *omp_out, Ty *omp_in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
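  // For example, for
  //   #pragma omp declare reduction(plus : int : omp_out += omp_in)
  // the combiner emitted below looks roughly like
  //   void .omp_combiner.(int *restrict omp_out, int *restrict omp_in) {
  //     *omp_out += *omp_in;
  //   }
  // (a sketch; the actual name and attributes come from getName() and the
  // attribute logic above).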
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
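  // The helper generated below has the microtask signature
  //   void <HelperName>(kmp_int32 *global_tid, kmp_int32 *bound_tid,
  //                     <captured vars...>)
  // (see getKmpc_MicroPointerTy()); the runtime typically invokes it via
  // __kmpc_fork_call, emitted elsewhere.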
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
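  // That is, emit (roughly)
  //   %gtid = call i32 @__kmpc_global_thread_num(ptr @loc)
  // at the service insertion point near the function entry so later uses in
  // this function reuse the cached value.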
1449 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1450 if (!Elem.second.ServiceInsertPt) 1451 setLocThreadIdInsertPt(CGF); 1452 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1453 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1454 llvm::CallInst *Call = CGF.Builder.CreateCall( 1455 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1456 OMPRTL___kmpc_global_thread_num), 1457 emitUpdateLocation(CGF, Loc)); 1458 Call->setCallingConv(CGF.getRuntimeCC()); 1459 Elem.second.ThreadID = Call; 1460 return Call; 1461 } 1462 1463 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1464 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1465 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1466 clearLocThreadIdInsertPt(CGF); 1467 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1468 } 1469 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1470 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1471 UDRMap.erase(D); 1472 FunctionUDRMap.erase(CGF.CurFn); 1473 } 1474 auto I = FunctionUDMMap.find(CGF.CurFn); 1475 if (I != FunctionUDMMap.end()) { 1476 for(const auto *D : I->second) 1477 UDMMap.erase(D); 1478 FunctionUDMMap.erase(I); 1479 } 1480 LastprivateConditionalToTypes.erase(CGF.CurFn); 1481 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1482 } 1483 1484 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1485 return OMPBuilder.IdentPtr; 1486 } 1487 1488 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1489 if (!Kmpc_MicroTy) { 1490 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 1491 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1492 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1493 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1494 } 1495 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1496 } 1497 1498 llvm::FunctionCallee 1499 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned, 1500 bool IsGPUDistribute) { 1501 assert((IVSize == 32 || IVSize == 64) && 1502 "IV size is not compatible with the omp runtime"); 1503 StringRef Name; 1504 if (IsGPUDistribute) 1505 Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4" 1506 : "__kmpc_distribute_static_init_4u") 1507 : (IVSigned ? "__kmpc_distribute_static_init_8" 1508 : "__kmpc_distribute_static_init_8u"); 1509 else 1510 Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1511 : "__kmpc_for_static_init_4u") 1512 : (IVSigned ? "__kmpc_for_static_init_8" 1513 : "__kmpc_for_static_init_8u"); 1514 1515 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1516 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1517 llvm::Type *TypeParams[] = { 1518 getIdentTyPointerTy(), // loc 1519 CGM.Int32Ty, // tid 1520 CGM.Int32Ty, // schedtype 1521 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1522 PtrTy, // p_lower 1523 PtrTy, // p_upper 1524 PtrTy, // p_stride 1525 ITy, // incr 1526 ITy // chunk 1527 }; 1528 auto *FnTy = 1529 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1530 return CGM.CreateRuntimeFunction(FnTy, Name); 1531 } 1532 1533 llvm::FunctionCallee 1534 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1535 assert((IVSize == 32 || IVSize == 64) && 1536 "IV size is not compatible with the omp runtime"); 1537 StringRef Name = 1538 IVSize == 32 1539 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1540 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1541 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1542 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1543 CGM.Int32Ty, // tid 1544 CGM.Int32Ty, // schedtype 1545 ITy, // lower 1546 ITy, // upper 1547 ITy, // stride 1548 ITy // chunk 1549 }; 1550 auto *FnTy = 1551 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1552 return CGM.CreateRuntimeFunction(FnTy, Name); 1553 } 1554 1555 llvm::FunctionCallee 1556 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1557 assert((IVSize == 32 || IVSize == 64) && 1558 "IV size is not compatible with the omp runtime"); 1559 StringRef Name = 1560 IVSize == 32 1561 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1562 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1563 llvm::Type *TypeParams[] = { 1564 getIdentTyPointerTy(), // loc 1565 CGM.Int32Ty, // tid 1566 }; 1567 auto *FnTy = 1568 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1569 return CGM.CreateRuntimeFunction(FnTy, Name); 1570 } 1571 1572 llvm::FunctionCallee 1573 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1574 assert((IVSize == 32 || IVSize == 64) && 1575 "IV size is not compatible with the omp runtime"); 1576 StringRef Name = 1577 IVSize == 32 1578 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1579 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1580 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1581 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1582 llvm::Type *TypeParams[] = { 1583 getIdentTyPointerTy(), // loc 1584 CGM.Int32Ty, // tid 1585 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1586 PtrTy, // p_lower 1587 PtrTy, // p_upper 1588 PtrTy // p_stride 1589 }; 1590 auto *FnTy = 1591 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1592 return CGM.CreateRuntimeFunction(FnTy, Name); 1593 } 1594 1595 /// Obtain information that uniquely identifies a target entry. This 1596 /// consists of the file and device IDs as well as line number associated with 1597 /// the relevant entry source location. 
1598 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1599 unsigned &DeviceID, unsigned &FileID, 1600 unsigned &LineNum) { 1601 SourceManager &SM = C.getSourceManager(); 1602 1603 // The loc should be always valid and have a file ID (the user cannot use 1604 // #pragma directives in macros) 1605 1606 assert(Loc.isValid() && "Source location is expected to be always valid."); 1607 1608 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1609 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1610 1611 llvm::sys::fs::UniqueID ID; 1612 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { 1613 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); 1614 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1615 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1616 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1617 << PLoc.getFilename() << EC.message(); 1618 } 1619 1620 DeviceID = ID.getDevice(); 1621 FileID = ID.getFile(); 1622 LineNum = PLoc.getLine(); 1623 } 1624 1625 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1626 if (CGM.getLangOpts().OpenMPSimd) 1627 return Address::invalid(); 1628 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1629 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1630 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1631 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1632 HasRequiresUnifiedSharedMemory))) { 1633 SmallString<64> PtrName; 1634 { 1635 llvm::raw_svector_ostream OS(PtrName); 1636 OS << CGM.getMangledName(GlobalDecl(VD)); 1637 if (!VD->isExternallyVisible()) { 1638 unsigned DeviceID, FileID, Line; 1639 getTargetEntryUniqueInfo(CGM.getContext(), 1640 VD->getCanonicalDecl()->getBeginLoc(), 1641 DeviceID, FileID, Line); 1642 OS << llvm::format("_%x", FileID); 1643 } 1644 OS << "_decl_tgt_ref_ptr"; 1645 } 1646 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1647 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1648 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy); 1649 if (!Ptr) { 1650 Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName); 1651 1652 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1653 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1654 1655 if (!CGM.getLangOpts().OpenMPIsDevice) 1656 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1657 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1658 } 1659 return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD)); 1660 } 1661 return Address::invalid(); 1662 } 1663 1664 llvm::Constant * 1665 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1666 assert(!CGM.getLangOpts().OpenMPUseTLS || 1667 !CGM.getContext().getTargetInfo().isTLSSupported()); 1668 // Lookup the entry, lazily creating it if necessary. 
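// For example (illustrative name only), a threadprivate variable whose
// mangled name is "gv" gets an internal cache global along the lines of
// "gv.cache." - the exact spelling comes from getName's separator - which is
// then passed as the last argument of __kmpc_threadprivate_cached below.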
1669 std::string Suffix = getName({"cache", ""}); 1670 return getOrCreateInternalVariable( 1671 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1672 } 1673 1674 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1675 const VarDecl *VD, 1676 Address VDAddr, 1677 SourceLocation Loc) { 1678 if (CGM.getLangOpts().OpenMPUseTLS && 1679 CGM.getContext().getTargetInfo().isTLSSupported()) 1680 return VDAddr; 1681 1682 llvm::Type *VarTy = VDAddr.getElementType(); 1683 llvm::Value *Args[] = { 1684 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1685 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy), 1686 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1687 getOrCreateThreadPrivateCache(VD)}; 1688 return Address( 1689 CGF.EmitRuntimeCall( 1690 OMPBuilder.getOrCreateRuntimeFunction( 1691 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1692 Args), 1693 CGF.Int8Ty, VDAddr.getAlignment()); 1694 } 1695 1696 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1697 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1698 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1699 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1700 // library. 1701 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1702 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1703 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1704 OMPLoc); 1705 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1706 // to register constructor/destructor for variable. 1707 llvm::Value *Args[] = { 1708 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1709 Ctor, CopyCtor, Dtor}; 1710 CGF.EmitRuntimeCall( 1711 OMPBuilder.getOrCreateRuntimeFunction( 1712 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1713 Args); 1714 } 1715 1716 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1717 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1718 bool PerformInit, CodeGenFunction *CGF) { 1719 if (CGM.getLangOpts().OpenMPUseTLS && 1720 CGM.getContext().getTargetInfo().isTLSSupported()) 1721 return nullptr; 1722 1723 VD = VD->getDefinition(CGM.getContext()); 1724 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1725 QualType ASTTy = VD->getType(); 1726 1727 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1728 const Expr *Init = VD->getAnyInitializer(); 1729 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1730 // Generate function that re-emits the declaration's initializer into the 1731 // threadprivate copy of the variable VD 1732 CodeGenFunction CtorCGF(CGM); 1733 FunctionArgList Args; 1734 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1735 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1736 ImplicitParamDecl::Other); 1737 Args.push_back(&Dst); 1738 1739 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1740 CGM.getContext().VoidPtrTy, Args); 1741 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1742 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1743 llvm::Function *Fn = 1744 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1745 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1746 Args, Loc, Loc); 1747 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1748 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1749 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1750 Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment()); 1751 Arg = 
CtorCGF.Builder.CreateElementBitCast( 1752 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 1753 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1754 /*IsInitializer=*/true); 1755 ArgVal = CtorCGF.EmitLoadOfScalar( 1756 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1757 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1758 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1759 CtorCGF.FinishFunction(); 1760 Ctor = Fn; 1761 } 1762 if (VD->getType().isDestructedType() != QualType::DK_none) { 1763 // Generate function that emits destructor call for the threadprivate copy 1764 // of the variable VD 1765 CodeGenFunction DtorCGF(CGM); 1766 FunctionArgList Args; 1767 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1768 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1769 ImplicitParamDecl::Other); 1770 Args.push_back(&Dst); 1771 1772 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1773 CGM.getContext().VoidTy, Args); 1774 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1775 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1776 llvm::Function *Fn = 1777 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1778 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1779 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1780 Loc, Loc); 1781 // Create a scope with an artificial location for the body of this function. 1782 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1783 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1784 DtorCGF.GetAddrOfLocalVar(&Dst), 1785 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1786 DtorCGF.emitDestroy( 1787 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy, 1788 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1789 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1790 DtorCGF.FinishFunction(); 1791 Dtor = Fn; 1792 } 1793 // Do not emit init function if it is not required. 1794 if (!Ctor && !Dtor) 1795 return nullptr; 1796 1797 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1798 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1799 /*isVarArg=*/false) 1800 ->getPointerTo(); 1801 // Copying constructor for the threadprivate variable. 1802 // Must be NULL - reserved by runtime, but currently it requires that this 1803 // parameter is always NULL. Otherwise it fires assertion. 
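// A source-level example that reaches this definition (illustrative):
//   struct S { S(); ~S(); };
//   S gv;
//   #pragma omp threadprivate(gv)
// For 'gv' a __kmpc_global_ctor_/__kmpc_global_dtor_ pair is synthesized
// above, while the copy-constructor slot below stays NULL as just described.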
1804 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1805 if (Ctor == nullptr) { 1806 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1807 /*isVarArg=*/false) 1808 ->getPointerTo(); 1809 Ctor = llvm::Constant::getNullValue(CtorTy); 1810 } 1811 if (Dtor == nullptr) { 1812 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1813 /*isVarArg=*/false) 1814 ->getPointerTo(); 1815 Dtor = llvm::Constant::getNullValue(DtorTy); 1816 } 1817 if (!CGF) { 1818 auto *InitFunctionTy = 1819 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1820 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1821 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1822 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1823 CodeGenFunction InitCGF(CGM); 1824 FunctionArgList ArgList; 1825 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1826 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1827 Loc, Loc); 1828 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1829 InitCGF.FinishFunction(); 1830 return InitFunction; 1831 } 1832 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1833 } 1834 return nullptr; 1835 } 1836 1837 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1838 llvm::GlobalVariable *Addr, 1839 bool PerformInit) { 1840 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1841 !CGM.getLangOpts().OpenMPIsDevice) 1842 return false; 1843 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1844 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1845 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1846 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1847 HasRequiresUnifiedSharedMemory)) 1848 return CGM.getLangOpts().OpenMPIsDevice; 1849 VD = VD->getDefinition(CGM.getContext()); 1850 assert(VD && "Unknown VarDecl"); 1851 1852 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1853 return CGM.getLangOpts().OpenMPIsDevice; 1854 1855 QualType ASTTy = VD->getType(); 1856 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1857 1858 // Produce the unique prefix to identify the new target regions. We use 1859 // the source location of the variable declaration which we know to not 1860 // conflict with any target region. 
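// With illustrative IDs, a variable "gv" declared at line 12 of a file with
// device ID 0x1 and file ID 0x2 yields the prefix
//   __omp_offloading__1_2_gv_l12
// per the format assembled below.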
1861 unsigned DeviceID; 1862 unsigned FileID; 1863 unsigned Line; 1864 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1865 SmallString<128> Buffer, Out; 1866 { 1867 llvm::raw_svector_ostream OS(Buffer); 1868 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1869 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1870 } 1871 1872 const Expr *Init = VD->getAnyInitializer(); 1873 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1874 llvm::Constant *Ctor; 1875 llvm::Constant *ID; 1876 if (CGM.getLangOpts().OpenMPIsDevice) { 1877 // Generate function that re-emits the declaration's initializer into 1878 // the threadprivate copy of the variable VD 1879 CodeGenFunction CtorCGF(CGM); 1880 1881 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1882 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1883 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1884 FTy, Twine(Buffer, "_ctor"), FI, Loc, false, 1885 llvm::GlobalValue::WeakODRLinkage); 1886 if (CGM.getTriple().isAMDGCN()) 1887 Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 1888 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1889 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1890 FunctionArgList(), Loc, Loc); 1891 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 1892 llvm::Constant *AddrInAS0 = Addr; 1893 if (Addr->getAddressSpace() != 0) 1894 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( 1895 Addr, llvm::PointerType::getWithSamePointeeType( 1896 cast<llvm::PointerType>(Addr->getType()), 0)); 1897 CtorCGF.EmitAnyExprToMem(Init, 1898 Address(AddrInAS0, Addr->getValueType(), 1899 CGM.getContext().getDeclAlign(VD)), 1900 Init->getType().getQualifiers(), 1901 /*IsInitializer=*/true); 1902 CtorCGF.FinishFunction(); 1903 Ctor = Fn; 1904 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1905 } else { 1906 Ctor = new llvm::GlobalVariable( 1907 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1908 llvm::GlobalValue::PrivateLinkage, 1909 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1910 ID = Ctor; 1911 } 1912 1913 // Register the information for the entry associated with the constructor. 1914 Out.clear(); 1915 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1916 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1917 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1918 } 1919 if (VD->getType().isDestructedType() != QualType::DK_none) { 1920 llvm::Constant *Dtor; 1921 llvm::Constant *ID; 1922 if (CGM.getLangOpts().OpenMPIsDevice) { 1923 // Generate function that emits destructor call for the threadprivate 1924 // copy of the variable VD 1925 CodeGenFunction DtorCGF(CGM); 1926 1927 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1928 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1929 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1930 FTy, Twine(Buffer, "_dtor"), FI, Loc, false, 1931 llvm::GlobalValue::WeakODRLinkage); 1932 if (CGM.getTriple().isAMDGCN()) 1933 Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 1934 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1935 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1936 FunctionArgList(), Loc, Loc); 1937 // Create a scope with an artificial location for the body of this 1938 // function. 
1939 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1940 llvm::Constant *AddrInAS0 = Addr; 1941 if (Addr->getAddressSpace() != 0) 1942 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( 1943 Addr, llvm::PointerType::getWithSamePointeeType( 1944 cast<llvm::PointerType>(Addr->getType()), 0)); 1945 DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(), 1946 CGM.getContext().getDeclAlign(VD)), 1947 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1948 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1949 DtorCGF.FinishFunction(); 1950 Dtor = Fn; 1951 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1952 } else { 1953 Dtor = new llvm::GlobalVariable( 1954 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1955 llvm::GlobalValue::PrivateLinkage, 1956 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 1957 ID = Dtor; 1958 } 1959 // Register the information for the entry associated with the destructor. 1960 Out.clear(); 1961 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1962 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 1963 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 1964 } 1965 return CGM.getLangOpts().OpenMPIsDevice; 1966 } 1967 1968 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 1969 QualType VarType, 1970 StringRef Name) { 1971 std::string Suffix = getName({"artificial", ""}); 1972 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 1973 llvm::GlobalVariable *GAddr = 1974 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 1975 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 1976 CGM.getTarget().isTLSSupported()) { 1977 GAddr->setThreadLocal(/*Val=*/true); 1978 return Address(GAddr, GAddr->getValueType(), 1979 CGM.getContext().getTypeAlignInChars(VarType)); 1980 } 1981 std::string CacheSuffix = getName({"cache", ""}); 1982 llvm::Value *Args[] = { 1983 emitUpdateLocation(CGF, SourceLocation()), 1984 getThreadID(CGF, SourceLocation()), 1985 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 1986 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 1987 /*isSigned=*/false), 1988 getOrCreateInternalVariable( 1989 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 1990 return Address( 1991 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1992 CGF.EmitRuntimeCall( 1993 OMPBuilder.getOrCreateRuntimeFunction( 1994 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1995 Args), 1996 VarLVType->getPointerTo(/*AddrSpace=*/0)), 1997 VarLVType, CGM.getContext().getTypeAlignInChars(VarType)); 1998 } 1999 2000 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2001 const RegionCodeGenTy &ThenGen, 2002 const RegionCodeGenTy &ElseGen) { 2003 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2004 2005 // If the condition constant folds and can be elided, try to avoid emitting 2006 // the condition and the dead arm of the if/else. 2007 bool CondConstant; 2008 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2009 if (CondConstant) 2010 ThenGen(CGF); 2011 else 2012 ElseGen(CGF); 2013 return; 2014 } 2015 2016 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2017 // emit the conditional branch. 
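// The resulting CFG is the usual diamond (sketch):
//     br i1 %cond, label %omp_if.then, label %omp_if.else
//   omp_if.then:
//     <ThenGen> ; br label %omp_if.end
//   omp_if.else:
//     <ElseGen> ; br label %omp_if.end
//   omp_if.end: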
2018   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2019   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2020   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2021   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2022
2023   // Emit the 'then' code.
2024   CGF.EmitBlock(ThenBlock);
2025   ThenGen(CGF);
2026   CGF.EmitBranch(ContBlock);
2027   // Emit the 'else' code if present.
2028   // There is no need to emit a line number for the unconditional branch.
2029   (void)ApplyDebugLocation::CreateEmpty(CGF);
2030   CGF.EmitBlock(ElseBlock);
2031   ElseGen(CGF);
2032   // There is no need to emit a line number for the unconditional branch.
2033   (void)ApplyDebugLocation::CreateEmpty(CGF);
2034   CGF.EmitBranch(ContBlock);
2035   // Emit the continuation block for code after the if.
2036   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2037 }
2038
2039 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2040                                        llvm::Function *OutlinedFn,
2041                                        ArrayRef<llvm::Value *> CapturedVars,
2042                                        const Expr *IfCond,
2043                                        llvm::Value *NumThreads) {
2044   if (!CGF.HaveInsertPoint())
2045     return;
2046   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2047   auto &M = CGM.getModule();
2048   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2049                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2050     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2051     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2052     llvm::Value *Args[] = {
2053         RTLoc,
2054         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2055         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2056     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2057     RealArgs.append(std::begin(Args), std::end(Args));
2058     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2059
2060     llvm::FunctionCallee RTLFn =
2061         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2062     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2063   };
2064   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2065                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2066     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2067     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2068     // Build calls:
2069     // __kmpc_serialized_parallel(&Loc, GTid);
2070     llvm::Value *Args[] = {RTLoc, ThreadID};
2071     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2072                             M, OMPRTL___kmpc_serialized_parallel),
2073                         Args);
2074
2075     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2076     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2077     Address ZeroAddrBound =
2078         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2079                                          /*Name=*/".bound.zero.addr");
2080     CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2081     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2082     // ThreadId for serialized parallels is 0.
2083     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2084     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2085     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2086
2087     // Ensure we do not inline the function. This is trivially true for the ones
2088     // passed to __kmpc_fork_call, but the ones called in serialized regions
2089     // could be inlined. This is not perfect, but it is closer to the invariant
2090     // we want, namely, every data environment starts with a new function.
2091     // TODO: We should pass the if condition to the runtime function and do the
2092     // handling there. Much cleaner code.
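// Putting it together, for '#pragma omp parallel if(cond)' the emitted
// pseudo-code is roughly (illustrative):
//   if (cond) {
//     __kmpc_fork_call(&loc, n, microtask, var1, ..., varn);
//   } else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     outlined(&gtid, &bound_zero, captured...);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }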
2093     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2094     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2095     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2096
2097     // __kmpc_end_serialized_parallel(&Loc, GTid);
2098     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2099     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2100                             M, OMPRTL___kmpc_end_serialized_parallel),
2101                         EndArgs);
2102   };
2103   if (IfCond) {
2104     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2105   } else {
2106     RegionCodeGenTy ThenRCG(ThenGen);
2107     ThenRCG(CGF);
2108   }
2109 }
2110
2111 // If we're inside an (outlined) parallel region, use the region info's
2112 // thread-ID variable (it is passed as the first argument of the outlined
2113 // function, as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
2114 // region but in a regular serial code region, get the thread ID by calling
2115 // kmp_int32 kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
2116 // temporary and return the address of that temp.
2117 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2118                                              SourceLocation Loc) {
2119   if (auto *OMPRegionInfo =
2120           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2121     if (OMPRegionInfo->getThreadIDVariable())
2122       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2123
2124   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2125   QualType Int32Ty =
2126       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2127   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2128   CGF.EmitStoreOfScalar(ThreadID,
2129                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2130
2131   return ThreadIDTemp;
2132 }
2133
2134 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2135     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2136   SmallString<256> Buffer;
2137   llvm::raw_svector_ostream Out(Buffer);
2138   Out << Name;
2139   StringRef RuntimeName = Out.str();
2140   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2141   if (Elem.second) {
2142     assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2143            "OMP internal variable has different type than requested");
2144     return &*Elem.second;
2145   }
2146
2147   return Elem.second = new llvm::GlobalVariable(
2148              CGM.getModule(), Ty, /*IsConstant*/ false,
2149              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2150              Elem.first(), /*InsertBefore=*/nullptr,
2151              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2152 }
2153
2154 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2155   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2156   std::string Name = getName({Prefix, "var"});
2157   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2158 }
2159
2160 namespace {
2161 /// Common pre(post)-action for different OpenMP constructs.
2162 class CommonActionTy final : public PrePostActionTy { 2163 llvm::FunctionCallee EnterCallee; 2164 ArrayRef<llvm::Value *> EnterArgs; 2165 llvm::FunctionCallee ExitCallee; 2166 ArrayRef<llvm::Value *> ExitArgs; 2167 bool Conditional; 2168 llvm::BasicBlock *ContBlock = nullptr; 2169 2170 public: 2171 CommonActionTy(llvm::FunctionCallee EnterCallee, 2172 ArrayRef<llvm::Value *> EnterArgs, 2173 llvm::FunctionCallee ExitCallee, 2174 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2175 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2176 ExitArgs(ExitArgs), Conditional(Conditional) {} 2177 void Enter(CodeGenFunction &CGF) override { 2178 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2179 if (Conditional) { 2180 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2181 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2182 ContBlock = CGF.createBasicBlock("omp_if.end"); 2183 // Generate the branch (If-stmt) 2184 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2185 CGF.EmitBlock(ThenBlock); 2186 } 2187 } 2188 void Done(CodeGenFunction &CGF) { 2189 // Emit the rest of blocks/branches 2190 CGF.EmitBranch(ContBlock); 2191 CGF.EmitBlock(ContBlock, true); 2192 } 2193 void Exit(CodeGenFunction &CGF) override { 2194 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2195 } 2196 }; 2197 } // anonymous namespace 2198 2199 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2200 StringRef CriticalName, 2201 const RegionCodeGenTy &CriticalOpGen, 2202 SourceLocation Loc, const Expr *Hint) { 2203 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2204 // CriticalOpGen(); 2205 // __kmpc_end_critical(ident_t *, gtid, Lock); 2206 // Prepare arguments and build a call to __kmpc_critical 2207 if (!CGF.HaveInsertPoint()) 2208 return; 2209 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2210 getCriticalRegionLock(CriticalName)}; 2211 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2212 std::end(Args)); 2213 if (Hint) { 2214 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2215 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2216 } 2217 CommonActionTy Action( 2218 OMPBuilder.getOrCreateRuntimeFunction( 2219 CGM.getModule(), 2220 Hint ? 
OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2221       EnterArgs,
2222       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2223                                             OMPRTL___kmpc_end_critical),
2224       Args);
2225   CriticalOpGen.setAction(Action);
2226   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2227 }
2228
2229 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2230                                        const RegionCodeGenTy &MasterOpGen,
2231                                        SourceLocation Loc) {
2232   if (!CGF.HaveInsertPoint())
2233     return;
2234   // if(__kmpc_master(ident_t *, gtid)) {
2235   //   MasterOpGen();
2236   //   __kmpc_end_master(ident_t *, gtid);
2237   // }
2238   // Prepare arguments and build a call to __kmpc_master
2239   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2240   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2241                             CGM.getModule(), OMPRTL___kmpc_master),
2242                         Args,
2243                         OMPBuilder.getOrCreateRuntimeFunction(
2244                             CGM.getModule(), OMPRTL___kmpc_end_master),
2245                         Args,
2246                         /*Conditional=*/true);
2247   MasterOpGen.setAction(Action);
2248   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2249   Action.Done(CGF);
2250 }
2251
2252 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2253                                        const RegionCodeGenTy &MaskedOpGen,
2254                                        SourceLocation Loc, const Expr *Filter) {
2255   if (!CGF.HaveInsertPoint())
2256     return;
2257   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2258   //   MaskedOpGen();
2259   //   __kmpc_end_masked(ident_t *, gtid);
2260   // }
2261   // Prepare arguments and build a call to __kmpc_masked
2262   llvm::Value *FilterVal = Filter
2263                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2264                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2265   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2266                          FilterVal};
2267   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2268                             getThreadID(CGF, Loc)};
2269   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2270                             CGM.getModule(), OMPRTL___kmpc_masked),
2271                         Args,
2272                         OMPBuilder.getOrCreateRuntimeFunction(
2273                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2274                         ArgsEnd,
2275                         /*Conditional=*/true);
2276   MaskedOpGen.setAction(Action);
2277   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2278   Action.Done(CGF);
2279 }
2280
2281 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2282                                         SourceLocation Loc) {
2283   if (!CGF.HaveInsertPoint())
2284     return;
2285   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2286     OMPBuilder.createTaskyield(CGF.Builder);
2287   } else {
2288     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2289     llvm::Value *Args[] = {
2290         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2291         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2292     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2293                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2294                         Args);
2295   }
2296
2297   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2298     Region->emitUntiedSwitch(CGF);
2299 }
2300
2301 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2302                                           const RegionCodeGenTy &TaskgroupOpGen,
2303                                           SourceLocation Loc) {
2304   if (!CGF.HaveInsertPoint())
2305     return;
2306   // __kmpc_taskgroup(ident_t *, gtid);
2307   // TaskgroupOpGen();
2308   // __kmpc_end_taskgroup(ident_t *, gtid);
2309   // Prepare arguments and build a call to __kmpc_taskgroup
2310   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2311   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2312                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2313                         Args,
2314
OMPBuilder.getOrCreateRuntimeFunction( 2315 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2316 Args); 2317 TaskgroupOpGen.setAction(Action); 2318 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2319 } 2320 2321 /// Given an array of pointers to variables, project the address of a 2322 /// given variable. 2323 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2324 unsigned Index, const VarDecl *Var) { 2325 // Pull out the pointer to the variable. 2326 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2327 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2328 2329 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType()); 2330 return Address( 2331 CGF.Builder.CreateBitCast( 2332 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())), 2333 ElemTy, CGF.getContext().getDeclAlign(Var)); 2334 } 2335 2336 static llvm::Value *emitCopyprivateCopyFunction( 2337 CodeGenModule &CGM, llvm::Type *ArgsElemType, 2338 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2339 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2340 SourceLocation Loc) { 2341 ASTContext &C = CGM.getContext(); 2342 // void copy_func(void *LHSArg, void *RHSArg); 2343 FunctionArgList Args; 2344 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2345 ImplicitParamDecl::Other); 2346 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2347 ImplicitParamDecl::Other); 2348 Args.push_back(&LHSArg); 2349 Args.push_back(&RHSArg); 2350 const auto &CGFI = 2351 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2352 std::string Name = 2353 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2354 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2355 llvm::GlobalValue::InternalLinkage, Name, 2356 &CGM.getModule()); 2357 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2358 Fn->setDoesNotRecurse(); 2359 CodeGenFunction CGF(CGM); 2360 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2361 // Dest = (void*[n])(LHSArg); 2362 // Src = (void*[n])(RHSArg); 2363 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2364 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2365 ArgsElemType->getPointerTo()), 2366 ArgsElemType, CGF.getPointerAlign()); 2367 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2368 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2369 ArgsElemType->getPointerTo()), 2370 ArgsElemType, CGF.getPointerAlign()); 2371 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2372 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2373 // ... 
2374 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2375 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2376 const auto *DestVar = 2377 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2378 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2379 2380 const auto *SrcVar = 2381 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2382 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2383 2384 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2385 QualType Type = VD->getType(); 2386 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2387 } 2388 CGF.FinishFunction(); 2389 return Fn; 2390 } 2391 2392 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2393 const RegionCodeGenTy &SingleOpGen, 2394 SourceLocation Loc, 2395 ArrayRef<const Expr *> CopyprivateVars, 2396 ArrayRef<const Expr *> SrcExprs, 2397 ArrayRef<const Expr *> DstExprs, 2398 ArrayRef<const Expr *> AssignmentOps) { 2399 if (!CGF.HaveInsertPoint()) 2400 return; 2401 assert(CopyprivateVars.size() == SrcExprs.size() && 2402 CopyprivateVars.size() == DstExprs.size() && 2403 CopyprivateVars.size() == AssignmentOps.size()); 2404 ASTContext &C = CGM.getContext(); 2405 // int32 did_it = 0; 2406 // if(__kmpc_single(ident_t *, gtid)) { 2407 // SingleOpGen(); 2408 // __kmpc_end_single(ident_t *, gtid); 2409 // did_it = 1; 2410 // } 2411 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2412 // <copy_func>, did_it); 2413 2414 Address DidIt = Address::invalid(); 2415 if (!CopyprivateVars.empty()) { 2416 // int32 did_it = 0; 2417 QualType KmpInt32Ty = 2418 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2419 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2420 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2421 } 2422 // Prepare arguments and build a call to __kmpc_single 2423 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2424 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2425 CGM.getModule(), OMPRTL___kmpc_single), 2426 Args, 2427 OMPBuilder.getOrCreateRuntimeFunction( 2428 CGM.getModule(), OMPRTL___kmpc_end_single), 2429 Args, 2430 /*Conditional=*/true); 2431 SingleOpGen.setAction(Action); 2432 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2433 if (DidIt.isValid()) { 2434 // did_it = 1; 2435 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2436 } 2437 Action.Done(CGF); 2438 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2439 // <copy_func>, did_it); 2440 if (DidIt.isValid()) { 2441 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2442 QualType CopyprivateArrayTy = C.getConstantArrayType( 2443 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2444 /*IndexTypeQuals=*/0); 2445 // Create a list of all private variables for copyprivate. 2446 Address CopyprivateList = 2447 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2448 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2449 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2450 CGF.Builder.CreateStore( 2451 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2452 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2453 CGF.VoidPtrTy), 2454 Elem); 2455 } 2456 // Build function that copies private values from single region to all other 2457 // threads in the corresponding parallel region. 
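// For instance (illustrative), '#pragma omp single copyprivate(a, b)' stores
// &a and &b into the .omp.copyprivate.cpr_list array built above and then
// emits
//   __kmpc_copyprivate(&loc, gtid, sizeof(void *[2]), list, copy_func, did_it);
// so the other threads replicate the single thread's values via copy_func.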
2458 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2459 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars, 2460 SrcExprs, DstExprs, AssignmentOps, Loc); 2461 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2462 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2463 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty); 2464 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2465 llvm::Value *Args[] = { 2466 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2467 getThreadID(CGF, Loc), // i32 <gtid> 2468 BufSize, // size_t <buf_size> 2469 CL.getPointer(), // void *<copyprivate list> 2470 CpyFn, // void (*) (void *, void *) <copy_func> 2471 DidItVal // i32 did_it 2472 }; 2473 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2474 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2475 Args); 2476 } 2477 } 2478 2479 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2480 const RegionCodeGenTy &OrderedOpGen, 2481 SourceLocation Loc, bool IsThreads) { 2482 if (!CGF.HaveInsertPoint()) 2483 return; 2484 // __kmpc_ordered(ident_t *, gtid); 2485 // OrderedOpGen(); 2486 // __kmpc_end_ordered(ident_t *, gtid); 2487 // Prepare arguments and build a call to __kmpc_ordered 2488 if (IsThreads) { 2489 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2490 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2491 CGM.getModule(), OMPRTL___kmpc_ordered), 2492 Args, 2493 OMPBuilder.getOrCreateRuntimeFunction( 2494 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2495 Args); 2496 OrderedOpGen.setAction(Action); 2497 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2498 return; 2499 } 2500 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2501 } 2502 2503 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2504 unsigned Flags; 2505 if (Kind == OMPD_for) 2506 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2507 else if (Kind == OMPD_sections) 2508 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2509 else if (Kind == OMPD_single) 2510 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2511 else if (Kind == OMPD_barrier) 2512 Flags = OMP_IDENT_BARRIER_EXPL; 2513 else 2514 Flags = OMP_IDENT_BARRIER_IMPL; 2515 return Flags; 2516 } 2517 2518 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2519 CodeGenFunction &CGF, const OMPLoopDirective &S, 2520 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2521 // Check if the loop directive is actually a doacross loop directive. In this 2522 // case choose static, 1 schedule. 2523 if (llvm::any_of( 2524 S.getClausesOfKind<OMPOrderedClause>(), 2525 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2526 ScheduleKind = OMPC_SCHEDULE_static; 2527 // Chunk size is 1 in this case. 
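// An example of a loop nest that takes this branch (illustrative):
//   #pragma omp for ordered(2)
//   for (...) for (...) { ... }
// ordered(n) gives the OMPOrderedClause a nonzero loop count, so the
// doacross loop is pinned to schedule(static, 1) below.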
2528 llvm::APInt ChunkSize(32, 1); 2529 ChunkExpr = IntegerLiteral::Create( 2530 CGF.getContext(), ChunkSize, 2531 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2532 SourceLocation()); 2533 } 2534 } 2535 2536 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2537 OpenMPDirectiveKind Kind, bool EmitChecks, 2538 bool ForceSimpleCall) { 2539 // Check if we should use the OMPBuilder 2540 auto *OMPRegionInfo = 2541 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2542 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2543 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2544 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2545 return; 2546 } 2547 2548 if (!CGF.HaveInsertPoint()) 2549 return; 2550 // Build call __kmpc_cancel_barrier(loc, thread_id); 2551 // Build call __kmpc_barrier(loc, thread_id); 2552 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2553 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2554 // thread_id); 2555 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2556 getThreadID(CGF, Loc)}; 2557 if (OMPRegionInfo) { 2558 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2559 llvm::Value *Result = CGF.EmitRuntimeCall( 2560 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2561 OMPRTL___kmpc_cancel_barrier), 2562 Args); 2563 if (EmitChecks) { 2564 // if (__kmpc_cancel_barrier()) { 2565 // exit from construct; 2566 // } 2567 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2568 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2569 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2570 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2571 CGF.EmitBlock(ExitBB); 2572 // exit from construct; 2573 CodeGenFunction::JumpDest CancelDestination = 2574 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2575 CGF.EmitBranchThroughCleanup(CancelDestination); 2576 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2577 } 2578 return; 2579 } 2580 } 2581 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2582 CGM.getModule(), OMPRTL___kmpc_barrier), 2583 Args); 2584 } 2585 2586 /// Map the OpenMP loop schedule to the runtime enumeration. 2587 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2588 bool Chunked, bool Ordered) { 2589 switch (ScheduleKind) { 2590 case OMPC_SCHEDULE_static: 2591 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2592 : (Ordered ? OMP_ord_static : OMP_sch_static); 2593 case OMPC_SCHEDULE_dynamic: 2594 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2595 case OMPC_SCHEDULE_guided: 2596 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2597 case OMPC_SCHEDULE_runtime: 2598 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2599 case OMPC_SCHEDULE_auto: 2600 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2601 case OMPC_SCHEDULE_unknown: 2602 assert(!Chunked && "chunk was specified but schedule kind not known"); 2603 return Ordered ? OMP_ord_static : OMP_sch_static; 2604 } 2605 llvm_unreachable("Unexpected runtime schedule"); 2606 } 2607 2608 /// Map the OpenMP distribute schedule to the runtime enumeration. 2609 static OpenMPSchedType 2610 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2611 // only static is allowed for dist_schedule 2612 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2613 }
2614
2615 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2616                                          bool Chunked) const {
2617   OpenMPSchedType Schedule =
2618       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2619   return Schedule == OMP_sch_static;
2620 }
2621
2622 bool CGOpenMPRuntime::isStaticNonchunked(
2623     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2624   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2625   return Schedule == OMP_dist_sch_static;
2626 }
2627
2628 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2629                                       bool Chunked) const {
2630   OpenMPSchedType Schedule =
2631       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2632   return Schedule == OMP_sch_static_chunked;
2633 }
2634
2635 bool CGOpenMPRuntime::isStaticChunked(
2636     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2637   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2638   return Schedule == OMP_dist_sch_static_chunked;
2639 }
2640
2641 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2642   OpenMPSchedType Schedule =
2643       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2644   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2645   return Schedule != OMP_sch_static;
2646 }
2647
2648 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2649                                   OpenMPScheduleClauseModifier M1,
2650                                   OpenMPScheduleClauseModifier M2) {
2651   int Modifier = 0;
2652   switch (M1) {
2653   case OMPC_SCHEDULE_MODIFIER_monotonic:
2654     Modifier = OMP_sch_modifier_monotonic;
2655     break;
2656   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2657     Modifier = OMP_sch_modifier_nonmonotonic;
2658     break;
2659   case OMPC_SCHEDULE_MODIFIER_simd:
2660     if (Schedule == OMP_sch_static_chunked)
2661       Schedule = OMP_sch_static_balanced_chunked;
2662     break;
2663   case OMPC_SCHEDULE_MODIFIER_last:
2664   case OMPC_SCHEDULE_MODIFIER_unknown:
2665     break;
2666   }
2667   switch (M2) {
2668   case OMPC_SCHEDULE_MODIFIER_monotonic:
2669     Modifier = OMP_sch_modifier_monotonic;
2670     break;
2671   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2672     Modifier = OMP_sch_modifier_nonmonotonic;
2673     break;
2674   case OMPC_SCHEDULE_MODIFIER_simd:
2675     if (Schedule == OMP_sch_static_chunked)
2676       Schedule = OMP_sch_static_balanced_chunked;
2677     break;
2678   case OMPC_SCHEDULE_MODIFIER_last:
2679   case OMPC_SCHEDULE_MODIFIER_unknown:
2680     break;
2681   }
2682   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2683   // If the static schedule kind is specified or if the ordered clause is
2684   // specified, and if the nonmonotonic modifier is not specified, the effect is
2685   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2686   // modifier is specified, the effect is as if the nonmonotonic modifier is
2687   // specified.
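// For example (illustrative), under -fopenmp-version=50 a plain
// 'schedule(dynamic)' carries no modifier, so the check below adds
// OMP_sch_modifier_nonmonotonic and the runtime receives
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic
// packed into the single kmp_int32 schedtype argument.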
2688 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2689 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2690 Schedule == OMP_sch_static_balanced_chunked || 2691 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2692 Schedule == OMP_dist_sch_static_chunked || 2693 Schedule == OMP_dist_sch_static)) 2694 Modifier = OMP_sch_modifier_nonmonotonic; 2695 } 2696 return Schedule | Modifier; 2697 } 2698 2699 void CGOpenMPRuntime::emitForDispatchInit( 2700 CodeGenFunction &CGF, SourceLocation Loc, 2701 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2702 bool Ordered, const DispatchRTInput &DispatchValues) { 2703 if (!CGF.HaveInsertPoint()) 2704 return; 2705 OpenMPSchedType Schedule = getRuntimeSchedule( 2706 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2707 assert(Ordered || 2708 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2709 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2710 Schedule != OMP_sch_static_balanced_chunked)); 2711 // Call __kmpc_dispatch_init( 2712 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2713 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2714 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2715 2716 // If the Chunk was not specified in the clause - use default value 1. 2717 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2718 : CGF.Builder.getIntN(IVSize, 1); 2719 llvm::Value *Args[] = { 2720 emitUpdateLocation(CGF, Loc), 2721 getThreadID(CGF, Loc), 2722 CGF.Builder.getInt32(addMonoNonMonoModifier( 2723 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2724 DispatchValues.LB, // Lower 2725 DispatchValues.UB, // Upper 2726 CGF.Builder.getIntN(IVSize, 1), // Stride 2727 Chunk // Chunk 2728 }; 2729 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2730 } 2731 2732 static void emitForStaticInitCall( 2733 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2734 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2735 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2736 const CGOpenMPRuntime::StaticRTInput &Values) { 2737 if (!CGF.HaveInsertPoint()) 2738 return; 2739 2740 assert(!Values.Ordered); 2741 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2742 Schedule == OMP_sch_static_balanced_chunked || 2743 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2744 Schedule == OMP_dist_sch_static || 2745 Schedule == OMP_dist_sch_static_chunked); 2746 2747 // Call __kmpc_for_static_init( 2748 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2749 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2750 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2751 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2752 llvm::Value *Chunk = Values.Chunk; 2753 if (Chunk == nullptr) { 2754 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2755 Schedule == OMP_dist_sch_static) && 2756 "expected static non-chunked schedule"); 2757 // If the Chunk was not specified in the clause - use default value 1. 
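// With that default, a 32-bit signed IV produces a call of the following
// shape (illustrative operand names):
//   __kmpc_for_static_init_4(&loc, gtid, schedtype, &lastiter,
//                            &lb, &ub, &stride, /*incr=*/1, /*chunk=*/1);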
2758 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2759 } else { 2760 assert((Schedule == OMP_sch_static_chunked || 2761 Schedule == OMP_sch_static_balanced_chunked || 2762 Schedule == OMP_ord_static_chunked || 2763 Schedule == OMP_dist_sch_static_chunked) && 2764 "expected static chunked schedule"); 2765 } 2766 llvm::Value *Args[] = { 2767 UpdateLocation, 2768 ThreadId, 2769 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2770 M2)), // Schedule type 2771 Values.IL.getPointer(), // &isLastIter 2772 Values.LB.getPointer(), // &LB 2773 Values.UB.getPointer(), // &UB 2774 Values.ST.getPointer(), // &Stride 2775 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2776 Chunk // Chunk 2777 }; 2778 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2779 } 2780 2781 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2782 SourceLocation Loc, 2783 OpenMPDirectiveKind DKind, 2784 const OpenMPScheduleTy &ScheduleKind, 2785 const StaticRTInput &Values) { 2786 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2787 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2788 assert(isOpenMPWorksharingDirective(DKind) && 2789 "Expected loop-based or sections-based directive."); 2790 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2791 isOpenMPLoopDirective(DKind) 2792 ? OMP_IDENT_WORK_LOOP 2793 : OMP_IDENT_WORK_SECTIONS); 2794 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2795 llvm::FunctionCallee StaticInitFunction = 2796 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); 2797 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2798 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2799 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2800 } 2801 2802 void CGOpenMPRuntime::emitDistributeStaticInit( 2803 CodeGenFunction &CGF, SourceLocation Loc, 2804 OpenMPDistScheduleClauseKind SchedKind, 2805 const CGOpenMPRuntime::StaticRTInput &Values) { 2806 OpenMPSchedType ScheduleNum = 2807 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2808 llvm::Value *UpdatedLocation = 2809 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2810 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2811 llvm::FunctionCallee StaticInitFunction; 2812 bool isGPUDistribute = 2813 CGM.getLangOpts().OpenMPIsDevice && 2814 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); 2815 StaticInitFunction = createForStaticInitFunction( 2816 Values.IVSize, Values.IVSigned, isGPUDistribute); 2817 2818 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2819 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2820 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2821 } 2822 2823 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2824 SourceLocation Loc, 2825 OpenMPDirectiveKind DKind) { 2826 if (!CGF.HaveInsertPoint()) 2827 return; 2828 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2829 llvm::Value *Args[] = { 2830 emitUpdateLocation(CGF, Loc, 2831 isOpenMPDistributeDirective(DKind) 2832 ? OMP_IDENT_WORK_DISTRIBUTE 2833 : isOpenMPLoopDirective(DKind) 2834 ? 
OMP_IDENT_WORK_LOOP
2835                                : OMP_IDENT_WORK_SECTIONS),
2836       getThreadID(CGF, Loc)};
2837   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2838   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2839       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2840     CGF.EmitRuntimeCall(
2841         OMPBuilder.getOrCreateRuntimeFunction(
2842             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2843         Args);
2844   else
2845     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2846                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2847                         Args);
2848 }
2849 
2850 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2851                                                  SourceLocation Loc,
2852                                                  unsigned IVSize,
2853                                                  bool IVSigned) {
2854   if (!CGF.HaveInsertPoint())
2855     return;
2856   // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2857   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2858   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2859 }
2860 
2861 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2862                                           SourceLocation Loc, unsigned IVSize,
2863                                           bool IVSigned, Address IL,
2864                                           Address LB, Address UB,
2865                                           Address ST) {
2866   // Call __kmpc_dispatch_next(
2867   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2868   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2869   //          kmp_int[32|64] *p_stride);
2870   llvm::Value *Args[] = {
2871       emitUpdateLocation(CGF, Loc),
2872       getThreadID(CGF, Loc),
2873       IL.getPointer(), // &isLastIter
2874       LB.getPointer(), // &Lower
2875       UB.getPointer(), // &Upper
2876       ST.getPointer()  // &Stride
2877   };
2878   llvm::Value *Call =
2879       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2880   return CGF.EmitScalarConversion(
2881       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2882       CGF.getContext().BoolTy, Loc);
2883 }
2884 
2885 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2886                                            llvm::Value *NumThreads,
2887                                            SourceLocation Loc) {
2888   if (!CGF.HaveInsertPoint())
2889     return;
2890   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2891   llvm::Value *Args[] = {
2892       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2893       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2894   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2895                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2896                       Args);
2897 }
2898 
2899 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2900                                          ProcBindKind ProcBind,
2901                                          SourceLocation Loc) {
2902   if (!CGF.HaveInsertPoint())
2903     return;
2904   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2905   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2906   llvm::Value *Args[] = {
2907       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2908       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2909   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2910                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2911                       Args);
2912 }
2913 
2914 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2915                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2916   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2917     OMPBuilder.createFlush(CGF.Builder);
2918   } else {
2919     if (!CGF.HaveInsertPoint())
2920       return;
2921     // Build call void __kmpc_flush(ident_t *loc)
2922     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2923                             CGM.getModule(), OMPRTL___kmpc_flush),
2924
emitUpdateLocation(CGF, Loc)); 2925 } 2926 } 2927 2928 namespace { 2929 /// Indexes of fields for type kmp_task_t. 2930 enum KmpTaskTFields { 2931 /// List of shared variables. 2932 KmpTaskTShareds, 2933 /// Task routine. 2934 KmpTaskTRoutine, 2935 /// Partition id for the untied tasks. 2936 KmpTaskTPartId, 2937 /// Function with call of destructors for private variables. 2938 Data1, 2939 /// Task priority. 2940 Data2, 2941 /// (Taskloops only) Lower bound. 2942 KmpTaskTLowerBound, 2943 /// (Taskloops only) Upper bound. 2944 KmpTaskTUpperBound, 2945 /// (Taskloops only) Stride. 2946 KmpTaskTStride, 2947 /// (Taskloops only) Is last iteration flag. 2948 KmpTaskTLastIter, 2949 /// (Taskloops only) Reduction data. 2950 KmpTaskTReductions, 2951 }; 2952 } // anonymous namespace 2953 2954 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2955 return OffloadEntriesTargetRegion.empty() && 2956 OffloadEntriesDeviceGlobalVar.empty(); 2957 } 2958 2959 /// Initialize target region entry. 2960 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2961 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2962 StringRef ParentName, unsigned LineNum, 2963 unsigned Order) { 2964 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2965 "only required for the device " 2966 "code generation."); 2967 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2968 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2969 OMPTargetRegionEntryTargetRegion); 2970 ++OffloadingEntriesNum; 2971 } 2972 2973 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2974 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2975 StringRef ParentName, unsigned LineNum, 2976 llvm::Constant *Addr, llvm::Constant *ID, 2977 OMPTargetRegionEntryKind Flags) { 2978 // If we are emitting code for a target, the entry is already initialized, 2979 // only has to be registered. 2980 if (CGM.getLangOpts().OpenMPIsDevice) { 2981 // This could happen if the device compilation is invoked standalone. 
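    // In that case there is no host-side metadata to pre-initialize the
    // entry table, so there is simply no slot to fill in here.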
2982 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 2983 return; 2984 auto &Entry = 2985 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2986 Entry.setAddress(Addr); 2987 Entry.setID(ID); 2988 Entry.setFlags(Flags); 2989 } else { 2990 if (Flags == 2991 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 2992 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 2993 /*IgnoreAddressId*/ true)) 2994 return; 2995 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 2996 "Target region entry already registered!"); 2997 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 2998 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2999 ++OffloadingEntriesNum; 3000 } 3001 } 3002 3003 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3004 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3005 bool IgnoreAddressId) const { 3006 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3007 if (PerDevice == OffloadEntriesTargetRegion.end()) 3008 return false; 3009 auto PerFile = PerDevice->second.find(FileID); 3010 if (PerFile == PerDevice->second.end()) 3011 return false; 3012 auto PerParentName = PerFile->second.find(ParentName); 3013 if (PerParentName == PerFile->second.end()) 3014 return false; 3015 auto PerLine = PerParentName->second.find(LineNum); 3016 if (PerLine == PerParentName->second.end()) 3017 return false; 3018 // Fail if this entry is already registered. 3019 if (!IgnoreAddressId && 3020 (PerLine->second.getAddress() || PerLine->second.getID())) 3021 return false; 3022 return true; 3023 } 3024 3025 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3026 const OffloadTargetRegionEntryInfoActTy &Action) { 3027 // Scan all target region entries and perform the provided action. 3028 for (const auto &D : OffloadEntriesTargetRegion) 3029 for (const auto &F : D.second) 3030 for (const auto &P : F.second) 3031 for (const auto &L : P.second) 3032 Action(D.first, F.first, P.first(), L.first, L.second); 3033 } 3034 3035 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3036 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3037 OMPTargetGlobalVarEntryKind Flags, 3038 unsigned Order) { 3039 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3040 "only required for the device " 3041 "code generation."); 3042 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3043 ++OffloadingEntriesNum; 3044 } 3045 3046 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3047 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3048 CharUnits VarSize, 3049 OMPTargetGlobalVarEntryKind Flags, 3050 llvm::GlobalValue::LinkageTypes Linkage) { 3051 if (CGM.getLangOpts().OpenMPIsDevice) { 3052 // This could happen if the device compilation is invoked standalone. 
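    // Same standalone-device situation as for target regions above: the
    // entry was never initialized from host metadata, so skip the update.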
3053     if (!hasDeviceGlobalVarEntryInfo(VarName))
3054       return;
3055     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3056     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3057       if (Entry.getVarSize().isZero()) {
3058         Entry.setVarSize(VarSize);
3059         Entry.setLinkage(Linkage);
3060       }
3061       return;
3062     }
3063     Entry.setVarSize(VarSize);
3064     Entry.setLinkage(Linkage);
3065     Entry.setAddress(Addr);
3066   } else {
3067     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3068       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3069       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3070              "Entry not initialized!");
3071       if (Entry.getVarSize().isZero()) {
3072         Entry.setVarSize(VarSize);
3073         Entry.setLinkage(Linkage);
3074       }
3075       return;
3076     }
3077     OffloadEntriesDeviceGlobalVar.try_emplace(
3078         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3079     ++OffloadingEntriesNum;
3080   }
3081 }
3082 
3083 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3084     actOnDeviceGlobalVarEntriesInfo(
3085         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3086   // Scan all device global variable entries and perform the provided action.
3087   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3088     Action(E.getKey(), E.getValue());
3089 }
3090 
3091 void CGOpenMPRuntime::createOffloadEntry(
3092     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3093     llvm::GlobalValue::LinkageTypes Linkage) {
3094   OMPBuilder.emitOffloadingEntry(ID, Addr->getName(), Size, Flags);
3095 }
3096 
3097 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3098   // Emit the offloading entries and metadata so that the device codegen side
3099   // can easily figure out what to emit. The produced metadata looks like
3100   // this:
3101   //
3102   // !omp_offload.info = !{!1, ...}
3103   //
3104   // Right now we only generate metadata for functions that contain target
3105   // regions.
3106 
3107   // If we are in simd mode or there are no entries, we don't need to do
3108   // anything.
3109   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3110     return;
3111 
3112   llvm::Module &M = CGM.getModule();
3113   llvm::LLVMContext &C = M.getContext();
3114   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3115                          SourceLocation, StringRef>,
3116               16>
3117       OrderedEntries(OffloadEntriesInfoManager.size());
3118   llvm::SmallVector<StringRef, 16> ParentFunctions(
3119       OffloadEntriesInfoManager.size());
3120 
3121   // Auxiliary methods to create metadata values and strings.
3122   auto &&GetMDInt = [this](unsigned V) {
3123     return llvm::ConstantAsMetadata::get(
3124         llvm::ConstantInt::get(CGM.Int32Ty, V));
3125   };
3126 
3127   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3128 
3129   // Create the offloading info metadata node.
3130   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3131 
3132   // Create function that emits metadata for each target region entry.
3133   auto &&TargetRegionMetadataEmitter =
3134       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3135        &GetMDString](
3136           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3137           unsigned Line,
3138           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3139         // Generate metadata for target regions. Each entry of this metadata
3140         // contains:
3141         // - Entry 0 -> Kind of this type of metadata (0).
3142         // - Entry 1 -> Device ID of the file where the entry was identified.
3143         // - Entry 2 -> File ID of the file where the entry was identified.
3144         // - Entry 3 -> Mangled name of the function where the entry was
3145         // identified.
3146         // - Entry 4 -> Line in the file where the entry was identified.
3147         // - Entry 5 -> Order the entry was created.
3148         // The first element of the metadata node is the kind.
3149         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3150                                  GetMDInt(FileID), GetMDString(ParentName),
3151                                  GetMDInt(Line), GetMDInt(E.getOrder())};
3152 
3153         SourceLocation Loc;
3154         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3155                   E = CGM.getContext().getSourceManager().fileinfo_end();
3156              I != E; ++I) {
3157           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3158               I->getFirst()->getUniqueID().getFile() == FileID) {
3159             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3160                 I->getFirst(), Line, 1);
3161             break;
3162           }
3163         }
3164         // Save this entry in the right position of the ordered entries array.
3165         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3166         ParentFunctions[E.getOrder()] = ParentName;
3167 
3168         // Add metadata to the named metadata node.
3169         MD->addOperand(llvm::MDNode::get(C, Ops));
3170       };
3171 
3172   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3173       TargetRegionMetadataEmitter);
3174 
3175   // Create function that emits metadata for each device global variable entry.
3176   auto &&DeviceGlobalVarMetadataEmitter =
3177       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3178        MD](StringRef MangledName,
3179            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3180                &E) {
3181         // Generate metadata for global variables. Each entry of this metadata
3182         // contains:
3183         // - Entry 0 -> Kind of this type of metadata (1).
3184         // - Entry 1 -> Mangled name of the variable.
3185         // - Entry 2 -> Declare target kind.
3186         // - Entry 3 -> Order the entry was created.
3187         // The first element of the metadata node is the kind.
3188         llvm::Metadata *Ops[] = {
3189             GetMDInt(E.getKind()), GetMDString(MangledName),
3190             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3191 
3192         // Save this entry in the right position of the ordered entries array.
3193         OrderedEntries[E.getOrder()] =
3194             std::make_tuple(&E, SourceLocation(), MangledName);
3195 
3196         // Add metadata to the named metadata node.
3197         MD->addOperand(llvm::MDNode::get(C, Ops));
3198       };
3199 
3200   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3201       DeviceGlobalVarMetadataEmitter);
3202 
3203   for (const auto &E : OrderedEntries) {
3204     assert(std::get<0>(E) && "All ordered entries must exist!");
3205     if (const auto *CE =
3206             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3207                 std::get<0>(E))) {
3208       if (!CE->getID() || !CE->getAddress()) {
3209         // Do not blame the entry if the parent function is not emitted.
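        // E.g. a target region in a host function that was never emitted
        // (say, an unused inline function): its entry legitimately stays
        // empty and must not trigger the diagnostic below.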
3210         StringRef FnName = ParentFunctions[CE->getOrder()];
3211         if (!CGM.GetGlobalValue(FnName))
3212           continue;
3213         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3214             DiagnosticsEngine::Error,
3215             "Offloading entry for target region in %0 is incorrect: either the "
3216             "address or the ID is invalid.");
3217         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3218         continue;
3219       }
3220       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3221                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3222     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3223                                              OffloadEntryInfoDeviceGlobalVar>(
3224                    std::get<0>(E))) {
3225       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3226           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3227               CE->getFlags());
3228       switch (Flags) {
3229       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3230         if (CGM.getLangOpts().OpenMPIsDevice &&
3231             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3232           continue;
3233         if (!CE->getAddress()) {
3234           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3235               DiagnosticsEngine::Error, "Offloading entry for declare target "
3236                                         "variable %0 is incorrect: the "
3237                                         "address is invalid.");
3238           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3239           continue;
3240         }
3241         // The variable has no definition - no need to add the entry.
3242         if (CE->getVarSize().isZero())
3243           continue;
3244         break;
3245       }
3246       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3247         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3248                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3249                "Declare target link address is set.");
3250         if (CGM.getLangOpts().OpenMPIsDevice)
3251           continue;
3252         if (!CE->getAddress()) {
3253           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3254               DiagnosticsEngine::Error,
3255               "Offloading entry for declare target variable is incorrect: the "
3256               "address is invalid.");
3257           CGM.getDiags().Report(DiagID);
3258           continue;
3259         }
3260         break;
3261       }
3262 
3263       // Hidden or internal symbols on the device are not externally visible. We
3264       // should not attempt to register them by creating an offloading entry.
3265       if (auto *GV = dyn_cast<llvm::GlobalValue>(CE->getAddress()))
3266         if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
3267           continue;
3268 
3269       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3270                          CE->getVarSize().getQuantity(), Flags,
3271                          CE->getLinkage());
3272     } else {
3273       llvm_unreachable("Unsupported entry kind.");
3274     }
3275   }
3276 }
3277 
3278 /// Loads all the offload entries information from the host IR
3279 /// metadata.
3280 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3281   // If we are in target mode, load the metadata from the host IR. This code has
3282   // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
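  //
  // For reference, the operands produced there have the following shapes
  // (matching the GetMDInt/GetMDString indices used below):
  //   target region (kind 0):
  //     !{i32 0, i32 <device-id>, i32 <file-id>, !"<parent-name>",
  //       i32 <line>, i32 <order>}
  //   declare target variable (kind 1):
  //     !{i32 1, !"<mangled-name>", i32 <flags>, i32 <order>}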
3283 3284 if (!CGM.getLangOpts().OpenMPIsDevice) 3285 return; 3286 3287 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3288 return; 3289 3290 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3291 if (auto EC = Buf.getError()) { 3292 CGM.getDiags().Report(diag::err_cannot_open_file) 3293 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3294 return; 3295 } 3296 3297 llvm::LLVMContext C; 3298 auto ME = expectedToErrorOrAndEmitErrors( 3299 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3300 3301 if (auto EC = ME.getError()) { 3302 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3303 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3304 CGM.getDiags().Report(DiagID) 3305 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3306 return; 3307 } 3308 3309 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3310 if (!MD) 3311 return; 3312 3313 for (llvm::MDNode *MN : MD->operands()) { 3314 auto &&GetMDInt = [MN](unsigned Idx) { 3315 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3316 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3317 }; 3318 3319 auto &&GetMDString = [MN](unsigned Idx) { 3320 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3321 return V->getString(); 3322 }; 3323 3324 switch (GetMDInt(0)) { 3325 default: 3326 llvm_unreachable("Unexpected metadata!"); 3327 break; 3328 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3329 OffloadingEntryInfoTargetRegion: 3330 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3331 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3332 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3333 /*Order=*/GetMDInt(5)); 3334 break; 3335 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3336 OffloadingEntryInfoDeviceGlobalVar: 3337 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3338 /*MangledName=*/GetMDString(1), 3339 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3340 /*Flags=*/GetMDInt(2)), 3341 /*Order=*/GetMDInt(3)); 3342 break; 3343 } 3344 } 3345 } 3346 3347 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3348 if (!KmpRoutineEntryPtrTy) { 3349 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 
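    // This is the signature the runtime invokes task entries with: the global
    // thread id plus a pointer to the task record (passed as 'void *').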
3350 ASTContext &C = CGM.getContext(); 3351 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3352 FunctionProtoType::ExtProtoInfo EPI; 3353 KmpRoutineEntryPtrQTy = C.getPointerType( 3354 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3355 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3356 } 3357 } 3358 3359 namespace { 3360 struct PrivateHelpersTy { 3361 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3362 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3363 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3364 PrivateElemInit(PrivateElemInit) {} 3365 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3366 const Expr *OriginalRef = nullptr; 3367 const VarDecl *Original = nullptr; 3368 const VarDecl *PrivateCopy = nullptr; 3369 const VarDecl *PrivateElemInit = nullptr; 3370 bool isLocalPrivate() const { 3371 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3372 } 3373 }; 3374 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3375 } // anonymous namespace 3376 3377 static bool isAllocatableDecl(const VarDecl *VD) { 3378 const VarDecl *CVD = VD->getCanonicalDecl(); 3379 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3380 return false; 3381 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3382 // Use the default allocation. 3383 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 3384 !AA->getAllocator()); 3385 } 3386 3387 static RecordDecl * 3388 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3389 if (!Privates.empty()) { 3390 ASTContext &C = CGM.getContext(); 3391 // Build struct .kmp_privates_t. { 3392 // /* private vars */ 3393 // }; 3394 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3395 RD->startDefinition(); 3396 for (const auto &Pair : Privates) { 3397 const VarDecl *VD = Pair.second.Original; 3398 QualType Type = VD->getType().getNonReferenceType(); 3399 // If the private variable is a local variable with lvalue ref type, 3400 // allocate the pointer instead of the pointee type. 
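      // Likewise for allocatable locals: only a pointer to the separately
      // allocated storage is kept in .kmp_privates.t.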
3401 if (Pair.second.isLocalPrivate()) { 3402 if (VD->getType()->isLValueReferenceType()) 3403 Type = C.getPointerType(Type); 3404 if (isAllocatableDecl(VD)) 3405 Type = C.getPointerType(Type); 3406 } 3407 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3408 if (VD->hasAttrs()) { 3409 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3410 E(VD->getAttrs().end()); 3411 I != E; ++I) 3412 FD->addAttr(*I); 3413 } 3414 } 3415 RD->completeDefinition(); 3416 return RD; 3417 } 3418 return nullptr; 3419 } 3420 3421 static RecordDecl * 3422 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3423 QualType KmpInt32Ty, 3424 QualType KmpRoutineEntryPointerQTy) { 3425 ASTContext &C = CGM.getContext(); 3426 // Build struct kmp_task_t { 3427 // void * shareds; 3428 // kmp_routine_entry_t routine; 3429 // kmp_int32 part_id; 3430 // kmp_cmplrdata_t data1; 3431 // kmp_cmplrdata_t data2; 3432 // For taskloops additional fields: 3433 // kmp_uint64 lb; 3434 // kmp_uint64 ub; 3435 // kmp_int64 st; 3436 // kmp_int32 liter; 3437 // void * reductions; 3438 // }; 3439 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3440 UD->startDefinition(); 3441 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3442 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3443 UD->completeDefinition(); 3444 QualType KmpCmplrdataTy = C.getRecordType(UD); 3445 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3446 RD->startDefinition(); 3447 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3448 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3449 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3450 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3451 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3452 if (isOpenMPTaskLoopDirective(Kind)) { 3453 QualType KmpUInt64Ty = 3454 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3455 QualType KmpInt64Ty = 3456 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3457 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3458 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3459 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3460 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3461 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3462 } 3463 RD->completeDefinition(); 3464 return RD; 3465 } 3466 3467 static RecordDecl * 3468 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3469 ArrayRef<PrivateDataTy> Privates) { 3470 ASTContext &C = CGM.getContext(); 3471 // Build struct kmp_task_t_with_privates { 3472 // kmp_task_t task_data; 3473 // .kmp_privates_t. privates; 3474 // }; 3475 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3476 RD->startDefinition(); 3477 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3478 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3479 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3480 RD->completeDefinition(); 3481 return RD; 3482 } 3483 3484 /// Emit a proxy function which accepts kmp_task_t as the second 3485 /// argument. 
3486 /// \code 3487 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3488 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3489 /// For taskloops: 3490 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3491 /// tt->reductions, tt->shareds); 3492 /// return 0; 3493 /// } 3494 /// \endcode 3495 static llvm::Function * 3496 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3497 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3498 QualType KmpTaskTWithPrivatesPtrQTy, 3499 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3500 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3501 llvm::Value *TaskPrivatesMap) { 3502 ASTContext &C = CGM.getContext(); 3503 FunctionArgList Args; 3504 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3505 ImplicitParamDecl::Other); 3506 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3507 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3508 ImplicitParamDecl::Other); 3509 Args.push_back(&GtidArg); 3510 Args.push_back(&TaskTypeArg); 3511 const auto &TaskEntryFnInfo = 3512 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3513 llvm::FunctionType *TaskEntryTy = 3514 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3515 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3516 auto *TaskEntry = llvm::Function::Create( 3517 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3518 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3519 TaskEntry->setDoesNotRecurse(); 3520 CodeGenFunction CGF(CGM); 3521 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3522 Loc, Loc); 3523 3524 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3525 // tt, 3526 // For taskloops: 3527 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3528 // tt->task_data.shareds); 3529 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3530 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3531 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3532 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3533 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3534 const auto *KmpTaskTWithPrivatesQTyRD = 3535 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3536 LValue Base = 3537 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3538 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3539 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3540 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3541 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3542 3543 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3544 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3545 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3546 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3547 CGF.ConvertTypeForMem(SharedsPtrTy)); 3548 3549 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3550 llvm::Value *PrivatesParam; 3551 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3552 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3553 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3554 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3555 } else { 3556 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 
3557 } 3558 3559 llvm::Value *CommonArgs[] = { 3560 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap, 3561 CGF.Builder 3562 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF), 3563 CGF.VoidPtrTy, CGF.Int8Ty) 3564 .getPointer()}; 3565 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3566 std::end(CommonArgs)); 3567 if (isOpenMPTaskLoopDirective(Kind)) { 3568 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3569 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3570 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3571 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3572 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3573 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3574 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3575 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3576 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3577 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3578 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3579 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3580 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3581 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3582 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3583 CallArgs.push_back(LBParam); 3584 CallArgs.push_back(UBParam); 3585 CallArgs.push_back(StParam); 3586 CallArgs.push_back(LIParam); 3587 CallArgs.push_back(RParam); 3588 } 3589 CallArgs.push_back(SharedsParam); 3590 3591 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3592 CallArgs); 3593 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3594 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3595 CGF.FinishFunction(); 3596 return TaskEntry; 3597 } 3598 3599 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3600 SourceLocation Loc, 3601 QualType KmpInt32Ty, 3602 QualType KmpTaskTWithPrivatesPtrQTy, 3603 QualType KmpTaskTWithPrivatesQTy) { 3604 ASTContext &C = CGM.getContext(); 3605 FunctionArgList Args; 3606 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3607 ImplicitParamDecl::Other); 3608 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3609 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3610 ImplicitParamDecl::Other); 3611 Args.push_back(&GtidArg); 3612 Args.push_back(&TaskTypeArg); 3613 const auto &DestructorFnInfo = 3614 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3615 llvm::FunctionType *DestructorFnTy = 3616 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3617 std::string Name = 3618 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3619 auto *DestructorFn = 3620 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3621 Name, &CGM.getModule()); 3622 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3623 DestructorFnInfo); 3624 DestructorFn->setDoesNotRecurse(); 3625 CodeGenFunction CGF(CGM); 3626 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3627 Args, Loc, Loc); 3628 3629 LValue Base = CGF.EmitLoadOfPointerLValue( 3630 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3631 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3632 const auto *KmpTaskTWithPrivatesQTyRD = 3633 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3634 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3635 Base = CGF.EmitLValueForField(Base, *FI); 
3636 for (const auto *Field : 3637 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3638 if (QualType::DestructionKind DtorKind = 3639 Field->getType().isDestructedType()) { 3640 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3641 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3642 } 3643 } 3644 CGF.FinishFunction(); 3645 return DestructorFn; 3646 } 3647 3648 /// Emit a privates mapping function for correct handling of private and 3649 /// firstprivate variables. 3650 /// \code 3651 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 3652 /// **noalias priv1,..., <tyn> **noalias privn) { 3653 /// *priv1 = &.privates.priv1; 3654 /// ...; 3655 /// *privn = &.privates.privn; 3656 /// } 3657 /// \endcode 3658 static llvm::Value * 3659 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3660 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3661 ArrayRef<PrivateDataTy> Privates) { 3662 ASTContext &C = CGM.getContext(); 3663 FunctionArgList Args; 3664 ImplicitParamDecl TaskPrivatesArg( 3665 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3666 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3667 ImplicitParamDecl::Other); 3668 Args.push_back(&TaskPrivatesArg); 3669 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3670 unsigned Counter = 1; 3671 for (const Expr *E : Data.PrivateVars) { 3672 Args.push_back(ImplicitParamDecl::Create( 3673 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3674 C.getPointerType(C.getPointerType(E->getType())) 3675 .withConst() 3676 .withRestrict(), 3677 ImplicitParamDecl::Other)); 3678 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3679 PrivateVarsPos[VD] = Counter; 3680 ++Counter; 3681 } 3682 for (const Expr *E : Data.FirstprivateVars) { 3683 Args.push_back(ImplicitParamDecl::Create( 3684 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3685 C.getPointerType(C.getPointerType(E->getType())) 3686 .withConst() 3687 .withRestrict(), 3688 ImplicitParamDecl::Other)); 3689 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3690 PrivateVarsPos[VD] = Counter; 3691 ++Counter; 3692 } 3693 for (const Expr *E : Data.LastprivateVars) { 3694 Args.push_back(ImplicitParamDecl::Create( 3695 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3696 C.getPointerType(C.getPointerType(E->getType())) 3697 .withConst() 3698 .withRestrict(), 3699 ImplicitParamDecl::Other)); 3700 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3701 PrivateVarsPos[VD] = Counter; 3702 ++Counter; 3703 } 3704 for (const VarDecl *VD : Data.PrivateLocals) { 3705 QualType Ty = VD->getType().getNonReferenceType(); 3706 if (VD->getType()->isLValueReferenceType()) 3707 Ty = C.getPointerType(Ty); 3708 if (isAllocatableDecl(VD)) 3709 Ty = C.getPointerType(Ty); 3710 Args.push_back(ImplicitParamDecl::Create( 3711 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3712 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3713 ImplicitParamDecl::Other)); 3714 PrivateVarsPos[VD] = Counter; 3715 ++Counter; 3716 } 3717 const auto &TaskPrivatesMapFnInfo = 3718 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3719 llvm::FunctionType *TaskPrivatesMapTy = 3720 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3721 std::string Name = 3722 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3723 auto *TaskPrivatesMap = llvm::Function::Create( 3724 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3725 &CGM.getModule()); 3726 
CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3727 TaskPrivatesMapFnInfo); 3728 if (CGM.getLangOpts().Optimize) { 3729 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3730 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3731 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3732 } 3733 CodeGenFunction CGF(CGM); 3734 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3735 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3736 3737 // *privi = &.privates.privi; 3738 LValue Base = CGF.EmitLoadOfPointerLValue( 3739 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3740 TaskPrivatesArg.getType()->castAs<PointerType>()); 3741 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3742 Counter = 0; 3743 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3744 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3745 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3746 LValue RefLVal = 3747 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3748 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3749 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3750 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3751 ++Counter; 3752 } 3753 CGF.FinishFunction(); 3754 return TaskPrivatesMap; 3755 } 3756 3757 /// Emit initialization for private variables in task-based directives. 3758 static void emitPrivatesInit(CodeGenFunction &CGF, 3759 const OMPExecutableDirective &D, 3760 Address KmpTaskSharedsPtr, LValue TDBase, 3761 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3762 QualType SharedsTy, QualType SharedsPtrTy, 3763 const OMPTaskDataTy &Data, 3764 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3765 ASTContext &C = CGF.getContext(); 3766 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3767 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3768 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3769 ? OMPD_taskloop 3770 : OMPD_task; 3771 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3772 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3773 LValue SrcBase; 3774 bool IsTargetTask = 3775 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3776 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3777 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3778 // PointersArray, SizesArray, and MappersArray. The original variables for 3779 // these arrays are not captured and we get their addresses explicitly. 3780 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3781 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3782 SrcBase = CGF.MakeAddrLValue( 3783 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3784 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy), 3785 CGF.ConvertTypeForMem(SharedsTy)), 3786 SharedsTy); 3787 } 3788 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3789 for (const PrivateDataTy &Pair : Privates) { 3790 // Do not initialize private locals. 
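    // (A local private has no original reference, private copy, or init
    // expression; its field in .kmp_privates.t is raw storage only.)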
3791 if (Pair.second.isLocalPrivate()) { 3792 ++FI; 3793 continue; 3794 } 3795 const VarDecl *VD = Pair.second.PrivateCopy; 3796 const Expr *Init = VD->getAnyInitializer(); 3797 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3798 !CGF.isTrivialInitializer(Init)))) { 3799 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3800 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3801 const VarDecl *OriginalVD = Pair.second.Original; 3802 // Check if the variable is the target-based BasePointersArray, 3803 // PointersArray, SizesArray, or MappersArray. 3804 LValue SharedRefLValue; 3805 QualType Type = PrivateLValue.getType(); 3806 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3807 if (IsTargetTask && !SharedField) { 3808 assert(isa<ImplicitParamDecl>(OriginalVD) && 3809 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3810 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3811 ->getNumParams() == 0 && 3812 isa<TranslationUnitDecl>( 3813 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3814 ->getDeclContext()) && 3815 "Expected artificial target data variable."); 3816 SharedRefLValue = 3817 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3818 } else if (ForDup) { 3819 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3820 SharedRefLValue = CGF.MakeAddrLValue( 3821 SharedRefLValue.getAddress(CGF).withAlignment( 3822 C.getDeclAlign(OriginalVD)), 3823 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3824 SharedRefLValue.getTBAAInfo()); 3825 } else if (CGF.LambdaCaptureFields.count( 3826 Pair.second.Original->getCanonicalDecl()) > 0 || 3827 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) { 3828 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3829 } else { 3830 // Processing for implicitly captured variables. 3831 InlinedOpenMPRegionRAII Region( 3832 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3833 /*HasCancel=*/false, /*NoInheritance=*/true); 3834 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3835 } 3836 if (Type->isArrayType()) { 3837 // Initialize firstprivate array. 3838 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3839 // Perform simple memcpy. 3840 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3841 } else { 3842 // Initialize firstprivate array using element-by-element 3843 // initialization. 3844 CGF.EmitOMPAggregateAssign( 3845 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3846 Type, 3847 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3848 Address SrcElement) { 3849 // Clean up any temporaries needed by the initialization. 3850 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3851 InitScope.addPrivate(Elem, SrcElement); 3852 (void)InitScope.Privatize(); 3853 // Emit initialization for single element. 
3854 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3855 CGF, &CapturesInfo); 3856 CGF.EmitAnyExprToMem(Init, DestElement, 3857 Init->getType().getQualifiers(), 3858 /*IsInitializer=*/false); 3859 }); 3860 } 3861 } else { 3862 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3863 InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF)); 3864 (void)InitScope.Privatize(); 3865 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3866 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3867 /*capturedByInit=*/false); 3868 } 3869 } else { 3870 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3871 } 3872 } 3873 ++FI; 3874 } 3875 } 3876 3877 /// Check if duplication function is required for taskloops. 3878 static bool checkInitIsRequired(CodeGenFunction &CGF, 3879 ArrayRef<PrivateDataTy> Privates) { 3880 bool InitRequired = false; 3881 for (const PrivateDataTy &Pair : Privates) { 3882 if (Pair.second.isLocalPrivate()) 3883 continue; 3884 const VarDecl *VD = Pair.second.PrivateCopy; 3885 const Expr *Init = VD->getAnyInitializer(); 3886 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) && 3887 !CGF.isTrivialInitializer(Init)); 3888 if (InitRequired) 3889 break; 3890 } 3891 return InitRequired; 3892 } 3893 3894 3895 /// Emit task_dup function (for initialization of 3896 /// private/firstprivate/lastprivate vars and last_iter flag) 3897 /// \code 3898 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3899 /// lastpriv) { 3900 /// // setup lastprivate flag 3901 /// task_dst->last = lastpriv; 3902 /// // could be constructor calls here... 3903 /// } 3904 /// \endcode 3905 static llvm::Value * 3906 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3907 const OMPExecutableDirective &D, 3908 QualType KmpTaskTWithPrivatesPtrQTy, 3909 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3910 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3911 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3912 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3913 ASTContext &C = CGM.getContext(); 3914 FunctionArgList Args; 3915 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3916 KmpTaskTWithPrivatesPtrQTy, 3917 ImplicitParamDecl::Other); 3918 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3919 KmpTaskTWithPrivatesPtrQTy, 3920 ImplicitParamDecl::Other); 3921 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3922 ImplicitParamDecl::Other); 3923 Args.push_back(&DstArg); 3924 Args.push_back(&SrcArg); 3925 Args.push_back(&LastprivArg); 3926 const auto &TaskDupFnInfo = 3927 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3928 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3929 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3930 auto *TaskDup = llvm::Function::Create( 3931 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3932 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3933 TaskDup->setDoesNotRecurse(); 3934 CodeGenFunction CGF(CGM); 3935 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3936 Loc); 3937 3938 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3939 CGF.GetAddrOfLocalVar(&DstArg), 3940 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3941 // task_dst->liter = lastpriv; 3942 if (WithLastIter) { 3943 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3944 LValue Base = 
CGF.EmitLValueForField( 3945 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3946 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3947 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3948 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3949 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3950 } 3951 3952 // Emit initial values for private copies (if any). 3953 assert(!Privates.empty()); 3954 Address KmpTaskSharedsPtr = Address::invalid(); 3955 if (!Data.FirstprivateVars.empty()) { 3956 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3957 CGF.GetAddrOfLocalVar(&SrcArg), 3958 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3959 LValue Base = CGF.EmitLValueForField( 3960 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3961 KmpTaskSharedsPtr = Address( 3962 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3963 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3964 KmpTaskTShareds)), 3965 Loc), 3966 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy)); 3967 } 3968 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 3969 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 3970 CGF.FinishFunction(); 3971 return TaskDup; 3972 } 3973 3974 /// Checks if destructor function is required to be generated. 3975 /// \return true if cleanups are required, false otherwise. 3976 static bool 3977 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3978 ArrayRef<PrivateDataTy> Privates) { 3979 for (const PrivateDataTy &P : Privates) { 3980 if (P.second.isLocalPrivate()) 3981 continue; 3982 QualType Ty = P.second.Original->getType().getNonReferenceType(); 3983 if (Ty.isDestructedType()) 3984 return true; 3985 } 3986 return false; 3987 } 3988 3989 namespace { 3990 /// Loop generator for OpenMP iterator expression. 
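/// A rough sketch of the per-iterator control flow this scope generates
/// (block names are illustrative):
/// \code
/// counter = 0;
/// cont:
///   if (counter < upper) goto body; else goto exit;
/// body:
///   iter = begin + counter * step;
///   ... body of the construct using 'iter' ...
///   counter = counter + 1;
///   goto cont;
/// exit:
/// \endcode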
3991 class OMPIteratorGeneratorScope final 3992 : public CodeGenFunction::OMPPrivateScope { 3993 CodeGenFunction &CGF; 3994 const OMPIteratorExpr *E = nullptr; 3995 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 3996 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 3997 OMPIteratorGeneratorScope() = delete; 3998 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 3999 4000 public: 4001 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4002 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4003 if (!E) 4004 return; 4005 SmallVector<llvm::Value *, 4> Uppers; 4006 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4007 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4008 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4009 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName())); 4010 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4011 addPrivate( 4012 HelperData.CounterVD, 4013 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr")); 4014 } 4015 Privatize(); 4016 4017 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4018 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4019 LValue CLVal = 4020 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4021 HelperData.CounterVD->getType()); 4022 // Counter = 0; 4023 CGF.EmitStoreOfScalar( 4024 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4025 CLVal); 4026 CodeGenFunction::JumpDest &ContDest = 4027 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4028 CodeGenFunction::JumpDest &ExitDest = 4029 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4030 // N = <number-of_iterations>; 4031 llvm::Value *N = Uppers[I]; 4032 // cont: 4033 // if (Counter < N) goto body; else goto exit; 4034 CGF.EmitBlock(ContDest.getBlock()); 4035 auto *CVal = 4036 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4037 llvm::Value *Cmp = 4038 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4039 ? 
CGF.Builder.CreateICmpSLT(CVal, N)
4040             : CGF.Builder.CreateICmpULT(CVal, N);
4041     llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4042     CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4043     // body:
4044     CGF.EmitBlock(BodyBB);
4045     // Iteri = Begini + Counter * Stepi;
4046     CGF.EmitIgnoredExpr(HelperData.Update);
4047   }
4048 }
4049   ~OMPIteratorGeneratorScope() {
4050     if (!E)
4051       return;
4052     for (unsigned I = E->numOfIterators(); I > 0; --I) {
4053       // Counter = Counter + 1;
4054       const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4055       CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4056       // goto cont;
4057       CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4058       // exit:
4059       CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4060     }
4061   }
4062 };
4063 } // namespace
4064 
4065 static std::pair<llvm::Value *, llvm::Value *>
4066 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4067   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4068   llvm::Value *Addr;
4069   if (OASE) {
4070     const Expr *Base = OASE->getBase();
4071     Addr = CGF.EmitScalarExpr(Base);
4072   } else {
4073     Addr = CGF.EmitLValue(E).getPointer(CGF);
4074   }
4075   llvm::Value *SizeVal;
4076   QualType Ty = E->getType();
4077   if (OASE) {
4078     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4079     for (const Expr *SE : OASE->getDimensions()) {
4080       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4081       Sz = CGF.EmitScalarConversion(
4082           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4083       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4084     }
4085   } else if (const auto *ASE =
4086                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4087     LValue UpAddrLVal =
4088         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4089     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4090     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4091         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4092     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4093     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4094     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4095   } else {
4096     SizeVal = CGF.getTypeSize(Ty);
4097   }
4098   return std::make_pair(Addr, SizeVal);
4099 }
4100 
4101 /// Builds kmp_task_affinity_info_t, if not built yet, and builds flags type.
4102 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4103   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4104   if (KmpTaskAffinityInfoTy.isNull()) {
4105     RecordDecl *KmpAffinityInfoRD =
4106         C.buildImplicitRecord("kmp_task_affinity_info_t");
4107     KmpAffinityInfoRD->startDefinition();
4108     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4109     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4110     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4111     KmpAffinityInfoRD->completeDefinition();
4112     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4113   }
4114 }
4115 
4116 CGOpenMPRuntime::TaskResultTy
4117 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4118                               const OMPExecutableDirective &D,
4119                               llvm::Function *TaskFunction, QualType SharedsTy,
4120                               Address Shareds, const OMPTaskDataTy &Data) {
4121   ASTContext &C = CGM.getContext();
4122   llvm::SmallVector<PrivateDataTy, 4> Privates;
4123   // Aggregate privates and sort them by the alignment.
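  // Descending alignment (see the stable_sort below) packs .kmp_privates.t
  // tightly: every field starts at an offset that already satisfies its
  // alignment, so no padding fields are needed.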
4124 const auto *I = Data.PrivateCopies.begin(); 4125 for (const Expr *E : Data.PrivateVars) { 4126 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4127 Privates.emplace_back( 4128 C.getDeclAlign(VD), 4129 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4130 /*PrivateElemInit=*/nullptr)); 4131 ++I; 4132 } 4133 I = Data.FirstprivateCopies.begin(); 4134 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4135 for (const Expr *E : Data.FirstprivateVars) { 4136 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4137 Privates.emplace_back( 4138 C.getDeclAlign(VD), 4139 PrivateHelpersTy( 4140 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4141 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4142 ++I; 4143 ++IElemInitRef; 4144 } 4145 I = Data.LastprivateCopies.begin(); 4146 for (const Expr *E : Data.LastprivateVars) { 4147 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4148 Privates.emplace_back( 4149 C.getDeclAlign(VD), 4150 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4151 /*PrivateElemInit=*/nullptr)); 4152 ++I; 4153 } 4154 for (const VarDecl *VD : Data.PrivateLocals) { 4155 if (isAllocatableDecl(VD)) 4156 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); 4157 else 4158 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 4159 } 4160 llvm::stable_sort(Privates, 4161 [](const PrivateDataTy &L, const PrivateDataTy &R) { 4162 return L.first > R.first; 4163 }); 4164 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4165 // Build type kmp_routine_entry_t (if not built yet). 4166 emitKmpRoutineEntryT(KmpInt32Ty); 4167 // Build type kmp_task_t (if not built yet). 4168 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4169 if (SavedKmpTaskloopTQTy.isNull()) { 4170 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4171 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4172 } 4173 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4174 } else { 4175 assert((D.getDirectiveKind() == OMPD_task || 4176 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4177 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4178 "Expected taskloop, task or target directive"); 4179 if (SavedKmpTaskTQTy.isNull()) { 4180 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4181 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4182 } 4183 KmpTaskTQTy = SavedKmpTaskTQTy; 4184 } 4185 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4186 // Build particular struct kmp_task_t for the given task. 4187 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4188 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4189 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4190 QualType KmpTaskTWithPrivatesPtrQTy = 4191 C.getPointerType(KmpTaskTWithPrivatesQTy); 4192 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4193 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4194 KmpTaskTWithPrivatesTy->getPointerTo(); 4195 llvm::Value *KmpTaskTWithPrivatesTySize = 4196 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4197 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4198 4199 // Emit initial values for private copies (if any). 
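  // (The mapping function built here only hands out pointers into
  // .kmp_privates.t; the actual initial values are stored later by
  // emitPrivatesInit / the task duplication function.)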
4200 llvm::Value *TaskPrivatesMap = nullptr; 4201 llvm::Type *TaskPrivatesMapTy = 4202 std::next(TaskFunction->arg_begin(), 3)->getType(); 4203 if (!Privates.empty()) { 4204 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4205 TaskPrivatesMap = 4206 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); 4207 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4208 TaskPrivatesMap, TaskPrivatesMapTy); 4209 } else { 4210 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4211 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4212 } 4213 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4214 // kmp_task_t *tt); 4215 llvm::Function *TaskEntry = emitProxyTaskFunction( 4216 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4217 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4218 TaskPrivatesMap); 4219 4220 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4221 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4222 // kmp_routine_entry_t *task_entry); 4223 // Task flags. Format is taken from 4224 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h, 4225 // description of kmp_tasking_flags struct. 4226 enum { 4227 TiedFlag = 0x1, 4228 FinalFlag = 0x2, 4229 DestructorsFlag = 0x8, 4230 PriorityFlag = 0x20, 4231 DetachableFlag = 0x40, 4232 }; 4233 unsigned Flags = Data.Tied ? TiedFlag : 0; 4234 bool NeedsCleanup = false; 4235 if (!Privates.empty()) { 4236 NeedsCleanup = 4237 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); 4238 if (NeedsCleanup) 4239 Flags = Flags | DestructorsFlag; 4240 } 4241 if (Data.Priority.getInt()) 4242 Flags = Flags | PriorityFlag; 4243 if (D.hasClausesOfKind<OMPDetachClause>()) 4244 Flags = Flags | DetachableFlag; 4245 llvm::Value *TaskFlags = 4246 Data.Final.getPointer() 4247 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 4248 CGF.Builder.getInt32(FinalFlag), 4249 CGF.Builder.getInt32(/*C=*/0)) 4250 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4251 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4252 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4253 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 4254 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 4255 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4256 TaskEntry, KmpRoutineEntryPtrTy)}; 4257 llvm::Value *NewTask; 4258 if (D.hasClausesOfKind<OMPNowaitClause>()) { 4259 // Check if we have any device clause associated with the directive. 4260 const Expr *Device = nullptr; 4261 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 4262 Device = C->getDevice(); 4263 // Emit device ID if any otherwise use default value. 4264 llvm::Value *DeviceID; 4265 if (Device) 4266 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4267 CGF.Int64Ty, /*isSigned=*/true); 4268 else 4269 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 4270 AllocArgs.push_back(DeviceID); 4271 NewTask = CGF.EmitRuntimeCall( 4272 OMPBuilder.getOrCreateRuntimeFunction( 4273 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 4274 AllocArgs); 4275 } else { 4276 NewTask = 4277 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4278 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 4279 AllocArgs); 4280 } 4281 // Emit detach clause initialization. 
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Field ids in the kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill the array with the elements that have no iterator modifier.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
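    // Illustrative source-level example (hypothetical directive, for
    // orientation only): for
    //   #pragma omp task affinity(b) affinity(iterator(i=0:n): a[i])
    // the 'b' entry was stored by the loop above, while the a[i] entries are
    // appended by the iterator loop below through the runtime counter PosLVal.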
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins,
    // kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the call and ignore its result for now, until the runtime
    // function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
  DepInOutSet = 0x8,
  DepOmpAllMem = 0x80,
};
/// Field ids in the kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = DepInOutSet;
    break;
  case OMPC_DEPEND_outallmemory:
    DepKind = DepOmpAllMem;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_inoutallmemory:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}
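// Note that 'depend(out: x)' and 'depend(inout: x)' both lower to DepInOut
// (0x3) here, so the runtime treats them identically when matching
// predecessor tasks.
//
// The kmp_depend_info record built below corresponds roughly to this C
// sketch (field names are illustrative; the runtime's kmp.h is
// authoritative):
//   struct kmp_depend_info {
//     intptr_t base_addr; // address of the dependence item
//     size_t len;         // size of the item in bytes
//     <bool-width uint> flags; // one of the RTLDependenceKindTy values
//   };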
/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.Builder.CreateElementBitCast(
          DepobjLVal.getAddress(CGF),
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
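    // Each depobj stores its element count in the kmp_depend_info slot that
    // sits just before its first real entry (see getDepobjElements above);
    // the loop below accumulates those counts into per-expression
    // temporaries.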
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcpy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
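  // Only plain dependences (neither depobj nor iterator-based) can be
  // counted statically here; depobj element counts and iterator trip counts
  // are accumulated dynamically below.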
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate the number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
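  // Resulting fill order of the combined array (a sketch): first the plain
  // dependences emitted above, then iterator-expanded dependences, then the
  // contents of each depobj, with the latter two tracked through a runtime
  // position counter.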
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}

Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // This is required to handle the 'depobj(x) update(in)' construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // The array needs to be allocated dynamically.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
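  // Allocation layout produced below (sketch): slot 0 stores the element
  // count in its base_addr field, slots 1..N hold the actual kmp_depend_info
  // entries, and the returned address points at slot 1.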
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}

void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // __kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
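  // Overall shape of the code emitted below (a sketch):
  //   if (<if-clause, or always-true when absent>) {
  //     __kmpc_omp_task{_with_deps}(loc, gtid, new_task, ...);
  //   } else {
  //     __kmpc_omp_wait_deps(...);        // only if dependences are present
  //     __kmpc_omp_task_begin_if0(...);
  //     proxy_task_entry(gtid, new_task); // executed inline, undeferred
  //     __kmpc_omp_task_complete_if0(...);
  //   }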
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if the
  // dependence list is not empty.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if the parent region is untied and build a return for the untied
    // task.
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
    // kmp_depend_info_t *noalias_dep_list) if dependence info is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
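  // At this point the lb/ub/st fields of the task descriptor hold the bounds
  // captured for this taskloop; the runtime splits [lb, ub] into chunks
  // according to the sched/grainsize arguments passed below.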
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the
                         // compiler.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
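// The loop built by EmitOMPAggregateReduction corresponds roughly to this C
// sketch (illustrative only):
//   for (size_t i = 0; i != NumElements; ++i)
//     lhs[i] = RedOp(lhs[i], rhs[i]);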
/// Emit reduction combiner. If the combiner is a simple expression, emit it as
/// is; otherwise treat it as the combiner of a UDR declaration and emit it as
/// a call to the UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get the array size and emit the VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // The following code should be emitted for the reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] =
  //      ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //                              *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  //         RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  //  __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //  break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  //  [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  //  break;
  // default:;
  // }
  //
  // If SimpleReduction is true, only the following code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy = C.getConstantArrayType(
      C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
      /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn =
      emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
                            Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  //    RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);
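  // Return-value contract of __kmpc_reduce{_nowait}, as summarized in the
  // comment at the top of this function: 1 - this thread combines into the
  // shared copy and calls __kmpc_end_reduce{_nowait}; 2 - combine using
  // atomics; anything else - nothing to do.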
  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  //  __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //  break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  //  break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
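// Note on the atomic lowering above: a min/max reduction typically reaches
// case 2 as a conditional of the form 'x = x < e ? x : e', which is why the
// condition of the AbstractConditionalOperator is analyzed for the
// comparison; simple forms like 'x = x + e' become a single atomic update,
// and everything else falls back to a named critical region.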
/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" <Decl_start_loc_raw_enc>
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}
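// For example (illustrative only; the numeric suffix is the raw
// source-location encoding and the separator comes from getName), a local
// variable 'a' with the prefix "reduction_size" yields something like
// "reduction_size.a_1234".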
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.Builder.CreateElementBitCast(
          CGF.GetAddrOfLocalVar(&Param),
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from a global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If the initializer comes from a 'declare reduction' construct, emit a
  // pointer to the address of the original reduction item (required by the
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from a global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
              CGF.GetAddrOfLocalVar(&ParamInOut),
              CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
              CGF.GetAddrOfLocalVar(&ParamIn),
              CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from a global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
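// For reference, a sketch of user code that reaches emitTaskReductionInit
// below (illustrative only):
//
// \code
// int x = 0;
// #pragma omp taskgroup task_reduction(+ : x)
// {
//   #pragma omp task in_reduction(+ : x)
//   x += foo();
// }
// \endcode
//
// For every reduction item, one element of the kmp_taskred_input_t array is
// filled with the .red_init., .red_comb. and .red_fini. helpers emitted
// above, and the array is handed to __kmpc_taskred_init (or to
// __kmpc_taskred_modifier_init when a task reduction modifier is present).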
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because the runtime does not provide a way to pass the
    // sizes of VLAs/array sections to the initializer/combiner/finalizer
    // functions. Instead, threadprivate global variables are used to store
    // these values and pass them to those functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr =
        Fini ? CGF.EmitCastToVoidPtr(Fini)
             : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
  // void *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}
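// For reference (an illustrative sketch, not emitted verbatim): inside a task
// with an in_reduction clause, accesses to a shared reduction item 'x' are
// rerouted through the call built above, roughly
//
// \code
// void *priv = __kmpc_task_reduction_get_th_data(gtid, tg, &x);
// // ... loads/stores of x use priv instead ...
// \endcode
//
// where 'tg' is the taskgroup descriptor returned by __kmpc_taskred_init.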
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence
      // info is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}
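// For reference, a sketch of the source constructs handled by the two
// functions below (illustrative only):
//
// \code
// #pragma omp parallel
// {
//   #pragma omp cancellation point parallel // -> __kmpc_cancellationpoint
//   if (err) {
//     #pragma omp cancel parallel           // -> __kmpc_cancel
//   }
// }
// \endcode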
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
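// For reference, a sketch of the clause handled by the helpers below
// (illustrative only; 'myalloc' and 'traits' are hypothetical names):
//
// \code
// omp_alloctrait_t traits[1] = {{omp_atk_alignment, 64}};
// omp_allocator_handle_t myalloc;
// #pragma omp target uses_allocators(myalloc(traits))
// { /* ... allocate with myalloc ... */ }
// \endcode
//
// OMPUsesAllocatorsActionTy wraps the target region so that
// __kmpc_init_allocator runs on region entry and __kmpc_destroy_allocator on
// region exit for each allocator listed with traits.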
namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}
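// For example (illustrative values; the hex IDs come from
// getTargetEntryUniqueInfo and vary per device and file), a target region at
// line 42 of a function 'foo' may get an entry function named:
//
//   __omp_offloading_801_30d2_foo_l42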
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
                               !CGM.getLangOpts().OpenMPOffloadMandatory;
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  if (BuildOutlinedFn)
    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the
  // current target region, so it only has to be unique and not necessarily
  // point to anything. It could be the pointer to the outlined function that
  // implements the target region, but we aren't using that so that the
  // compiler doesn't need to keep it alive, and could therefore inline the
  // host function if proven worthwhile during optimization. On the other
  // hand, if emitting code for the device, the ID has to be the function
  // address so that it can be retrieved from the offloading entry and
  // launched by the runtime library. We also mark the outlined function to
  // have external linkage in case we are emitting code for the device,
  // because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakODRLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // If we do not allow host fallback we still need a named address to use.
  llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
  if (!BuildOutlinedFn) {
    assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
           "Named kernel already exists?");
    TargetRegionEntryAddr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::InternalLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function.
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  if (BuildOutlinedFn)
    CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}
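// For example (illustrative): given a captured body such as
//
// \code
// {
//   int unused = 0;   // ignorable: unused local with trivial initializer
//   ;                 // ignorable: null statement
//   #pragma omp teams // the single interesting child
//   { /* ... */ }
// }
// \endcode
//
// getSingleCompoundChild below returns the teams directive, since the other
// statements are trivial or otherwise ignorable by the rules it applies.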
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
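// For example (illustrative): for
//
// \code
// #pragma omp target teams num_teams(4)
// { /* ... */ }
// \endcode
//
// getNumTeamsExprForTargetDirective below returns the num_teams expression
// and sets DefaultVal to 4; for a bare 'target' whose body begins with a
// nested 'parallel' region it reports a default of one team instead.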
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        DefaultVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region.
    DefaultVal = -1;
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t DefaultNT = -1;
  const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams, /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams, /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
}
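// For reference (an illustrative sketch): for an inner region like
//
// \code
// #pragma omp parallel num_threads(n) if(c)
// \endcode
//
// getNumThreads below materializes roughly 'c ? min(thread_limit, n) : 1',
// following the '<cond> ? (<numthreads> ? <numthreads> : 0) : 1' scheme
// described in its comments.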
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle the if clause. If present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of the num_threads clause iff the if clause was not
      // specified or did not evaluate to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process the condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
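// For example (illustrative): for
//
// \code
// #pragma omp target parallel thread_limit(16) num_threads(4)
// \endcode
//
// getNumThreadsExprForTargetDirective below reports a default of 4, since
// the constant num_threads value is smaller than the thread_limit value.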
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // Teams have no clause thread_limit.
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle the if clause. If present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      ThreadLimitVal =
          ThreadLimitVal
              ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                   ThreadLimitVal),
                                 NumThreadsVal, ThreadLimitVal)
              : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have
    /// the use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    // Increment and decrement a separate reference counter so that the data
    // cannot be unmapped within the associated region. Thus, this flag is
    // intended to be used on 'target' and 'target data' directives because
    // they are inherently structured. It is not intended to be used on
    // 'target enter data' and 'target exit data' directives because they are
    // inherently dynamic.
    // This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

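  // Informative note: OMP_MAP_MEMBER_OF occupies the 16 most significant
  // bits, i.e. the field starts at bit 48 (see getFlagMemberOffset below),
  // so MEMBER_OF(n) is encoded as ((uint64_t)n << 48). The all-ones pattern
  // 0xffff000000000000 doubles as a placeholder meaning "position not yet
  // assigned" (see setCorrectMemberOfFlag).
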
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that holds debugging information for a data mapping to be passed
  /// to the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  /// Class that associates information with a base pointer to be passed to
  /// the runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types,
  /// user-defined mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

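    // Usage sketch (informative; the local names are hypothetical): callers
    // accumulate per-component-list results into one combined set of arrays:
    //   MapCombinedInfoTy Combined, Cur;
    //   // ...populate Cur for one component list...
    //   Combined.append(Cur);
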
    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

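  // Informative example: for 'use_device_ptr(s.p)' where 's.p' has no
  // explicit map, the entry cannot be emitted until the enclosing struct 's'
  // has been processed in full, so it is recorded as a
  // DeferredDevicePtrEntryTy and materialized afterwards (see
  // generateAllInfoForClauses below).
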
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of
    // relying on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and no lower
      // bound is specified either, we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits alloc/release that is what the
      // runtime is going to do. Therefore, we don't need to signal anything
      // for these two type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OMP_MAP_NON_CONTIG;
    return Bits;
  }

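  // Illustrative example: for an explicit 'map(always, close, tofrom: x)'
  // with AddPtrFlag and AddIsTargetParamFlag both false, getMapTypeBits
  // returns OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE.
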
  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have more than size 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

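  // Examples (informative): 'a[0:2]' is a final array section (constant
  // length 2 != 1); 'a[5:1]' is not (length provably 1); for 'a[:]' the
  // result depends on whether the array dimension is a compile-time constant
  // other than 1.
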
7578 // 7579 // double d; 7580 // int i[100]; 7581 // float *p; 7582 // 7583 // struct S1 { 7584 // int i; 7585 // float f[50]; 7586 // } 7587 // struct S2 { 7588 // int i; 7589 // float f[50]; 7590 // S1 s; 7591 // double *p; 7592 // struct S2 *ps; 7593 // int &ref; 7594 // } 7595 // S2 s; 7596 // S2 *ps; 7597 // 7598 // map(d) 7599 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7600 // 7601 // map(i) 7602 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7603 // 7604 // map(i[1:23]) 7605 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7606 // 7607 // map(p) 7608 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7609 // 7610 // map(p[1:24]) 7611 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7612 // in unified shared memory mode or for local pointers 7613 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7614 // 7615 // map(s) 7616 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7617 // 7618 // map(s.i) 7619 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7620 // 7621 // map(s.s.f) 7622 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7623 // 7624 // map(s.p) 7625 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7626 // 7627 // map(to: s.p[:22]) 7628 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7629 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7630 // &(s.p), &(s.p[0]), 22*sizeof(double), 7631 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7632 // (*) alloc space for struct members, only this is a target parameter 7633 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7634 // optimizes this entry out, same in the examples below) 7635 // (***) map the pointee (map: to) 7636 // 7637 // map(to: s.ref) 7638 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7639 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7640 // (*) alloc space for struct members, only this is a target parameter 7641 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7642 // optimizes this entry out, same in the examples below) 7643 // (***) map the pointee (map: to) 7644 // 7645 // map(s.ps) 7646 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7647 // 7648 // map(from: s.ps->s.i) 7649 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7650 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7651 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7652 // 7653 // map(to: s.ps->ps) 7654 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7655 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7656 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7657 // 7658 // map(s.ps->ps->ps) 7659 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7660 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7661 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7662 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7663 // 7664 // map(to: s.ps->ps->s.f[:22]) 7665 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7666 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7667 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7668 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7669 // 7670 // map(ps) 7671 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7672 // 7673 // map(ps->i) 7674 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7675 // 7676 // map(ps->s.f) 7677 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7678 // 7679 // map(from: ps->p) 7680 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7681 // 7682 // 
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list
    // of components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer; it
        // can be associated with the combined storage if shared memory mode
        // is active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF
    // some combined entry (for partial structs). Only the first PTR_AND_OBJ
    // entry in a component list should be marked as MEMBER_OF; all
    // subsequent entries do not belong to the base struct. E.g.
    //   struct S2 s;
    //   s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct.
    // ps(3) is the pointee of ps(2), which is not a member of struct s, so
    // it should not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a
    // component in the component list which is a member expression. Useful
    // when we have a pointer or a final array section, in which case it is
    // the previous component in the list which tells us whether we have a
    // member expression. E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells
    // us whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct),
      // mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not an
          // array-like expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer, or if it is an array
      // section whose length can't be proved to be one. If this is a
      // pointer, it becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array
      // section as a final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB =
              Address(CGF.EmitScalarExpr(OAShE->getBase()),
                      CGF.ConvertTypeForMem(
                          OAShE->getBase()->getType()->getPointeeType()),
                      CGF.getContext().getTypeAlignInChars(
                          OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress(CGF);
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress(CGF);
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the
        // object it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers,
                             IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress(CGF);
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress(CGF);
                }
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.Int8Ty,
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, MotionModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference ||
                  FirstPointerInComplexData || IsMemberReference,
              IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as
            // well, then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the
              // flag should be later updated with the correct value of
              // MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          CombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of
        // the mapped member. If the parent is "*this", then the value
        // declaration is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this
          // struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LowestElem};
          }
        }

        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran through the whole component list, allocate the space for the
    // whole record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // For supporting strides in array sections, we need to initialize the
    // first dimension size as 1, the first offset as 0, and the first count
    // as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect Size information for each dimension and get the element size
    // as the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for the next iteration.
        if (ElementType) {
          // For the case of having a pointer as the base, we need to remove
          // one level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get the dimension value, except for the last dimension since we
      // don't need it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CAT->getSize().getZExtValue()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto *DI = DimSizes.begin() + 1;
    // Product of dimensions.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect info for the non-contiguous case. Notice that offset, count,
    // and stride are only meaningful for array sections, so we insert a null
    // for anything other than an array section.
    // Also, the sizes of offset, count, and stride are not the same as those
    // of pointers, base_pointers, sizes, or dims. Instead, the sizes of
    // offset, count, and stride equal the number of non-contiguous
    // declarations in the target update to/from clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, all the
        // lower dimensions are constructed as array sections too. However,
        // for a case like arr[0:2][2], Clang constructs the inner dimension
        // as an array section even though it is not actually in array-section
        // form according to the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //        Offset  Count  Stride
      //    D0       0      1       4  (int)    <- dummy dimension
      //    D1       0      2       8  (2 * (1) * 4)
      //    D2       1      2      20  (1 * (1 * 5) * 4)
      //    D3       0      2     200  (2 * (1 * 5 * 5) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }

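  // Informative example: a pointer captured as firstprivate on a 'target'
  // directive is mapped OMP_MAP_TO | OMP_MAP_PTR_AND_OBJ, while a
  // non-pointer firstprivate capture gets OMP_MAP_PRIVATE | OMP_MAP_TO (see
  // getMapModifiersForPrivateClauses below).
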
  /// Return the adjusted map modifiers if the declaration a capture refers
  /// to appears in a first-private clause. This is expected to be used only
  /// with directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the
    // captured declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // For map(to: lambda), use the user-specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Shift by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

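  // Worked example (informative): getMemberOfFlag(2) encodes MEMBER_OF(3) as
  // 0x0003000000000000. setCorrectMemberOfFlag replaces the 0xFFFF
  // placeholder with that value; a PTR_AND_OBJ entry that never carried the
  // placeholder is left untouched.
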
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended
  /// to the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
            *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration. Exclude cases
            // where the base pointer is mapped as an array subscript, array
            // section, or array shaping. The base address is passed as a
            // pointer to the base in this case and cannot be used as the base
            // for a use_device_ptr list item.
            if (CI != Data.end()) {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing
    // map entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
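        // For illustration (hypothetical variables), a typical use looks
        // like:
        // \code
        //   int A[100];
        //   #pragma omp target data map(tofrom: A) use_device_addr(A)
        //   { /* A designates the corresponding device storage here. */ }
        // \endcode
        // The check below marks the existing map entry for A, if there is
        // one, so the runtime returns its device address instead of creating
        // a new mapping.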
8658 if (It != Info.end()) { 8659 bool Found = false; 8660 for (auto &Data : It->second) { 8661 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8662 return MI.Components.back().getAssociatedDeclaration() == VD; 8663 }); 8664 // If we found a map entry, signal that the pointer has to be 8665 // returned and move on to the next declaration. 8666 if (CI != Data.end()) { 8667 CI->ReturnDevicePointer = true; 8668 Found = true; 8669 break; 8670 } 8671 } 8672 if (Found) 8673 continue; 8674 } 8675 8676 // We didn't find any match in our map information - generate a zero 8677 // size array section - if the pointer is a struct member we defer this 8678 // action until the whole struct has been processed. 8679 if (isa<MemberExpr>(IE)) { 8680 // Insert the pointer into Info to be processed by 8681 // generateInfoForComponentList. Because it is a member pointer 8682 // without a pointee, no entry will be generated for it, therefore 8683 // we need to generate one after the whole struct has been processed. 8684 // Nonetheless, generateInfoForComponentList must be called to take 8685 // the pointer into account for the calculation of the range of the 8686 // partial struct. 8687 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8688 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8689 nullptr, nullptr, /*ForDeviceAddr=*/true); 8690 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8691 } else { 8692 llvm::Value *Ptr; 8693 if (IE->isGLValue()) 8694 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8695 else 8696 Ptr = CGF.EmitScalarExpr(IE); 8697 CombinedInfo.Exprs.push_back(VD); 8698 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8699 CombinedInfo.Pointers.push_back(Ptr); 8700 CombinedInfo.Sizes.push_back( 8701 llvm::Constant::getNullValue(CGF.Int64Ty)); 8702 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8703 CombinedInfo.Mappers.push_back(nullptr); 8704 } 8705 } 8706 } 8707 8708 for (const auto &Data : Info) { 8709 StructRangeInfoTy PartialStruct; 8710 // Temporary generated information. 8711 MapCombinedInfoTy CurInfo; 8712 const Decl *D = Data.first; 8713 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8714 for (const auto &M : Data.second) { 8715 for (const MapInfo &L : M) { 8716 assert(!L.Components.empty() && 8717 "Not expecting declaration with no component lists."); 8718 8719 // Remember the current base pointer index. 8720 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8721 CurInfo.NonContigInfo.IsNonContiguous = 8722 L.Components.back().isNonContiguous(); 8723 generateInfoForComponentList( 8724 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8725 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8726 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8727 8728 // If this entry relates with a device pointer, set the relevant 8729 // declaration and add the 'return pointer' flag. 
8730 if (L.ReturnDevicePointer) { 8731 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8732 "Unexpected number of mapped base pointers."); 8733 8734 const ValueDecl *RelevantVD = 8735 L.Components.back().getAssociatedDeclaration(); 8736 assert(RelevantVD && 8737 "No relevant declaration related with device pointer??"); 8738 8739 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8740 RelevantVD); 8741 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8742 } 8743 } 8744 } 8745 8746 // Append any pending zero-length pointers which are struct members and 8747 // used with use_device_ptr or use_device_addr. 8748 auto CI = DeferredInfo.find(Data.first); 8749 if (CI != DeferredInfo.end()) { 8750 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8751 llvm::Value *BasePtr; 8752 llvm::Value *Ptr; 8753 if (L.ForDeviceAddr) { 8754 if (L.IE->isGLValue()) 8755 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8756 else 8757 Ptr = this->CGF.EmitScalarExpr(L.IE); 8758 BasePtr = Ptr; 8759 // Entry is RETURN_PARAM. Also, set the placeholder value 8760 // MEMBER_OF=FFFF so that the entry is later updated with the 8761 // correct value of MEMBER_OF. 8762 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8763 } else { 8764 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8765 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8766 L.IE->getExprLoc()); 8767 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8768 // placeholder value MEMBER_OF=FFFF so that the entry is later 8769 // updated with the correct value of MEMBER_OF. 8770 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8771 OMP_MAP_MEMBER_OF); 8772 } 8773 CurInfo.Exprs.push_back(L.VD); 8774 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8775 CurInfo.Pointers.push_back(Ptr); 8776 CurInfo.Sizes.push_back( 8777 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8778 CurInfo.Mappers.push_back(nullptr); 8779 } 8780 } 8781 // If there is an entry in PartialStruct it means we have a struct with 8782 // individual members mapped. Emit an extra combined entry. 8783 if (PartialStruct.Base.isValid()) { 8784 CurInfo.NonContigInfo.Dims.push_back(0); 8785 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); 8786 } 8787 8788 // We need to append the results of this capture to what we already 8789 // have. 8790 CombinedInfo.append(CurInfo); 8791 } 8792 // Append data for use_device_ptr clauses. 8793 CombinedInfo.append(UseDevicePtrCombinedInfo); 8794 } 8795 8796 public: 8797 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8798 : CurDir(&Dir), CGF(CGF) { 8799 // Extract firstprivate clause information. 8800 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8801 for (const auto *D : C->varlists()) 8802 FirstPrivateDecls.try_emplace( 8803 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8804 // Extract implicit firstprivates from uses_allocators clauses. 
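    // For illustration (hypothetical declarations, assuming the usual
    // <omp.h> types): both the allocator handle and its traits array become
    // implicit firstprivates of the target region:
    // \code
    //   omp_alloctrait_t Traits[] = {{omp_atk_alignment, 64}};
    //   omp_allocator_handle_t MyAlloc;
    //   #pragma omp target uses_allocators(MyAlloc(Traits))
    //   { /* MyAlloc may be used with omp_alloc here. */ }
    // \endcode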
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped
  /// struct and take care of the mapping flags of the arguments corresponding
  /// to individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct.
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element.
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element).
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // The map type is always TARGET_PARAM if we are generating info for
    // captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
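    // For illustration (hypothetical variables): with
    // \code
    //   struct S { int A; int B; } V;
    //   #pragma omp target map(present, to: V.A)
    //   { ++V.A; }
    // \endcode
    // the runtime must report an error if V is not already present on the
    // device instead of allocating storage for the combined entry.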
8883 if (CurTypes.end() != 8884 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 8885 return Type & OMP_MAP_PRESENT; 8886 })) 8887 CombinedInfo.Types.back() |= OMP_MAP_PRESENT; 8888 // Remove TARGET_PARAM flag from the first element 8889 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 8890 // If any element has the ompx_hold modifier, then make sure the runtime 8891 // uses the hold reference count for the struct as a whole so that it won't 8892 // be unmapped by an extra dynamic reference count decrement. Add it to all 8893 // elements as well so the runtime knows which reference count to check 8894 // when determining whether it's time for device-to-host transfers of 8895 // individual elements. 8896 if (CurTypes.end() != 8897 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 8898 return Type & OMP_MAP_OMPX_HOLD; 8899 })) { 8900 CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD; 8901 for (auto &M : CurTypes) 8902 M |= OMP_MAP_OMPX_HOLD; 8903 } 8904 8905 // All other current entries will be MEMBER_OF the combined entry 8906 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8907 // 0xFFFF in the MEMBER_OF field). 8908 OpenMPOffloadMappingFlags MemberOfFlag = 8909 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); 8910 for (auto &M : CurTypes) 8911 setCorrectMemberOfFlag(M, MemberOfFlag); 8912 } 8913 8914 /// Generate all the base pointers, section pointers, sizes, map types, and 8915 /// mappers for the extracted mappable expressions (all included in \a 8916 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8917 /// pair of the relevant declaration and index where it occurs is appended to 8918 /// the device pointers info array. 8919 void generateAllInfo( 8920 MapCombinedInfoTy &CombinedInfo, 8921 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8922 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8923 assert(CurDir.is<const OMPExecutableDirective *>() && 8924 "Expect a executable directive"); 8925 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8926 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); 8927 } 8928 8929 /// Generate all the base pointers, section pointers, sizes, map types, and 8930 /// mappers for the extracted map clauses of user-defined mapper (all included 8931 /// in \a CombinedInfo). 8932 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { 8933 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8934 "Expect a declare mapper directive"); 8935 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8936 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo); 8937 } 8938 8939 /// Emit capture info for lambdas for variables captured by reference. 
8940 void generateInfoForLambdaCaptures( 8941 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 8942 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8943 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType(); 8944 const auto *RD = VDType->getAsCXXRecordDecl(); 8945 if (!RD || !RD->isLambda()) 8946 return; 8947 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType), 8948 CGF.getContext().getDeclAlign(VD)); 8949 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType); 8950 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8951 FieldDecl *ThisCapture = nullptr; 8952 RD->getCaptureFields(Captures, ThisCapture); 8953 if (ThisCapture) { 8954 LValue ThisLVal = 8955 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8956 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8957 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 8958 VDLVal.getPointer(CGF)); 8959 CombinedInfo.Exprs.push_back(VD); 8960 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF)); 8961 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF)); 8962 CombinedInfo.Sizes.push_back( 8963 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8964 CGF.Int64Ty, /*isSigned=*/true)); 8965 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8966 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8967 CombinedInfo.Mappers.push_back(nullptr); 8968 } 8969 for (const LambdaCapture &LC : RD->captures()) { 8970 if (!LC.capturesVariable()) 8971 continue; 8972 const VarDecl *VD = LC.getCapturedVar(); 8973 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8974 continue; 8975 auto It = Captures.find(VD); 8976 assert(It != Captures.end() && "Found lambda capture without field."); 8977 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8978 if (LC.getCaptureKind() == LCK_ByRef) { 8979 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8980 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8981 VDLVal.getPointer(CGF)); 8982 CombinedInfo.Exprs.push_back(VD); 8983 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8984 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF)); 8985 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8986 CGF.getTypeSize( 8987 VD->getType().getCanonicalType().getNonReferenceType()), 8988 CGF.Int64Ty, /*isSigned=*/true)); 8989 } else { 8990 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8991 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8992 VDLVal.getPointer(CGF)); 8993 CombinedInfo.Exprs.push_back(VD); 8994 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8995 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal()); 8996 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8997 } 8998 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8999 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 9000 CombinedInfo.Mappers.push_back(nullptr); 9001 } 9002 } 9003 9004 /// Set correct indices for lambdas captures. 9005 void adjustMemberOfForLambdaCaptures( 9006 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 9007 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 9008 MapFlagsArrayTy &Types) const { 9009 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 9010 // Set correct member_of idx for all implicit lambda captures. 
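      // A sketch of the encoding, assuming the current 64-bit layout where
      // the MEMBER_OF bits occupy the top 16 bits: getMemberOfFlag(TgtIdx)
      // stores TgtIdx + 1 there, so an entry whose parent lambda is argument
      // 0 carries MEMBER_OF == 1:
      // \code
      //   uint64_t Flags = ((uint64_t)0 + 1) << 48; // MEMBER_OF(1)
      // \endcode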
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated with a given capture (all included in \a
  /// CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // For map(to: lambda): skip it here; it is processed in
    // generateDefaultMapInfo.
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The expression is not correct if the mapping is implicit.
        const Expr *E = (C->getMapLoc().isValid()) ?
*EI : nullptr; 9082 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9083 std::tie(VDecl, Components, Mapper) = L; 9084 assert(VDecl == VD && "We got information for the wrong declaration??"); 9085 assert(!Components.empty() && 9086 "Not expecting declaration with no component lists."); 9087 DeclComponentLists.emplace_back(Components, C->getMapType(), 9088 C->getMapTypeModifiers(), 9089 C->isImplicit(), Mapper, E); 9090 ++EI; 9091 } 9092 } 9093 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS, 9094 const MapData &RHS) { 9095 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS); 9096 OpenMPMapClauseKind MapType = std::get<1>(RHS); 9097 bool HasPresent = 9098 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present); 9099 bool HasAllocs = MapType == OMPC_MAP_alloc; 9100 MapModifiers = std::get<2>(RHS); 9101 MapType = std::get<1>(LHS); 9102 bool HasPresentR = 9103 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present); 9104 bool HasAllocsR = MapType == OMPC_MAP_alloc; 9105 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR); 9106 }); 9107 9108 // Find overlapping elements (including the offset from the base element). 9109 llvm::SmallDenseMap< 9110 const MapData *, 9111 llvm::SmallVector< 9112 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 9113 4> 9114 OverlappedData; 9115 size_t Count = 0; 9116 for (const MapData &L : DeclComponentLists) { 9117 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9118 OpenMPMapClauseKind MapType; 9119 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9120 bool IsImplicit; 9121 const ValueDecl *Mapper; 9122 const Expr *VarRef; 9123 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9124 L; 9125 ++Count; 9126 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 9127 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 9128 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper, 9129 VarRef) = L1; 9130 auto CI = Components.rbegin(); 9131 auto CE = Components.rend(); 9132 auto SI = Components1.rbegin(); 9133 auto SE = Components1.rend(); 9134 for (; CI != CE && SI != SE; ++CI, ++SI) { 9135 if (CI->getAssociatedExpression()->getStmtClass() != 9136 SI->getAssociatedExpression()->getStmtClass()) 9137 break; 9138 // Are we dealing with different variables/fields? 9139 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 9140 break; 9141 } 9142 // Found overlapping if, at least for one component, reached the head 9143 // of the components list. 9144 if (CI == CE || SI == SE) { 9145 // Ignore it if it is the same component. 9146 if (CI == CE && SI == SE) 9147 continue; 9148 const auto It = (SI == SE) ? CI : SI; 9149 // If one component is a pointer and another one is a kind of 9150 // dereference of this pointer (array subscript, section, dereference, 9151 // etc.), it is not an overlapping. 9152 // Same, if one component is a base and another component is a 9153 // dereferenced pointer memberexpr with the same base. 9154 if (!isa<MemberExpr>(It->getAssociatedExpression()) || 9155 (std::prev(It)->getAssociatedDeclaration() && 9156 std::prev(It) 9157 ->getAssociatedDeclaration() 9158 ->getType() 9159 ->isPointerType()) || 9160 (It->getAssociatedDeclaration() && 9161 It->getAssociatedDeclaration()->getType()->isPointerType() && 9162 std::next(It) != CE && std::next(It) != SE)) 9163 continue; 9164 const MapData &BaseData = CI == CE ? 
                                             L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements is less than a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // First, go through all of the elements that have overlapped components.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Then go through the remaining elements without overlapped components.
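    // For illustration (hypothetical variables), the overlap handling above
    // matters for directives such as:
    // \code
    //   struct S { int A; double B; } V;
    //   #pragma omp target map(tofrom: V) map(to: V.A)
    //   { V.B += 1.0; }
    // \endcode
    // The component list for V.A shares its base with the list for V, so V
    // is emitted with V.A treated as an overlapped section, while the loop
    // below emits lists with no overlaps unchanged.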
9252 for (const MapData &L : DeclComponentLists) { 9253 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9254 OpenMPMapClauseKind MapType; 9255 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9256 bool IsImplicit; 9257 const ValueDecl *Mapper; 9258 const Expr *VarRef; 9259 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9260 L; 9261 auto It = OverlappedData.find(&L); 9262 if (It == OverlappedData.end()) 9263 generateInfoForComponentList(MapType, MapModifiers, llvm::None, 9264 Components, CombinedInfo, PartialStruct, 9265 IsFirstComponentList, IsImplicit, Mapper, 9266 /*ForDeviceAddr=*/false, VD, VarRef); 9267 IsFirstComponentList = false; 9268 } 9269 } 9270 9271 /// Generate the default map information for a given capture \a CI, 9272 /// record field declaration \a RI and captured value \a CV. 9273 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 9274 const FieldDecl &RI, llvm::Value *CV, 9275 MapCombinedInfoTy &CombinedInfo) const { 9276 bool IsImplicit = true; 9277 // Do the default mapping. 9278 if (CI.capturesThis()) { 9279 CombinedInfo.Exprs.push_back(nullptr); 9280 CombinedInfo.BasePointers.push_back(CV); 9281 CombinedInfo.Pointers.push_back(CV); 9282 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 9283 CombinedInfo.Sizes.push_back( 9284 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 9285 CGF.Int64Ty, /*isSigned=*/true)); 9286 // Default map type. 9287 CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM); 9288 } else if (CI.capturesVariableByCopy()) { 9289 const VarDecl *VD = CI.getCapturedVar(); 9290 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 9291 CombinedInfo.BasePointers.push_back(CV); 9292 CombinedInfo.Pointers.push_back(CV); 9293 if (!RI.getType()->isAnyPointerType()) { 9294 // We have to signal to the runtime captures passed by value that are 9295 // not pointers. 9296 CombinedInfo.Types.push_back(OMP_MAP_LITERAL); 9297 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9298 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 9299 } else { 9300 // Pointers are implicitly mapped with a zero size and no flags 9301 // (other than first map that is added for all implicit maps). 9302 CombinedInfo.Types.push_back(OMP_MAP_NONE); 9303 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 9304 } 9305 auto I = FirstPrivateDecls.find(VD); 9306 if (I != FirstPrivateDecls.end()) 9307 IsImplicit = I->getSecond(); 9308 } else { 9309 assert(CI.capturesVariable() && "Expected captured reference."); 9310 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 9311 QualType ElementType = PtrTy->getPointeeType(); 9312 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9313 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 9314 // The default map type for a scalar/complex type is 'to' because by 9315 // default the value doesn't have to be retrieved. For an aggregate 9316 // type, the default is 'tofrom'. 
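      // For illustration (hypothetical variables): in
      // \code
      //   double D[8];
      //   #pragma omp target
      //   { D[0] += 1.0; }
      // \endcode
      // the implicitly captured array D defaults to 'tofrom', whereas a
      // by-reference scalar capture would default to 'to' only.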
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target
    // parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //   uint64_t offset;
  //   uint64_t count;
  //   uint64_t stride;
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variables here since the size of "Dims" equals the
  // size of "Components"; however, the sizes of the offset, count, and
  // stride arrays equal the number of base declarations that are
  // non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting IR if the dimension size is 1, since it cannot be
    // non-contiguous.
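    // For illustration (hypothetical variables), an update such as
    // \code
    //   double A[4][4];
    //   #pragma omp target update to(A[0:2][1:2])
    // \endcode
    // transfers a strided sub-rectangle, so every dimension of the section
    // gets a descriptor_dim entry recording its offset, count, and stride.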
9379 if (NonContigInfo.Dims[I] == 1) 9380 continue; 9381 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]); 9382 QualType ArrayTy = 9383 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); 9384 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 9385 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { 9386 unsigned RevIdx = EE - II - 1; 9387 LValue DimsLVal = CGF.MakeAddrLValue( 9388 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); 9389 // Offset 9390 LValue OffsetLVal = CGF.EmitLValueForField( 9391 DimsLVal, *std::next(RD->field_begin(), OffsetFD)); 9392 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal); 9393 // Count 9394 LValue CountLVal = CGF.EmitLValueForField( 9395 DimsLVal, *std::next(RD->field_begin(), CountFD)); 9396 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal); 9397 // Stride 9398 LValue StrideLVal = CGF.EmitLValueForField( 9399 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 9400 CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); 9401 } 9402 // args[I] = &dims 9403 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9404 DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty); 9405 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9406 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9407 Info.PointersArray, 0, I); 9408 Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign()); 9409 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); 9410 ++L; 9411 } 9412 } 9413 9414 // Try to extract the base declaration from a `this->x` expression if possible. 9415 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9416 if (!E) 9417 return nullptr; 9418 9419 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9420 if (const MemberExpr *ME = 9421 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9422 return ME->getMemberDecl(); 9423 return nullptr; 9424 } 9425 9426 /// Emit a string constant containing the names of the values mapped to the 9427 /// offloading runtime library. 9428 llvm::Constant * 9429 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9430 MappableExprsHandler::MappingExprInfo &MapExprs) { 9431 9432 uint32_t SrcLocStrSize; 9433 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9434 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 9435 9436 SourceLocation Loc; 9437 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9438 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9439 Loc = VD->getLocation(); 9440 else 9441 Loc = MapExprs.getMapExpr()->getExprLoc(); 9442 } else { 9443 Loc = MapExprs.getMapDecl()->getLocation(); 9444 } 9445 9446 std::string ExprName; 9447 if (MapExprs.getMapExpr()) { 9448 PrintingPolicy P(CGF.getContext().getLangOpts()); 9449 llvm::raw_string_ostream OS(ExprName); 9450 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9451 OS.flush(); 9452 } else { 9453 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9454 } 9455 9456 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9457 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, 9458 PLoc.getLine(), PLoc.getColumn(), 9459 SrcLocStrSize); 9460 } 9461 9462 /// Emit the arrays used to pass the captures and map information to the 9463 /// offloading runtime library. If there is no map or capture information, 9464 /// return nullptr by reference. 
9465 static void emitOffloadingArrays( 9466 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9467 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 9468 bool IsNonContiguous = false) { 9469 CodeGenModule &CGM = CGF.CGM; 9470 ASTContext &Ctx = CGF.getContext(); 9471 9472 // Reset the array information. 9473 Info.clearArrayInfo(); 9474 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 9475 9476 if (Info.NumberOfPtrs) { 9477 // Detect if we have any capture size requiring runtime evaluation of the 9478 // size so that a constant array could be eventually used. 9479 9480 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9481 QualType PointerArrayType = Ctx.getConstantArrayType( 9482 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9483 /*IndexTypeQuals=*/0); 9484 9485 Info.BasePointersArray = 9486 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9487 Info.PointersArray = 9488 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9489 Address MappersArray = 9490 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 9491 Info.MappersArray = MappersArray.getPointer(); 9492 9493 // If we don't have any VLA types or other types that require runtime 9494 // evaluation, we can use a constant array for the map sizes, otherwise we 9495 // need to fill up the arrays as we do for the pointers. 9496 QualType Int64Ty = 9497 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9498 SmallVector<llvm::Constant *> ConstSizes( 9499 CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0)); 9500 llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size()); 9501 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { 9502 if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) { 9503 if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) { 9504 if (IsNonContiguous && (CombinedInfo.Types[I] & 9505 MappableExprsHandler::OMP_MAP_NON_CONTIG)) 9506 ConstSizes[I] = llvm::ConstantInt::get( 9507 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]); 9508 else 9509 ConstSizes[I] = CI; 9510 continue; 9511 } 9512 } 9513 RuntimeSizes.set(I); 9514 } 9515 9516 if (RuntimeSizes.all()) { 9517 QualType SizeArrayType = Ctx.getConstantArrayType( 9518 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9519 /*IndexTypeQuals=*/0); 9520 Info.SizesArray = 9521 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9522 } else { 9523 auto *SizesArrayInit = llvm::ConstantArray::get( 9524 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9525 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9526 auto *SizesArrayGbl = new llvm::GlobalVariable( 9527 CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true, 9528 llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name); 9529 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9530 if (RuntimeSizes.any()) { 9531 QualType SizeArrayType = Ctx.getConstantArrayType( 9532 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9533 /*IndexTypeQuals=*/0); 9534 Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes"); 9535 llvm::Value *GblConstPtr = 9536 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9537 SizesArrayGbl, CGM.Int64Ty->getPointerTo()); 9538 CGF.Builder.CreateMemCpy( 9539 Buffer, 9540 Address(GblConstPtr, CGM.Int64Ty, 9541 CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth( 9542 /*DestWidth=*/64, /*Signed=*/false))), 9543 
CGF.getTypeSize(SizeArrayType)); 9544 Info.SizesArray = Buffer.getPointer(); 9545 } else { 9546 Info.SizesArray = SizesArrayGbl; 9547 } 9548 } 9549 9550 // The map types are always constant so we don't need to generate code to 9551 // fill arrays. Instead, we create an array constant. 9552 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 9553 llvm::copy(CombinedInfo.Types, Mapping.begin()); 9554 std::string MaptypesName = 9555 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9556 auto *MapTypesArrayGbl = 9557 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9558 Info.MapTypesArray = MapTypesArrayGbl; 9559 9560 // The information types are only built if there is debug information 9561 // requested. 9562 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { 9563 Info.MapNamesArray = llvm::Constant::getNullValue( 9564 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); 9565 } else { 9566 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 9567 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 9568 }; 9569 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); 9570 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); 9571 std::string MapnamesName = 9572 CGM.getOpenMPRuntime().getName({"offload_mapnames"}); 9573 auto *MapNamesArrayGbl = 9574 OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName); 9575 Info.MapNamesArray = MapNamesArrayGbl; 9576 } 9577 9578 // If there's a present map type modifier, it must not be applied to the end 9579 // of a region, so generate a separate map type array in that case. 9580 if (Info.separateBeginEndCalls()) { 9581 bool EndMapTypesDiffer = false; 9582 for (uint64_t &Type : Mapping) { 9583 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { 9584 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; 9585 EndMapTypesDiffer = true; 9586 } 9587 } 9588 if (EndMapTypesDiffer) { 9589 MapTypesArrayGbl = 9590 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9591 Info.MapTypesArrayEnd = MapTypesArrayGbl; 9592 } 9593 } 9594 9595 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 9596 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 9597 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 9598 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9599 Info.BasePointersArray, 0, I); 9600 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9601 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9602 Address BPAddr(BP, BPVal->getType(), 9603 Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9604 CGF.Builder.CreateStore(BPVal, BPAddr); 9605 9606 if (Info.requiresDevicePointerInfo()) 9607 if (const ValueDecl *DevVD = 9608 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 9609 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 9610 9611 llvm::Value *PVal = CombinedInfo.Pointers[I]; 9612 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9613 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9614 Info.PointersArray, 0, I); 9615 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9616 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9617 Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9618 CGF.Builder.CreateStore(PVal, PAddr); 9619 9620 if (RuntimeSizes.test(I)) { 9621 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 9622 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9623 Info.SizesArray, 9624 /*Idx0=*/0, 9625 /*Idx1=*/I); 9626 Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty)); 9627 
CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 9628 CGM.Int64Ty, 9629 /*isSigned=*/true), 9630 SAddr); 9631 } 9632 9633 // Fill up the mapper array. 9634 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9635 if (CombinedInfo.Mappers[I]) { 9636 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 9637 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 9638 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 9639 Info.HasMapper = true; 9640 } 9641 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 9642 CGF.Builder.CreateStore(MFunc, MAddr); 9643 } 9644 } 9645 9646 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || 9647 Info.NumberOfPtrs == 0) 9648 return; 9649 9650 emitNonContiguousDescriptor(CGF, CombinedInfo, Info); 9651 } 9652 9653 namespace { 9654 /// Additional arguments for emitOffloadingArraysArgument function. 9655 struct ArgumentsOptions { 9656 bool ForEndCall = false; 9657 ArgumentsOptions() = default; 9658 ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {} 9659 }; 9660 } // namespace 9661 9662 /// Emit the arguments to be passed to the runtime library based on the 9663 /// arrays of base pointers, pointers, sizes, map types, and mappers. If 9664 /// ForEndCall, emit map types to be passed for the end of the region instead of 9665 /// the beginning. 9666 static void emitOffloadingArraysArgument( 9667 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9668 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9669 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9670 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9671 const ArgumentsOptions &Options = ArgumentsOptions()) { 9672 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9673 "expected region end call to runtime only when end call is separate"); 9674 CodeGenModule &CGM = CGF.CGM; 9675 if (Info.NumberOfPtrs) { 9676 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9677 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9678 Info.BasePointersArray, 9679 /*Idx0=*/0, /*Idx1=*/0); 9680 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9681 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9682 Info.PointersArray, 9683 /*Idx0=*/0, 9684 /*Idx1=*/0); 9685 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9686 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9687 /*Idx0=*/0, /*Idx1=*/0); 9688 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9689 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9690 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9691 : Info.MapTypesArray, 9692 /*Idx0=*/0, 9693 /*Idx1=*/0); 9694 9695 // Only emit the mapper information arrays if debug information is 9696 // requested. 
9697 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9698 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9699 else 9700 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9701 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9702 Info.MapNamesArray, 9703 /*Idx0=*/0, 9704 /*Idx1=*/0); 9705 // If there is no user-defined mapper, set the mapper array to nullptr to 9706 // avoid an unnecessary data privatization 9707 if (!Info.HasMapper) 9708 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9709 else 9710 MappersArrayArg = 9711 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9712 } else { 9713 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9714 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9715 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9716 MapTypesArrayArg = 9717 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9718 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9719 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9720 } 9721 } 9722 9723 /// Check for inner distribute directive. 9724 static const OMPExecutableDirective * 9725 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9726 const auto *CS = D.getInnermostCapturedStmt(); 9727 const auto *Body = 9728 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9729 const Stmt *ChildStmt = 9730 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9731 9732 if (const auto *NestedDir = 9733 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9734 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9735 switch (D.getDirectiveKind()) { 9736 case OMPD_target: 9737 if (isOpenMPDistributeDirective(DKind)) 9738 return NestedDir; 9739 if (DKind == OMPD_teams) { 9740 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9741 /*IgnoreCaptured=*/true); 9742 if (!Body) 9743 return nullptr; 9744 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9745 if (const auto *NND = 9746 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9747 DKind = NND->getDirectiveKind(); 9748 if (isOpenMPDistributeDirective(DKind)) 9749 return NND; 9750 } 9751 } 9752 return nullptr; 9753 case OMPD_target_teams: 9754 if (isOpenMPDistributeDirective(DKind)) 9755 return NestedDir; 9756 return nullptr; 9757 case OMPD_target_parallel: 9758 case OMPD_target_simd: 9759 case OMPD_target_parallel_for: 9760 case OMPD_target_parallel_for_simd: 9761 return nullptr; 9762 case OMPD_target_teams_distribute: 9763 case OMPD_target_teams_distribute_simd: 9764 case OMPD_target_teams_distribute_parallel_for: 9765 case OMPD_target_teams_distribute_parallel_for_simd: 9766 case OMPD_parallel: 9767 case OMPD_for: 9768 case OMPD_parallel_for: 9769 case OMPD_parallel_master: 9770 case OMPD_parallel_sections: 9771 case OMPD_for_simd: 9772 case OMPD_parallel_for_simd: 9773 case OMPD_cancel: 9774 case OMPD_cancellation_point: 9775 case OMPD_ordered: 9776 case OMPD_threadprivate: 9777 case OMPD_allocate: 9778 case OMPD_task: 9779 case OMPD_simd: 9780 case OMPD_tile: 9781 case OMPD_unroll: 9782 case OMPD_sections: 9783 case OMPD_section: 9784 case OMPD_single: 9785 case OMPD_master: 9786 case OMPD_critical: 9787 case OMPD_taskyield: 9788 case OMPD_barrier: 9789 case OMPD_taskwait: 9790 case OMPD_taskgroup: 9791 case OMPD_atomic: 9792 case OMPD_flush: 9793 case OMPD_depobj: 9794 case 
OMPD_scan: 9795 case OMPD_teams: 9796 case OMPD_target_data: 9797 case OMPD_target_exit_data: 9798 case OMPD_target_enter_data: 9799 case OMPD_distribute: 9800 case OMPD_distribute_simd: 9801 case OMPD_distribute_parallel_for: 9802 case OMPD_distribute_parallel_for_simd: 9803 case OMPD_teams_distribute: 9804 case OMPD_teams_distribute_simd: 9805 case OMPD_teams_distribute_parallel_for: 9806 case OMPD_teams_distribute_parallel_for_simd: 9807 case OMPD_target_update: 9808 case OMPD_declare_simd: 9809 case OMPD_declare_variant: 9810 case OMPD_begin_declare_variant: 9811 case OMPD_end_declare_variant: 9812 case OMPD_declare_target: 9813 case OMPD_end_declare_target: 9814 case OMPD_declare_reduction: 9815 case OMPD_declare_mapper: 9816 case OMPD_taskloop: 9817 case OMPD_taskloop_simd: 9818 case OMPD_master_taskloop: 9819 case OMPD_master_taskloop_simd: 9820 case OMPD_parallel_master_taskloop: 9821 case OMPD_parallel_master_taskloop_simd: 9822 case OMPD_requires: 9823 case OMPD_metadirective: 9824 case OMPD_unknown: 9825 default: 9826 llvm_unreachable("Unexpected directive."); 9827 } 9828 } 9829 9830 return nullptr; 9831 } 9832 9833 /// Emit the user-defined mapper function. The code generation follows the 9834 /// pattern in the example below. 9835 /// \code 9836 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9837 /// void *base, void *begin, 9838 /// int64_t size, int64_t type, 9839 /// void *name = nullptr) { 9840 /// // Allocate space for an array section first or add a base/begin for 9841 /// // pointer dereference. 9842 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) && 9843 /// !maptype.IsDelete) 9844 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9845 /// size*sizeof(Ty), clearToFromMember(type)); 9846 /// // Map members. 9847 /// for (unsigned i = 0; i < size; i++) { 9848 /// // For each component specified by this mapper: 9849 /// for (auto c : begin[i]->all_components) { 9850 /// if (c.hasMapper()) 9851 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 9852 /// c.arg_type, c.arg_name); 9853 /// else 9854 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 9855 /// c.arg_begin, c.arg_size, c.arg_type, 9856 /// c.arg_name); 9857 /// } 9858 /// } 9859 /// // Delete the array section. 9860 /// if (size > 1 && maptype.IsDelete) 9861 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9862 /// size*sizeof(Ty), clearToFromMember(type)); 9863 /// } 9864 /// \endcode 9865 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 9866 CodeGenFunction *CGF) { 9867 if (UDMMap.count(D) > 0) 9868 return; 9869 ASTContext &C = CGM.getContext(); 9870 QualType Ty = D->getType(); 9871 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 9872 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 9873 auto *MapperVarDecl = 9874 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 9875 SourceLocation Loc = D->getLocation(); 9876 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 9877 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty); 9878 9879 // Prepare mapper function arguments and attributes. 
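  // For illustration, a user-defined mapper such as the following
  // (hypothetical type and mapper names) is what drives this emission:
  // \code
  //   struct Vec { int Len; double *Data; };
  //   #pragma omp declare mapper(id : Vec V) map(V.Len, V.Data[0:V.Len])
  // \endcode
  // The function emitted below is invoked by the runtime for each mapped
  // element of type Vec.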

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array
  // element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc | to    | alloc | to     | release | delete
    // from   | alloc | alloc | from  | from   | release | delete
    // tofrom | alloc | to    | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);
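
    // The control flow above is equivalent to the following scalar sketch
    // (illustrative only, using the OMP_MAP_TO/OMP_MAP_FROM flag bits):
    //
    // \code
    // int64_t LeftToFrom = MapType & (OMP_MAP_TO | OMP_MAP_FROM);
    // if (LeftToFrom == 0)                          // alloc
    //   CurMapType = MemberMapType & ~(OMP_MAP_TO | OMP_MAP_FROM);
    // else if (LeftToFrom == OMP_MAP_TO)            // to
    //   CurMapType = MemberMapType & ~OMP_MAP_FROM;
    // else if (LeftToFrom == OMP_MAP_FROM)          // from
    //   CurMapType = MemberMapType & ~OMP_MAP_TO;
    // else                                          // tofrom
    //   CurMapType = MemberMapType;
    // \endcode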

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped
/// and whether the \a MapType instructs to delete this section. If \a IsInit
/// is true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e.,
  // \p Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}
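
// As a usage illustration (hypothetical declaration, not taken from this
// file), for
//
// \code
// struct S { int Len; double *Data; };
// #pragma omp declare mapper(id : S V) map(V, V.Data[0 : V.Len])
// \endcode
//
// emitUserDefinedMapper() emits the ".omp_mapper." function sketched above,
// and getOrCreateUserDefinedMapperFunc() returns it whenever a map clause
// references mapper(id).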

llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return llvm::ConstantInt::get(CGF.Int64Ty, 0);

  const auto *LD = cast<OMPLoopDirective>(TD);
  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
    return NumIterations;
  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>() ||
                                 D.hasClausesOfKind<OMPInReductionClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate code for the host fallback function.
  auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
                        &CS, OffloadingMandatory](CodeGenFunction &CGF) {
    if (OffloadingMandatory) {
      CGF.Builder.CreateUnreachable();
    } else {
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    }
  };
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
                    &MapNamesArray, SizeEmitter,
                    FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      FallbackGen(CGF);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");
    (void)OutlinedFnID;

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Get tripcount for the target loop-based directive.
    llvm::Value *NumIterations =
        emitTargetNumIterationsCall(CGF, D, SizeEmitter);

    // Arguments for the target kernel.
    SmallVector<llvm::Value *> KernelArgs{
        CGF.Builder.getInt32(/*Version=*/1),
        PointerNum,
        InputInfo.BasePointersArray.getPointer(),
        InputInfo.PointersArray.getPointer(),
        InputInfo.SizesArray.getPointer(),
        MapTypesArray,
        MapNamesArray,
        InputInfo.MappersArray.getPointer(),
        NumIterations};

    // Arguments passed to the 'nowait' variant.
    SmallVector<llvm::Value *> NoWaitKernelArgs{
        CGF.Builder.getInt32(0),
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGF.Builder.getInt32(0),
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
    };

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();

    // The target region is an outlined function launched by the runtime
    // via calls to __tgt_target_kernel().
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply calls the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    // Check the error code and execute the host version if required.
    CGF.Builder.restoreIP(
        HasNoWait ? OMPBuilder.emitTargetKernel(
                        CGF.Builder, Return, RTLoc, DeviceID, NumTeams,
                        NumThreads, OutlinedFnID, KernelArgs, NoWaitKernelArgs)
                  : OMPBuilder.emitTargetKernel(CGF.Builder, Return, RTLoc,
                                                DeviceID, NumTeams, NumThreads,
                                                OutlinedFnID, KernelArgs));

    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    FallbackGen(CGF);

    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    FallbackGen(CGF);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have
      // map information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise
        // we just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});

    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };
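
  // The overall shape of the code emitted below is, as a sketch:
  //
  // \code
  // if (IfCond) { // omitted when there is no if() clause
  //   <fill offloading arrays; call __tgt_target_kernel();
  //    on failure, run the host fallback>
  // } else {
  //   <run the host fallback, possibly wrapped in an outer task>
  // }
  // \endcode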

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on
  // the host regardless of the conditional in the if clause if, e.g., the
  // user does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point?
    // If so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}
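
// Illustrative trigger (hypothetical user code, not taken from this file):
//
// \code
// #pragma omp declare target device_type(nohost)
// void DeviceOnlyFn(); // isAssumedToBeNotEmitted() is true on the host
// #pragma omp end declare target
// \endcode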

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && *DevTy != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug
      // info may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must be link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected target-based directive.");
}

void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with "
                     "the static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() && !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
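
  // Conceptually, the emitted registration function is the following sketch
  // (the exact symbol name comes from the getName() call below):
  //
  // \code
  // static void requires_reg() {
  //   __tgt_register_requires(Flags); // OMP_REQ_UNIFIED_SHARED_MEMORY if set
  // }
  // \endcode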
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from raising an error
    // for mismatched requires clauses across compilation units that don't
    // contain at least one target region.
    assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}
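
// For a construct such as (illustrative user code):
//
// \code
// #pragma omp teams num_teams(4) thread_limit(64)
// Body();
// \endcode
//
// emitTeamsCall() above produces the __kmpc_fork_teams invocation, while the
// clause values travel separately through __kmpc_push_num_teams, emitted by
// emitNumTeamsClause() below.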

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit).
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all
  // the arguments of the runtime call by reference because they are used in
  // the closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the
    // region here. It will have to be duplicated: with and without
    // privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };
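
  // As a sketch, for illustrative user code such as
  //
  // \code
  // #pragma omp target data map(tofrom : A[0 : N])
  // Body();
  // \endcode
  //
  // the emitted code brackets the body with __tgt_target_data_begin_mapper()
  // and __tgt_target_data_end_mapper() calls over the same offloading arrays.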

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause
  // evaluates to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body
  // in between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
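
  // Illustrative mapping from the standalone directives to the runtime entry
  // points selected below:
  //
  // \code
  // #pragma omp target enter data map(to : X)  // __tgt_target_data_begin_mapper
  // #pragma omp target exit data map(from : X) // __tgt_target_data_end_mapper
  // #pragma omp target update to(X)            // __tgt_target_data_update_mapper
  // \endcode
  //
  // with the *_nowait_mapper variants chosen when a 'nowait' clause is
  // present.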
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

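// For illustration: a standalone
//   #pragma omp target update to(x) nowait
// selects OMPRTL___tgt_target_data_update_nowait_mapper in the switch above
// and, because nowait sets RequiresOuterTask, is emitted inside an outer
// task via EmitOMPTargetTaskBasedDirective rather than inlined.
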
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
  bool HasVarStride = false;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or class
  //      type which is passed by value (except for the type that maps to the
  //      built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 Registers
  // and the Stack Frame of the original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

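// Worked example (hypothetical declaration, for illustration only): for
//   #pragma omp declare simd
//   double f(double x);
// the CDT is the return type 'double' (case a), so sizeof(CDT) is 64 bits.
// Targeting a 256-bit vector register (e.g. AVX) this yields
//   VLEN = 256 / 64 = 4.
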
/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 4.5 of the AAVFABI(2021Q1).
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case Linear:
      Out << 'l';
      break;
    case LinearRef:
      Out << 'R';
      break;
    case LinearUVal:
      Out << 'U';
      break;
    case LinearVal:
      Out << 'L';
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }
    if (ParamAttr.HasVarStride)
      Out << "s" << ParamAttr.StrideOrArg;
    else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
             ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg < 0)
        Out << 'n' << -ParamAttr.StrideOrArg;
      else if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}

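// For illustration: three parameters classified as linear with step 4,
// uniform, and vector with aligned(16) mangle to "l4" + "u" + "va16", i.e.
// "l4uva16"; a negative step of -2 on a linear parameter mangles as "ln2".
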
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      Out << mangleVectorParameters(ParamAttrs);
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined in
// the "Vector Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
    return false;

  if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
      !QT->isReferenceType())
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types within 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(llvm::all_of(Sizes,
                      [](unsigned Size) {
                        return Size == 8 || Size == 16 || Size == 32 ||
                               Size == 64 || Size == 128;
                      }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}

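// For illustration: addAArch64VectorName(2, "N", "_ZGV", 'n', "v", "foo",
// /*OutputBecomesInput=*/false, Fn) attaches the attribute "_ZGVnN2v_foo",
// i.e. a not-masked, 2-lane Advanced SIMD variant of foo with one vector
// parameter ("foo" being a hypothetical mangled name).
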
// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}

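// For illustration: with NDS == 32 (narrowest data size of 32 bits) the
// helper emits the 64-bit and 128-bit Advanced SIMD variants, i.e. VLEN 2
// and VLEN 4, so a notinbranch function "foo" with one vector parameter
// would receive both "_ZGVnN2v_foo" and "_ZGVnN4v_foo".
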
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1: SVE fixed lengths must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out the parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    if (isa<CXXMethodDecl>(FD))
      ParamPositions.try_emplace(FD, 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
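        // Worked example (for illustration, hypothetical declaration): for
        //   #pragma omp declare simd linear(p:2)
        //   void f(double *p);
        // the constant step 2 is rescaled below by sizeof(double) == 8, so
        // the stride encoded in the mangled name is 16 (mangling "l16").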
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}

namespace {
/// Cleanup action for doacross support.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //   kmp_int64 lo; // lower
    //   kmp_int64 up; // upper
    //   kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

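// For illustration: a doacross loop nest such as
//   #pragma omp for ordered(1)
//   for (int i = 0; i < N; ++i) { ... }
// is lowered to __kmpc_doacross_init(loc, gtid, /*num_dims=*/1, dims) on
// entry, __kmpc_doacross_post / __kmpc_doacross_wait for each
// 'ordered depend(source)' / 'depend(sink: ...)' (see emitDoacrossOrdered
// below), and __kmpc_doacross_fini via the cleanup pushed above.
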
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator is specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

/// Return the alignment from an allocate directive if present.
static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
  llvm::Optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);

  if (!AllocateAlignment)
    return nullptr;

  return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
}

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

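// For illustration: a local variable declared with
//   #pragma omp allocate(v) allocator(omp_high_bw_mem_alloc) align(64)
// takes the path above: its storage comes from __kmpc_aligned_alloc(gtid,
// 64, size, allocator) (or __kmpc_alloc when no alignment is requested),
// and the pushed cleanup releases it with __kmpc_free at scope exit.
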
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}

void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr =
        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

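// For illustration: for 'lastprivate(conditional: a)' where 'a' is a double,
// the implicit record built above is laid out roughly as
//   struct lastprivate.conditional { double a; char Fired; };
// Fired is reset to 0 on entry and set to 1 (atomically, from inner parallel
// regions) whenever the privatized variable is written.
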
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
    // }
  }
}

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
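// Reductions are the one construct the SIMD-only runtime still lowers itself:
// a simple reduction needs no runtime support, and the base CGOpenMPRuntime
// implementation emits the combiner expressions inline in that case, so it
// can be reused directly here.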
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  // SIMD-only mode performs no offloading, so no global is ever claimed for
  // device code generation; returning false lets normal host codegen emit it.
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}