//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
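  /// Outlined helpers receive the thread id through a kmp_int32 * parameter;
  /// an illustrative (not verbatim) signature of such a helper is
  ///   void .omp_outlined.(kmp_int32 *.global_tid., kmp_int32 *.bound_tid., ...)
  /// and the LValue returned here refers to that parameter's pointee.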
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
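        // The body of an untied task is split into parts selected by a part
        // id kept in the task data; conceptually (illustrative sketch):
        //   switch (*partid) {
        //   case 0: /* first part  */ *partid = 1; return;
        //   case 1: /* second part */ *partid = 2; return;
        //   default: /* done */ return;
        //   }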
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
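  /// For target regions this is the client-supplied unique name of the target
  /// region entry, so it also identifies the region to the offload machinery.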
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
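    // Install a fresh inlined-region info and stash the lambda/block capture
    // state so the inlined body does not see captures of the enclosing
    // function. A typical use (illustrative) is:
    //   {
    //     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_critical,
    //                                    /*HasCancel=*/false);
    //     CGF.EmitStmt(Body); // emitted within the inlined region
    //   } // destructor restores the previous state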
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMB = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
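/// (This is the location record passed as the first argument to most
/// __kmpc_* entry points.)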
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
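/// The cleanup runs the wrapped action's Exit hook on every normal and EH
/// path out of the region.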
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
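/// \param EmitDeclareReductionInit Emit the initializer of an OpenMP
/// declare-reduction construct rather than a plain copy initializer.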
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars =
        CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
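  // A global is erased only when it is certainly dead: the tracked value must
  // still be alive, still be a declaration, and have no remaining uses.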
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
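  // For example (illustrative), given
  //   #pragma omp declare reduction(myadd : int : omp_out += omp_in)
  // the combiner is emitted as a helper roughly equivalent to
  //   void .omp_combiner.(int *restrict omp_out_parm,
  //                       int *restrict omp_in_parm) {
  //     *omp_out_parm += *omp_in_parm;
  //   }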
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of Clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ?
          OMPD_taskloop : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
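    // Drop the cached placeholder first so the map never points at a deleted
    // instruction.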
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    std::string FunctionName = "";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
                                                Line, Column);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
                                     Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
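  // The resulting IR is roughly (illustrative):
  //   %tid = call i32 @__kmpc_global_thread_num(%struct.ident_t* @<loc>)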
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                            : "__kmpc_for_static_init_4u")
                                : (IVSigned ? "__kmpc_for_static_init_8"
                                            : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ?
                                   CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
      CGM.Int32Ty,           // schedtype
      ITy,                   // lower
      ITy,                   // upper
      ITy,                   // stride
      ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as the line number associated
/// with the relevant entry source location.
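/// For example (values are illustrative only): for a declaration at line 42
/// of a file whose filesystem unique ID has device 0x803 and inode 0x1f,
/// this yields DeviceID=0x803, FileID=0x1f, LineNum=42.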
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc must always be valid and have a file ID (the user cannot use
  // #pragma directives in macros).

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
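  // The cache is an i8** global named from the variable's mangled name plus
  // a "cache" suffix produced by getName(); e.g. "<mangled-name>.cache."
  // (the exact spelling shown here is illustrative).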
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     OMPBuilder.getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init the OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register the constructor/destructor for the variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg =
          CtorCGF.Builder.CreateElementBitCast(
              Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits a destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL: the parameter is reserved by the runtime, which currently
    // requires it to always be NULL and fires an assertion otherwise.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration, which we know does not
  // conflict with any target region.
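  // The prefix built below has the form
  //   __omp_offloading_<device-id>_<file-id>_<variable-name>_l<line>,
  // e.g. "__omp_offloading_803_1f_foo_l42" for a hypothetical variable 'foo'
  // declared at line 42 (the hex IDs here are illustrative).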
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that emits a destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable: it is passed as the first argument of the outlined
// function, as "kmp_int32 *gtid". Otherwise, if we're not inside a parallel
// region but in regular serial code, get the thread ID by calling kmp_int32
// __kmpc_global_thread_num(ident_t *loc), stash it in a temporary, and return
// the address of that temporary.
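// In the serial case the emitted code is roughly equivalent to the following
// (illustrative pseudo-C, not the exact IR):
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//   kmp_int32 threadid_temp = gtid;  // the address of this temp is returned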
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
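/// For a 'critical' region, for example, this brackets the body as (sketch):
///   __kmpc_critical(&loc, gtid, &lock);      // Enter
///   <region body>
///   __kmpc_end_critical(&loc, gtid, &lock);  // Exit
/// In the Conditional case (e.g. 'master' or 'single'), the body is emitted
/// only under an if on the result of the Enter call.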
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt).
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of the blocks/branches.
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical.
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ?
               OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.CreateTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  //                         <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  //                         <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build a function that copies private values from the single region to
    // all other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered.
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose schedule(static, 1).
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder.
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // Only static is allowed for dist_schedule.
  return Chunked ?
                 OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
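  // For example (illustrative): under OpenMP 5.0, a plain 'schedule(dynamic)'
  // loop gets OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic from
  // the check below, while all static schedules fall through unmodified.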
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //       ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //       kmp_int[32|64] lower, kmp_int[32|64] upper,
  //       kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc,
                         isOpenMPLoopDirective(DKind)
                             ? OMP_IDENT_WORK_LOOP
                             : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ?
                                   OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                      Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.CreateFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
2875 KmpTaskTPartId, 2876 /// Function with call of destructors for private variables. 2877 Data1, 2878 /// Task priority. 2879 Data2, 2880 /// (Taskloops only) Lower bound. 2881 KmpTaskTLowerBound, 2882 /// (Taskloops only) Upper bound. 2883 KmpTaskTUpperBound, 2884 /// (Taskloops only) Stride. 2885 KmpTaskTStride, 2886 /// (Taskloops only) Is last iteration flag. 2887 KmpTaskTLastIter, 2888 /// (Taskloops only) Reduction data. 2889 KmpTaskTReductions, 2890 }; 2891 } // anonymous namespace 2892 2893 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2894 return OffloadEntriesTargetRegion.empty() && 2895 OffloadEntriesDeviceGlobalVar.empty(); 2896 } 2897 2898 /// Initialize target region entry. 2899 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2900 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2901 StringRef ParentName, unsigned LineNum, 2902 unsigned Order) { 2903 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2904 "only required for the device " 2905 "code generation."); 2906 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2907 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2908 OMPTargetRegionEntryTargetRegion); 2909 ++OffloadingEntriesNum; 2910 } 2911 2912 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2913 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2914 StringRef ParentName, unsigned LineNum, 2915 llvm::Constant *Addr, llvm::Constant *ID, 2916 OMPTargetRegionEntryKind Flags) { 2917 // If we are emitting code for a target, the entry is already initialized; it 2918 // only has to be registered. 2919 if (CGM.getLangOpts().OpenMPIsDevice) { 2920 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 2921 unsigned DiagID = CGM.getDiags().getCustomDiagID( 2922 DiagnosticsEngine::Error, 2923 "Unable to find target region on line '%0' in the device code."); 2924 CGM.getDiags().Report(DiagID) << LineNum; 2925 return; 2926 } 2927 auto &Entry = 2928 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2929 assert(Entry.isValid() && "Entry not initialized!"); 2930 Entry.setAddress(Addr); 2931 Entry.setID(ID); 2932 Entry.setFlags(Flags); 2933 } else { 2934 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 2935 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2936 ++OffloadingEntriesNum; 2937 } 2938 } 2939 2940 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 2941 unsigned DeviceID, unsigned FileID, StringRef ParentName, 2942 unsigned LineNum) const { 2943 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 2944 if (PerDevice == OffloadEntriesTargetRegion.end()) 2945 return false; 2946 auto PerFile = PerDevice->second.find(FileID); 2947 if (PerFile == PerDevice->second.end()) 2948 return false; 2949 auto PerParentName = PerFile->second.find(ParentName); 2950 if (PerParentName == PerFile->second.end()) 2951 return false; 2952 auto PerLine = PerParentName->second.find(LineNum); 2953 if (PerLine == PerParentName->second.end()) 2954 return false; 2955 // Fail if this entry is already registered. 2956 if (PerLine->second.getAddress() || PerLine->second.getID()) 2957 return false; 2958 return true; 2959 } 2960 2961 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 2962 const OffloadTargetRegionEntryInfoActTy &Action) { 2963 // Scan all target region entries and perform the provided action.
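// Note: the entries live in a four-level map indexed as
// OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum], so the
// walk below visits them per device, per file, per parent function and per
// line; a hypothetical entry would be visited as
//   Action(/*DeviceID=*/42, /*FileID=*/7, /*ParentName=*/"foo", /*Line=*/12, E);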
2964 for (const auto &D : OffloadEntriesTargetRegion) 2965 for (const auto &F : D.second) 2966 for (const auto &P : F.second) 2967 for (const auto &L : P.second) 2968 Action(D.first, F.first, P.first(), L.first, L.second); 2969 } 2970 2971 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2972 initializeDeviceGlobalVarEntryInfo(StringRef Name, 2973 OMPTargetGlobalVarEntryKind Flags, 2974 unsigned Order) { 2975 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2976 "only required for the device " 2977 "code generation."); 2978 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 2979 ++OffloadingEntriesNum; 2980 } 2981 2982 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2983 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 2984 CharUnits VarSize, 2985 OMPTargetGlobalVarEntryKind Flags, 2986 llvm::GlobalValue::LinkageTypes Linkage) { 2987 if (CGM.getLangOpts().OpenMPIsDevice) { 2988 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 2989 assert(Entry.isValid() && Entry.getFlags() == Flags && 2990 "Entry not initialized!"); 2991 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 2992 "Resetting with the new address."); 2993 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 2994 if (Entry.getVarSize().isZero()) { 2995 Entry.setVarSize(VarSize); 2996 Entry.setLinkage(Linkage); 2997 } 2998 return; 2999 } 3000 Entry.setVarSize(VarSize); 3001 Entry.setLinkage(Linkage); 3002 Entry.setAddress(Addr); 3003 } else { 3004 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3005 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3006 assert(Entry.isValid() && Entry.getFlags() == Flags && 3007 "Entry not initialized!"); 3008 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3009 "Resetting with the new address."); 3010 if (Entry.getVarSize().isZero()) { 3011 Entry.setVarSize(VarSize); 3012 Entry.setLinkage(Linkage); 3013 } 3014 return; 3015 } 3016 OffloadEntriesDeviceGlobalVar.try_emplace( 3017 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3018 ++OffloadingEntriesNum; 3019 } 3020 } 3021 3022 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3023 actOnDeviceGlobalVarEntriesInfo( 3024 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3025 // Scan all device global variable entries and perform the provided action. 3026 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3027 Action(E.getKey(), E.getValue()); 3028 } 3029 3030 void CGOpenMPRuntime::createOffloadEntry( 3031 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3032 llvm::GlobalValue::LinkageTypes Linkage) { 3033 StringRef Name = Addr->getName(); 3034 llvm::Module &M = CGM.getModule(); 3035 llvm::LLVMContext &C = M.getContext(); 3036 3037 // Create constant string with the name.
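// Sketch of the IR produced below (global names and values are illustrative
// only; the real names come from getName and from the entry being registered):
//   @.omp_offloading.entry_name = internal unnamed_addr constant [4 x i8] c"foo\00"
//   @.omp_offloading.entry.foo = weak constant %struct.__tgt_offload_entry {
//     i8* <ID>, i8* @.omp_offloading.entry_name, i64 <Size>, i32 <Flags>, i32 0
//   }, section "omp_offloading_entries"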
3038 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3039 3040 std::string StringName = getName({"omp_offloading", "entry_name"}); 3041 auto *Str = new llvm::GlobalVariable( 3042 M, StrPtrInit->getType(), /*isConstant=*/true, 3043 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3044 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3045 3046 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 3047 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 3048 llvm::ConstantInt::get(CGM.SizeTy, Size), 3049 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3050 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3051 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3052 llvm::GlobalVariable *Entry = createGlobalStruct( 3053 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3054 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3055 3056 // The entry has to be created in the section the linker expects it to be. 3057 Entry->setSection("omp_offloading_entries"); 3058 } 3059 3060 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3061 // Emit the offloading entries and metadata so that the device codegen side 3062 // can easily figure out what to emit. The produced metadata looks like 3063 // this: 3064 // 3065 // !omp_offload.info = !{!1, ...} 3066 // 3067 // Right now we only generate metadata for functions that contain target 3068 // regions. 3069 3070 // If we are in simd mode or there are no entries, we don't need to do 3071 // anything. 3072 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3073 return; 3074 3075 llvm::Module &M = CGM.getModule(); 3076 llvm::LLVMContext &C = M.getContext(); 3077 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3078 SourceLocation, StringRef>, 3079 16> 3080 OrderedEntries(OffloadEntriesInfoManager.size()); 3081 llvm::SmallVector<StringRef, 16> ParentFunctions( 3082 OffloadEntriesInfoManager.size()); 3083 3084 // Auxiliary methods to create metadata values and strings. 3085 auto &&GetMDInt = [this](unsigned V) { 3086 return llvm::ConstantAsMetadata::get( 3087 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3088 }; 3089 3090 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3091 3092 // Create the offloading info metadata node. 3093 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3094 3095 // Create function that emits metadata for each target region entry. 3096 auto &&TargetRegionMetadataEmitter = 3097 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3098 &GetMDString]( 3099 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3100 unsigned Line, 3101 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3102 // Generate metadata for target regions. Each entry of this metadata 3103 // contains: 3104 // - Entry 0 -> Kind of this type of metadata (0). 3105 // - Entry 1 -> Device ID of the file where the entry was identified. 3106 // - Entry 2 -> File ID of the file where the entry was identified. 3107 // - Entry 3 -> Mangled name of the function where the entry was 3108 // identified. 3109 // - Entry 4 -> Line in the file where the entry was identified. 3110 // - Entry 5 -> Order the entry was created. 3111 // The first element of the metadata node is the kind.
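// For example, a target region at line 12 whose parent function is "foo"
// would be described by a node like (all values illustrative):
//   !{i32 0, i32 <DeviceID>, i32 <FileID>, !"foo", i32 12, i32 <Order>}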
3112 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3113 GetMDInt(FileID), GetMDString(ParentName), 3114 GetMDInt(Line), GetMDInt(E.getOrder())}; 3115 3116 SourceLocation Loc; 3117 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3118 E = CGM.getContext().getSourceManager().fileinfo_end(); 3119 I != E; ++I) { 3120 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3121 I->getFirst()->getUniqueID().getFile() == FileID) { 3122 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3123 I->getFirst(), Line, 1); 3124 break; 3125 } 3126 } 3127 // Save this entry in the right position of the ordered entries array. 3128 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3129 ParentFunctions[E.getOrder()] = ParentName; 3130 3131 // Add metadata to the named metadata node. 3132 MD->addOperand(llvm::MDNode::get(C, Ops)); 3133 }; 3134 3135 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3136 TargetRegionMetadataEmitter); 3137 3138 // Create function that emits metadata for each device global variable entry. 3139 auto &&DeviceGlobalVarMetadataEmitter = 3140 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3141 MD](StringRef MangledName, 3142 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3143 &E) { 3144 // Generate metadata for global variables. Each entry of this metadata 3145 // contains: 3146 // - Entry 0 -> Kind of this type of metadata (1). 3147 // - Entry 1 -> Mangled name of the variable. 3148 // - Entry 2 -> Declare target kind. 3149 // - Entry 3 -> Order the entry was created. 3150 // The first element of the metadata node is the kind. 3151 llvm::Metadata *Ops[] = { 3152 GetMDInt(E.getKind()), GetMDString(MangledName), 3153 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3154 3155 // Save this entry in the right position of the ordered entries array. 3156 OrderedEntries[E.getOrder()] = 3157 std::make_tuple(&E, SourceLocation(), MangledName); 3158 3159 // Add metadata to the named metadata node. 3160 MD->addOperand(llvm::MDNode::get(C, Ops)); 3161 }; 3162 3163 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3164 DeviceGlobalVarMetadataEmitter); 3165 3166 for (const auto &E : OrderedEntries) { 3167 assert(std::get<0>(E) && "All ordered entries must exist!"); 3168 if (const auto *CE = 3169 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3170 std::get<0>(E))) { 3171 if (!CE->getID() || !CE->getAddress()) { 3172 // Do not blame the entry if the parent function is not emitted.
3173 StringRef FnName = ParentFunctions[CE->getOrder()]; 3174 if (!CGM.GetGlobalValue(FnName)) 3175 continue; 3176 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3177 DiagnosticsEngine::Error, 3178 "Offloading entry for target region in %0 is incorrect: either the " 3179 "address or the ID is invalid."); 3180 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3181 continue; 3182 } 3183 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3184 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3185 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3186 OffloadEntryInfoDeviceGlobalVar>( 3187 std::get<0>(E))) { 3188 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3189 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3190 CE->getFlags()); 3191 switch (Flags) { 3192 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3193 if (CGM.getLangOpts().OpenMPIsDevice && 3194 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3195 continue; 3196 if (!CE->getAddress()) { 3197 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3198 DiagnosticsEngine::Error, "Offloading entry for declare target " 3199 "variable %0 is incorrect: the " 3200 "address is invalid."); 3201 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3202 continue; 3203 } 3204 // The variable has no definition - no need to add the entry. 3205 if (CE->getVarSize().isZero()) 3206 continue; 3207 break; 3208 } 3209 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3210 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3211 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3212 "Declare target link address is set."); 3213 if (CGM.getLangOpts().OpenMPIsDevice) 3214 continue; 3215 if (!CE->getAddress()) { 3216 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3217 DiagnosticsEngine::Error, 3218 "Offloading entry for declare target variable is incorrect: the " 3219 "address is invalid."); 3220 CGM.getDiags().Report(DiagID); 3221 continue; 3222 } 3223 break; 3224 } 3225 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3226 CE->getVarSize().getQuantity(), Flags, 3227 CE->getLinkage()); 3228 } else { 3229 llvm_unreachable("Unsupported entry kind."); 3230 } 3231 } 3232 } 3233 3234 /// Loads all the offload entries information from the host IR 3235 /// metadata. 3236 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3237 // If we are in target mode, load the metadata from the host IR. This code has 3238 // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
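// The host IR is expected to carry one node per entry, e.g. (sketch):
//   !omp_offload.info = !{!0, !1}
//   !0 = !{i32 0, i32 <DeviceID>, i32 <FileID>, !"<parent>", i32 <Line>, i32 <Order>}
//   !1 = !{i32 1, !"<mangled var name>", i32 <Flags>, i32 <Order>}
// The leading kind value selects the case in the switch below.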
3239 3240 if (!CGM.getLangOpts().OpenMPIsDevice) 3241 return; 3242 3243 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3244 return; 3245 3246 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3247 if (auto EC = Buf.getError()) { 3248 CGM.getDiags().Report(diag::err_cannot_open_file) 3249 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3250 return; 3251 } 3252 3253 llvm::LLVMContext C; 3254 auto ME = expectedToErrorOrAndEmitErrors( 3255 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3256 3257 if (auto EC = ME.getError()) { 3258 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3259 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3260 CGM.getDiags().Report(DiagID) 3261 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3262 return; 3263 } 3264 3265 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3266 if (!MD) 3267 return; 3268 3269 for (llvm::MDNode *MN : MD->operands()) { 3270 auto &&GetMDInt = [MN](unsigned Idx) { 3271 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3272 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3273 }; 3274 3275 auto &&GetMDString = [MN](unsigned Idx) { 3276 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3277 return V->getString(); 3278 }; 3279 3280 switch (GetMDInt(0)) { 3281 default: 3282 llvm_unreachable("Unexpected metadata!"); 3283 break; 3284 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3285 OffloadingEntryInfoTargetRegion: 3286 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3287 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3288 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3289 /*Order=*/GetMDInt(5)); 3290 break; 3291 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3292 OffloadingEntryInfoDeviceGlobalVar: 3293 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3294 /*MangledName=*/GetMDString(1), 3295 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3296 /*Flags=*/GetMDInt(2)), 3297 /*Order=*/GetMDInt(3)); 3298 break; 3299 } 3300 } 3301 } 3302 3303 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3304 if (!KmpRoutineEntryPtrTy) { 3305 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3306 ASTContext &C = CGM.getContext(); 3307 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3308 FunctionProtoType::ExtProtoInfo EPI; 3309 KmpRoutineEntryPtrQTy = C.getPointerType( 3310 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3311 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3312 } 3313 } 3314 3315 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3316 // Make sure the type of the entry is already created. This is the type we 3317 // have to create: 3318 // struct __tgt_offload_entry{ 3319 // void *addr; // Pointer to the offload entry info. 3320 // // (function or global) 3321 // char *name; // Name of the function or global. 3322 // size_t size; // Size of the entry info (0 if it is a function). 3323 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3324 // int32_t reserved; // Reserved, to be used by the runtime library.
3325 // }; 3326 if (TgtOffloadEntryQTy.isNull()) { 3327 ASTContext &C = CGM.getContext(); 3328 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3329 RD->startDefinition(); 3330 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3331 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3332 addFieldToRecordDecl(C, RD, C.getSizeType()); 3333 addFieldToRecordDecl( 3334 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3335 addFieldToRecordDecl( 3336 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3337 RD->completeDefinition(); 3338 RD->addAttr(PackedAttr::CreateImplicit(C)); 3339 TgtOffloadEntryQTy = C.getRecordType(RD); 3340 } 3341 return TgtOffloadEntryQTy; 3342 } 3343 3344 namespace { 3345 struct PrivateHelpersTy { 3346 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3347 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3348 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3349 PrivateElemInit(PrivateElemInit) {} 3350 const Expr *OriginalRef = nullptr; 3351 const VarDecl *Original = nullptr; 3352 const VarDecl *PrivateCopy = nullptr; 3353 const VarDecl *PrivateElemInit = nullptr; 3354 }; 3355 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3356 } // anonymous namespace 3357 3358 static RecordDecl * 3359 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3360 if (!Privates.empty()) { 3361 ASTContext &C = CGM.getContext(); 3362 // Build struct .kmp_privates_t. { 3363 // /* private vars */ 3364 // }; 3365 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3366 RD->startDefinition(); 3367 for (const auto &Pair : Privates) { 3368 const VarDecl *VD = Pair.second.Original; 3369 QualType Type = VD->getType().getNonReferenceType(); 3370 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3371 if (VD->hasAttrs()) { 3372 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3373 E(VD->getAttrs().end()); 3374 I != E; ++I) 3375 FD->addAttr(*I); 3376 } 3377 } 3378 RD->completeDefinition(); 3379 return RD; 3380 } 3381 return nullptr; 3382 } 3383 3384 static RecordDecl * 3385 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3386 QualType KmpInt32Ty, 3387 QualType KmpRoutineEntryPointerQTy) { 3388 ASTContext &C = CGM.getContext(); 3389 // Build struct kmp_task_t { 3390 // void * shareds; 3391 // kmp_routine_entry_t routine; 3392 // kmp_int32 part_id; 3393 // kmp_cmplrdata_t data1; 3394 // kmp_cmplrdata_t data2; 3395 // For taskloops additional fields: 3396 // kmp_uint64 lb; 3397 // kmp_uint64 ub; 3398 // kmp_int64 st; 3399 // kmp_int32 liter; 3400 // void * reductions; 3401 // }; 3402 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3403 UD->startDefinition(); 3404 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3405 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3406 UD->completeDefinition(); 3407 QualType KmpCmplrdataTy = C.getRecordType(UD); 3408 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3409 RD->startDefinition(); 3410 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3411 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3412 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3413 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3414 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3415 if (isOpenMPTaskLoopDirective(Kind)) { 3416 QualType KmpUInt64Ty = 3417 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3418 QualType KmpInt64Ty = 3419 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3420 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3421 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3422 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3423 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3424 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3425 } 3426 RD->completeDefinition(); 3427 return RD; 3428 } 3429 3430 static RecordDecl * 3431 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3432 ArrayRef<PrivateDataTy> Privates) { 3433 ASTContext &C = CGM.getContext(); 3434 // Build struct kmp_task_t_with_privates { 3435 // kmp_task_t task_data; 3436 // .kmp_privates_t. privates; 3437 // }; 3438 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3439 RD->startDefinition(); 3440 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3441 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3442 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3443 RD->completeDefinition(); 3444 return RD; 3445 } 3446 3447 /// Emit a proxy function which accepts kmp_task_t as the second 3448 /// argument. 3449 /// \code 3450 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3451 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3452 /// For taskloops: 3453 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3454 /// tt->reductions, tt->shareds); 3455 /// return 0; 3456 /// } 3457 /// \endcode 3458 static llvm::Function * 3459 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3460 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3461 QualType KmpTaskTWithPrivatesPtrQTy, 3462 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3463 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3464 llvm::Value *TaskPrivatesMap) { 3465 ASTContext &C = CGM.getContext(); 3466 FunctionArgList Args; 3467 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3468 ImplicitParamDecl::Other); 3469 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3470 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3471 ImplicitParamDecl::Other); 3472 Args.push_back(&GtidArg); 3473 Args.push_back(&TaskTypeArg); 3474 const auto &TaskEntryFnInfo = 3475 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3476 llvm::FunctionType *TaskEntryTy = 3477 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3478 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3479 auto *TaskEntry = llvm::Function::Create( 3480 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3481 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3482 TaskEntry->setDoesNotRecurse(); 3483 CodeGenFunction CGF(CGM); 3484 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3485 Loc, Loc); 3486 3487 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3488 // tt, 3489 // For taskloops: 3490 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3491 // tt->task_data.shareds); 3492 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3493 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3494 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3495 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3496 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3497 const auto *KmpTaskTWithPrivatesQTyRD = 3498 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3499 LValue Base = 3500 
CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3501 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3502 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3503 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3504 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3505 3506 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3507 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3508 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3509 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3510 CGF.ConvertTypeForMem(SharedsPtrTy)); 3511 3512 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3513 llvm::Value *PrivatesParam; 3514 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3515 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3516 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3517 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3518 } else { 3519 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3520 } 3521 3522 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3523 TaskPrivatesMap, 3524 CGF.Builder 3525 .CreatePointerBitCastOrAddrSpaceCast( 3526 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3527 .getPointer()}; 3528 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3529 std::end(CommonArgs)); 3530 if (isOpenMPTaskLoopDirective(Kind)) { 3531 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3532 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3533 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3534 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3535 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3536 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3537 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3538 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3539 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3540 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3541 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3542 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3543 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3544 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3545 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3546 CallArgs.push_back(LBParam); 3547 CallArgs.push_back(UBParam); 3548 CallArgs.push_back(StParam); 3549 CallArgs.push_back(LIParam); 3550 CallArgs.push_back(RParam); 3551 } 3552 CallArgs.push_back(SharedsParam); 3553 3554 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3555 CallArgs); 3556 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3557 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3558 CGF.FinishFunction(); 3559 return TaskEntry; 3560 } 3561 3562 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3563 SourceLocation Loc, 3564 QualType KmpInt32Ty, 3565 QualType KmpTaskTWithPrivatesPtrQTy, 3566 QualType KmpTaskTWithPrivatesQTy) { 3567 ASTContext &C = CGM.getContext(); 3568 FunctionArgList Args; 3569 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3570 ImplicitParamDecl::Other); 3571 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3572 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3573 ImplicitParamDecl::Other); 3574 
Args.push_back(&GtidArg); 3575 Args.push_back(&TaskTypeArg); 3576 const auto &DestructorFnInfo = 3577 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3578 llvm::FunctionType *DestructorFnTy = 3579 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3580 std::string Name = 3581 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3582 auto *DestructorFn = 3583 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3584 Name, &CGM.getModule()); 3585 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3586 DestructorFnInfo); 3587 DestructorFn->setDoesNotRecurse(); 3588 CodeGenFunction CGF(CGM); 3589 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3590 Args, Loc, Loc); 3591 3592 LValue Base = CGF.EmitLoadOfPointerLValue( 3593 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3594 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3595 const auto *KmpTaskTWithPrivatesQTyRD = 3596 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3597 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3598 Base = CGF.EmitLValueForField(Base, *FI); 3599 for (const auto *Field : 3600 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3601 if (QualType::DestructionKind DtorKind = 3602 Field->getType().isDestructedType()) { 3603 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3604 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3605 } 3606 } 3607 CGF.FinishFunction(); 3608 return DestructorFn; 3609 } 3610 3611 /// Emit a privates mapping function for correct handling of private and 3612 /// firstprivate variables. 3613 /// \code 3614 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 3615 /// **noalias priv1,..., <tyn> **noalias privn) { 3616 /// *priv1 = &.privates.priv1; 3617 /// ...; 3618 /// *privn = &.privates.privn; 3619 /// } 3620 /// \endcode 3621 static llvm::Value * 3622 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3623 ArrayRef<const Expr *> PrivateVars, 3624 ArrayRef<const Expr *> FirstprivateVars, 3625 ArrayRef<const Expr *> LastprivateVars, 3626 QualType PrivatesQTy, 3627 ArrayRef<PrivateDataTy> Privates) { 3628 ASTContext &C = CGM.getContext(); 3629 FunctionArgList Args; 3630 ImplicitParamDecl TaskPrivatesArg( 3631 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3632 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3633 ImplicitParamDecl::Other); 3634 Args.push_back(&TaskPrivatesArg); 3635 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 3636 unsigned Counter = 1; 3637 for (const Expr *E : PrivateVars) { 3638 Args.push_back(ImplicitParamDecl::Create( 3639 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3640 C.getPointerType(C.getPointerType(E->getType())) 3641 .withConst() 3642 .withRestrict(), 3643 ImplicitParamDecl::Other)); 3644 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3645 PrivateVarsPos[VD] = Counter; 3646 ++Counter; 3647 } 3648 for (const Expr *E : FirstprivateVars) { 3649 Args.push_back(ImplicitParamDecl::Create( 3650 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3651 C.getPointerType(C.getPointerType(E->getType())) 3652 .withConst() 3653 .withRestrict(), 3654 ImplicitParamDecl::Other)); 3655 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3656 PrivateVarsPos[VD] = Counter; 3657 ++Counter; 3658 } 3659 for (const Expr *E : LastprivateVars) { 3660 Args.push_back(ImplicitParamDecl::Create( 3661 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3662 
C.getPointerType(C.getPointerType(E->getType())) 3663 .withConst() 3664 .withRestrict(), 3665 ImplicitParamDecl::Other)); 3666 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3667 PrivateVarsPos[VD] = Counter; 3668 ++Counter; 3669 } 3670 const auto &TaskPrivatesMapFnInfo = 3671 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3672 llvm::FunctionType *TaskPrivatesMapTy = 3673 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3674 std::string Name = 3675 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3676 auto *TaskPrivatesMap = llvm::Function::Create( 3677 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3678 &CGM.getModule()); 3679 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3680 TaskPrivatesMapFnInfo); 3681 if (CGM.getLangOpts().Optimize) { 3682 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3683 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3684 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3685 } 3686 CodeGenFunction CGF(CGM); 3687 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3688 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3689 3690 // *privi = &.privates.privi; 3691 LValue Base = CGF.EmitLoadOfPointerLValue( 3692 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3693 TaskPrivatesArg.getType()->castAs<PointerType>()); 3694 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3695 Counter = 0; 3696 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3697 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3698 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3699 LValue RefLVal = 3700 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3701 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3702 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3703 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3704 ++Counter; 3705 } 3706 CGF.FinishFunction(); 3707 return TaskPrivatesMap; 3708 } 3709 3710 /// Emit initialization for private variables in task-based directives. 3711 static void emitPrivatesInit(CodeGenFunction &CGF, 3712 const OMPExecutableDirective &D, 3713 Address KmpTaskSharedsPtr, LValue TDBase, 3714 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3715 QualType SharedsTy, QualType SharedsPtrTy, 3716 const OMPTaskDataTy &Data, 3717 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3718 ASTContext &C = CGF.getContext(); 3719 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3720 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3721 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3722 ? OMPD_taskloop 3723 : OMPD_task; 3724 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3725 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3726 LValue SrcBase; 3727 bool IsTargetTask = 3728 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3729 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3730 // For target-based directives skip 3 firstprivate arrays BasePointersArray, 3731 // PointersArray and SizesArray. The original variables for these arrays are 3732 // not captured and we get their addresses explicitly. 
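// For a firstprivate scalar 'a' the initialization emitted below amounts to,
// roughly (sketch): task->privates.a = src_shareds->a; for class types the
// copy constructor runs instead of a plain store.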
3733 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3734 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3735 SrcBase = CGF.MakeAddrLValue( 3736 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3737 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3738 SharedsTy); 3739 } 3740 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3741 for (const PrivateDataTy &Pair : Privates) { 3742 const VarDecl *VD = Pair.second.PrivateCopy; 3743 const Expr *Init = VD->getAnyInitializer(); 3744 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3745 !CGF.isTrivialInitializer(Init)))) { 3746 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3747 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3748 const VarDecl *OriginalVD = Pair.second.Original; 3749 // Check if the variable is the target-based BasePointersArray, 3750 // PointersArray or SizesArray. 3751 LValue SharedRefLValue; 3752 QualType Type = PrivateLValue.getType(); 3753 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3754 if (IsTargetTask && !SharedField) { 3755 assert(isa<ImplicitParamDecl>(OriginalVD) && 3756 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3757 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3758 ->getNumParams() == 0 && 3759 isa<TranslationUnitDecl>( 3760 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3761 ->getDeclContext()) && 3762 "Expected artificial target data variable."); 3763 SharedRefLValue = 3764 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3765 } else if (ForDup) { 3766 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3767 SharedRefLValue = CGF.MakeAddrLValue( 3768 Address(SharedRefLValue.getPointer(CGF), 3769 C.getDeclAlign(OriginalVD)), 3770 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3771 SharedRefLValue.getTBAAInfo()); 3772 } else if (CGF.LambdaCaptureFields.count( 3773 Pair.second.Original->getCanonicalDecl()) > 0 || 3774 dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { 3775 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3776 } else { 3777 // Processing for implicitly captured variables. 3778 InlinedOpenMPRegionRAII Region( 3779 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3780 /*HasCancel=*/false); 3781 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3782 } 3783 if (Type->isArrayType()) { 3784 // Initialize firstprivate array. 3785 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3786 // Perform simple memcpy. 3787 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3788 } else { 3789 // Initialize firstprivate array using element-by-element 3790 // initialization. 3791 CGF.EmitOMPAggregateAssign( 3792 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3793 Type, 3794 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3795 Address SrcElement) { 3796 // Clean up any temporaries needed by the initialization. 3797 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3798 InitScope.addPrivate( 3799 Elem, [SrcElement]() -> Address { return SrcElement; }); 3800 (void)InitScope.Privatize(); 3801 // Emit initialization for single element. 
3802 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3803 CGF, &CapturesInfo); 3804 CGF.EmitAnyExprToMem(Init, DestElement, 3805 Init->getType().getQualifiers(), 3806 /*IsInitializer=*/false); 3807 }); 3808 } 3809 } else { 3810 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3811 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3812 return SharedRefLValue.getAddress(CGF); 3813 }); 3814 (void)InitScope.Privatize(); 3815 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3816 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3817 /*capturedByInit=*/false); 3818 } 3819 } else { 3820 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3821 } 3822 } 3823 ++FI; 3824 } 3825 } 3826 3827 /// Check if duplication function is required for taskloops. 3828 static bool checkInitIsRequired(CodeGenFunction &CGF, 3829 ArrayRef<PrivateDataTy> Privates) { 3830 bool InitRequired = false; 3831 for (const PrivateDataTy &Pair : Privates) { 3832 const VarDecl *VD = Pair.second.PrivateCopy; 3833 const Expr *Init = VD->getAnyInitializer(); 3834 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3835 !CGF.isTrivialInitializer(Init)); 3836 if (InitRequired) 3837 break; 3838 } 3839 return InitRequired; 3840 } 3841 3842 3843 /// Emit task_dup function (for initialization of 3844 /// private/firstprivate/lastprivate vars and last_iter flag) 3845 /// \code 3846 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3847 /// lastpriv) { 3848 /// // setup lastprivate flag 3849 /// task_dst->last = lastpriv; 3850 /// // could be constructor calls here... 3851 /// } 3852 /// \endcode 3853 static llvm::Value * 3854 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3855 const OMPExecutableDirective &D, 3856 QualType KmpTaskTWithPrivatesPtrQTy, 3857 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3858 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3859 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3860 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3861 ASTContext &C = CGM.getContext(); 3862 FunctionArgList Args; 3863 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3864 KmpTaskTWithPrivatesPtrQTy, 3865 ImplicitParamDecl::Other); 3866 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3867 KmpTaskTWithPrivatesPtrQTy, 3868 ImplicitParamDecl::Other); 3869 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3870 ImplicitParamDecl::Other); 3871 Args.push_back(&DstArg); 3872 Args.push_back(&SrcArg); 3873 Args.push_back(&LastprivArg); 3874 const auto &TaskDupFnInfo = 3875 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3876 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3877 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3878 auto *TaskDup = llvm::Function::Create( 3879 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3880 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3881 TaskDup->setDoesNotRecurse(); 3882 CodeGenFunction CGF(CGM); 3883 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3884 Loc); 3885 3886 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3887 CGF.GetAddrOfLocalVar(&DstArg), 3888 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3889 // task_dst->liter = lastpriv; 3890 if (WithLastIter) { 3891 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3892 LValue Base = 
CGF.EmitLValueForField( 3893 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3894 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3895 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3896 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3897 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3898 } 3899 3900 // Emit initial values for private copies (if any). 3901 assert(!Privates.empty()); 3902 Address KmpTaskSharedsPtr = Address::invalid(); 3903 if (!Data.FirstprivateVars.empty()) { 3904 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3905 CGF.GetAddrOfLocalVar(&SrcArg), 3906 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3907 LValue Base = CGF.EmitLValueForField( 3908 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3909 KmpTaskSharedsPtr = Address( 3910 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3911 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3912 KmpTaskTShareds)), 3913 Loc), 3914 CGM.getNaturalTypeAlignment(SharedsTy)); 3915 } 3916 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 3917 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 3918 CGF.FinishFunction(); 3919 return TaskDup; 3920 } 3921 3922 /// Checks if destructor function is required to be generated. 3923 /// \return true if cleanups are required, false otherwise. 3924 static bool 3925 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 3926 bool NeedsCleanup = false; 3927 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3928 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 3929 for (const FieldDecl *FD : PrivateRD->fields()) { 3930 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 3931 if (NeedsCleanup) 3932 break; 3933 } 3934 return NeedsCleanup; 3935 } 3936 3937 namespace { 3938 /// Loop generator for OpenMP iterator expression. 
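/// For an iterator such as 'iterator(i = begin:end:step)' the generated
/// control flow is, roughly (see the constructor and destructor below):
/// \code
/// counter = 0;
/// cont:
/// if (counter < N) goto body; else goto exit;
/// body:
/// i = begin + counter * step;
/// ... // code emitted while the scope is active
/// counter = counter + 1;
/// goto cont;
/// exit:
/// \endcode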
3939 class OMPIteratorGeneratorScope final 3940 : public CodeGenFunction::OMPPrivateScope { 3941 CodeGenFunction &CGF; 3942 const OMPIteratorExpr *E = nullptr; 3943 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 3944 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 3945 OMPIteratorGeneratorScope() = delete; 3946 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 3947 3948 public: 3949 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 3950 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 3951 if (!E) 3952 return; 3953 SmallVector<llvm::Value *, 4> Uppers; 3954 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 3955 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 3956 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 3957 addPrivate(VD, [&CGF, VD]() { 3958 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 3959 }); 3960 const OMPIteratorHelperData &HelperData = E->getHelper(I); 3961 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 3962 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 3963 "counter.addr"); 3964 }); 3965 } 3966 Privatize(); 3967 3968 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 3969 const OMPIteratorHelperData &HelperData = E->getHelper(I); 3970 LValue CLVal = 3971 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 3972 HelperData.CounterVD->getType()); 3973 // Counter = 0; 3974 CGF.EmitStoreOfScalar( 3975 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 3976 CLVal); 3977 CodeGenFunction::JumpDest &ContDest = 3978 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 3979 CodeGenFunction::JumpDest &ExitDest = 3980 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 3981 // N = <number-of_iterations>; 3982 llvm::Value *N = Uppers[I]; 3983 // cont: 3984 // if (Counter < N) goto body; else goto exit; 3985 CGF.EmitBlock(ContDest.getBlock()); 3986 auto *CVal = 3987 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 3988 llvm::Value *Cmp = 3989 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 3990 ? 
CGF.Builder.CreateICmpSLT(CVal, N) 3991 : CGF.Builder.CreateICmpULT(CVal, N); 3992 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 3993 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 3994 // body: 3995 CGF.EmitBlock(BodyBB); 3996 // Iteri = Begini + Counter * Stepi; 3997 CGF.EmitIgnoredExpr(HelperData.Update); 3998 } 3999 } 4000 ~OMPIteratorGeneratorScope() { 4001 if (!E) 4002 return; 4003 for (unsigned I = E->numOfIterators(); I > 0; --I) { 4004 // Counter = Counter + 1; 4005 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 4006 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 4007 // goto cont; 4008 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 4009 // exit: 4010 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 4011 } 4012 } 4013 }; 4014 } // namespace 4015 4016 static std::pair<llvm::Value *, llvm::Value *> 4017 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { 4018 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 4019 llvm::Value *Addr; 4020 if (OASE) { 4021 const Expr *Base = OASE->getBase(); 4022 Addr = CGF.EmitScalarExpr(Base); 4023 } else { 4024 Addr = CGF.EmitLValue(E).getPointer(CGF); 4025 } 4026 llvm::Value *SizeVal; 4027 QualType Ty = E->getType(); 4028 if (OASE) { 4029 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 4030 for (const Expr *SE : OASE->getDimensions()) { 4031 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 4032 Sz = CGF.EmitScalarConversion( 4033 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); 4034 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); 4035 } 4036 } else if (const auto *ASE = 4037 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 4038 LValue UpAddrLVal = 4039 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 4040 llvm::Value *UpAddr = 4041 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1); 4042 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4043 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4044 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4045 } else { 4046 SizeVal = CGF.getTypeSize(Ty); 4047 } 4048 return std::make_pair(Addr, SizeVal); 4049 } 4050 4051 /// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags type. 4052 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4053 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4054 if (KmpTaskAffinityInfoTy.isNull()) { 4055 RecordDecl *KmpAffinityInfoRD = 4056 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4057 KmpAffinityInfoRD->startDefinition(); 4058 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4059 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4060 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4061 KmpAffinityInfoRD->completeDefinition(); 4062 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4063 } 4064 } 4065 4066 CGOpenMPRuntime::TaskResultTy 4067 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4068 const OMPExecutableDirective &D, 4069 llvm::Function *TaskFunction, QualType SharedsTy, 4070 Address Shareds, const OMPTaskDataTy &Data) { 4071 ASTContext &C = CGM.getContext(); 4072 llvm::SmallVector<PrivateDataTy, 4> Privates; 4073 // Aggregate privates and sort them by alignment.
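// Sorting with the most strictly aligned copies first keeps the generated
// .kmp_privates.t record densely packed (see createPrivatesRecordDecl).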
4074 const auto *I = Data.PrivateCopies.begin(); 4075 for (const Expr *E : Data.PrivateVars) { 4076 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4077 Privates.emplace_back( 4078 C.getDeclAlign(VD), 4079 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4080 /*PrivateElemInit=*/nullptr)); 4081 ++I; 4082 } 4083 I = Data.FirstprivateCopies.begin(); 4084 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4085 for (const Expr *E : Data.FirstprivateVars) { 4086 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4087 Privates.emplace_back( 4088 C.getDeclAlign(VD), 4089 PrivateHelpersTy( 4090 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4091 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4092 ++I; 4093 ++IElemInitRef; 4094 } 4095 I = Data.LastprivateCopies.begin(); 4096 for (const Expr *E : Data.LastprivateVars) { 4097 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4098 Privates.emplace_back( 4099 C.getDeclAlign(VD), 4100 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4101 /*PrivateElemInit=*/nullptr)); 4102 ++I; 4103 } 4104 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { 4105 return L.first > R.first; 4106 }); 4107 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4108 // Build type kmp_routine_entry_t (if not built yet). 4109 emitKmpRoutineEntryT(KmpInt32Ty); 4110 // Build type kmp_task_t (if not built yet). 4111 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4112 if (SavedKmpTaskloopTQTy.isNull()) { 4113 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4114 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4115 } 4116 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4117 } else { 4118 assert((D.getDirectiveKind() == OMPD_task || 4119 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4120 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4121 "Expected taskloop, task or target directive"); 4122 if (SavedKmpTaskTQTy.isNull()) { 4123 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4124 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4125 } 4126 KmpTaskTQTy = SavedKmpTaskTQTy; 4127 } 4128 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4129 // Build particular struct kmp_task_t for the given task. 4130 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4131 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4132 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4133 QualType KmpTaskTWithPrivatesPtrQTy = 4134 C.getPointerType(KmpTaskTWithPrivatesQTy); 4135 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4136 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4137 KmpTaskTWithPrivatesTy->getPointerTo(); 4138 llvm::Value *KmpTaskTWithPrivatesTySize = 4139 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4140 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4141 4142 // Emit initial values for private copies (if any). 
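// The outlined task function expects the privates-mapping helper as its
// fourth parameter; when there are no privates, a null pointer of that
// parameter type is passed instead (see below).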
4143 llvm::Value *TaskPrivatesMap = nullptr; 4144 llvm::Type *TaskPrivatesMapTy = 4145 std::next(TaskFunction->arg_begin(), 3)->getType(); 4146 if (!Privates.empty()) { 4147 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4148 TaskPrivatesMap = emitTaskPrivateMappingFunction( 4149 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 4150 FI->getType(), Privates); 4151 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4152 TaskPrivatesMap, TaskPrivatesMapTy); 4153 } else { 4154 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4155 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4156 } 4157 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4158 // kmp_task_t *tt); 4159 llvm::Function *TaskEntry = emitProxyTaskFunction( 4160 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4161 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4162 TaskPrivatesMap); 4163 4164 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4165 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4166 // kmp_routine_entry_t *task_entry); 4167 // Task flags. Format is taken from 4168 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 4169 // description of kmp_tasking_flags struct. 4170 enum { 4171 TiedFlag = 0x1, 4172 FinalFlag = 0x2, 4173 DestructorsFlag = 0x8, 4174 PriorityFlag = 0x20, 4175 DetachableFlag = 0x40, 4176 }; 4177 unsigned Flags = Data.Tied ? TiedFlag : 0; 4178 bool NeedsCleanup = false; 4179 if (!Privates.empty()) { 4180 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 4181 if (NeedsCleanup) 4182 Flags = Flags | DestructorsFlag; 4183 } 4184 if (Data.Priority.getInt()) 4185 Flags = Flags | PriorityFlag; 4186 if (D.hasClausesOfKind<OMPDetachClause>()) 4187 Flags = Flags | DetachableFlag; 4188 llvm::Value *TaskFlags = 4189 Data.Final.getPointer() 4190 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 4191 CGF.Builder.getInt32(FinalFlag), 4192 CGF.Builder.getInt32(/*C=*/0)) 4193 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4194 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4195 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4196 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 4197 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 4198 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4199 TaskEntry, KmpRoutineEntryPtrTy)}; 4200 llvm::Value *NewTask; 4201 if (D.hasClausesOfKind<OMPNowaitClause>()) { 4202 // Check if we have any device clause associated with the directive. 4203 const Expr *Device = nullptr; 4204 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 4205 Device = C->getDevice(); 4206 // Emit the device ID if any, otherwise use the default value. 4207 llvm::Value *DeviceID; 4208 if (Device) 4209 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4210 CGF.Int64Ty, /*isSigned=*/true); 4211 else 4212 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 4213 AllocArgs.push_back(DeviceID); 4214 NewTask = CGF.EmitRuntimeCall( 4215 OMPBuilder.getOrCreateRuntimeFunction( 4216 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 4217 AllocArgs); 4218 } else { 4219 NewTask = 4220 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4221 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 4222 AllocArgs); 4223 } 4224 // Emit detach clause initialization.
4225 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 4226 // task_descriptor); 4227 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 4228 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 4229 LValue EvtLVal = CGF.EmitLValue(Evt); 4230 4231 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 4232 // int gtid, kmp_task_t *task); 4233 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 4234 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 4235 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 4236 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 4237 OMPBuilder.getOrCreateRuntimeFunction( 4238 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 4239 {Loc, Tid, NewTask}); 4240 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 4241 Evt->getExprLoc()); 4242 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 4243 } 4244 // Process affinity clauses. 4245 if (D.hasClausesOfKind<OMPAffinityClause>()) { 4246 // Process list of affinity data. 4247 ASTContext &C = CGM.getContext(); 4248 Address AffinitiesArray = Address::invalid(); 4249 // Calculate number of elements to form the array of affinity data. 4250 llvm::Value *NumOfElements = nullptr; 4251 unsigned NumAffinities = 0; 4252 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4253 if (const Expr *Modifier = C->getModifier()) { 4254 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 4255 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4256 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4257 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4258 NumOfElements = 4259 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 4260 } 4261 } else { 4262 NumAffinities += C->varlist_size(); 4263 } 4264 } 4265 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 4266 // Field ids in the kmp_task_affinity_info record. 4267 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 4268 4269 QualType KmpTaskAffinityInfoArrayTy; 4270 if (NumOfElements) { 4271 NumOfElements = CGF.Builder.CreateNUWAdd( 4272 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 4273 OpaqueValueExpr OVE( 4274 Loc, 4275 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 4276 VK_RValue); 4277 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4278 RValue::get(NumOfElements)); 4279 KmpTaskAffinityInfoArrayTy = 4280 C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal, 4281 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4282 // Properly emit variable-sized array.
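// Emitting an implicit variable of this VLA type lets the regular local
// variable codegen produce the dynamically sized stack allocation for the
// affinity array.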
4283 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 4284 ImplicitParamDecl::Other); 4285 CGF.EmitVarDecl(*PD); 4286 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 4287 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4288 /*isSigned=*/false); 4289 } else { 4290 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 4291 KmpTaskAffinityInfoTy, 4292 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 4293 ArrayType::Normal, /*IndexTypeQuals=*/0); 4294 AffinitiesArray = 4295 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 4296 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 4297 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 4298 /*isSigned=*/false); 4299 } 4300 4301 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 4302 // Fill array by elements without iterators. 4303 unsigned Pos = 0; 4304 bool HasIterator = false; 4305 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4306 if (C->getModifier()) { 4307 HasIterator = true; 4308 continue; 4309 } 4310 for (const Expr *E : C->varlists()) { 4311 llvm::Value *Addr; 4312 llvm::Value *Size; 4313 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4314 LValue Base = 4315 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 4316 KmpTaskAffinityInfoTy); 4317 // affs[i].base_addr = &<Affinities[i].second>; 4318 LValue BaseAddrLVal = CGF.EmitLValueForField( 4319 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4320 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4321 BaseAddrLVal); 4322 // affs[i].len = sizeof(<Affinities[i].second>); 4323 LValue LenLVal = CGF.EmitLValueForField( 4324 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4325 CGF.EmitStoreOfScalar(Size, LenLVal); 4326 ++Pos; 4327 } 4328 } 4329 LValue PosLVal; 4330 if (HasIterator) { 4331 PosLVal = CGF.MakeAddrLValue( 4332 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 4333 C.getSizeType()); 4334 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4335 } 4336 // Process elements with iterators. 
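// E.g. (an illustrative source form) '#pragma omp task
// affinity(iterator(i = 0 : n) : a[i])' walks the iterator space and appends
// one kmp_task_affinity_info entry per iteration at the position tracked by
// PosLVal below.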
4337 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4338 const Expr *Modifier = C->getModifier(); 4339 if (!Modifier) 4340 continue; 4341 OMPIteratorGeneratorScope IteratorScope( 4342 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4343 for (const Expr *E : C->varlists()) { 4344 llvm::Value *Addr; 4345 llvm::Value *Size; 4346 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4347 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4348 LValue Base = CGF.MakeAddrLValue( 4349 Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx), 4350 AffinitiesArray.getAlignment()), 4351 KmpTaskAffinityInfoTy); 4352 // affs[i].base_addr = &<Affinities[i].second>; 4353 LValue BaseAddrLVal = CGF.EmitLValueForField( 4354 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4355 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4356 BaseAddrLVal); 4357 // affs[i].len = sizeof(<Affinities[i].second>); 4358 LValue LenLVal = CGF.EmitLValueForField( 4359 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4360 CGF.EmitStoreOfScalar(Size, LenLVal); 4361 Idx = CGF.Builder.CreateNUWAdd( 4362 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4363 CGF.EmitStoreOfScalar(Idx, PosLVal); 4364 } 4365 } 4366 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4367 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4368 // naffins, kmp_task_affinity_info_t *affin_list); 4369 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4370 llvm::Value *GTid = getThreadID(CGF, Loc); 4371 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4372 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4373 // FIXME: Emit the function and ignore its result for now unless the 4374 // runtime function is properly implemented. 4375 (void)CGF.EmitRuntimeCall( 4376 OMPBuilder.getOrCreateRuntimeFunction( 4377 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4378 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4379 } 4380 llvm::Value *NewTaskNewTaskTTy = 4381 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4382 NewTask, KmpTaskTWithPrivatesPtrTy); 4383 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4384 KmpTaskTWithPrivatesQTy); 4385 LValue TDBase = 4386 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4387 // Fill the data in the resulting kmp_task_t record. 4388 // Copy shareds if there are any. 4389 Address KmpTaskSharedsPtr = Address::invalid(); 4390 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4391 KmpTaskSharedsPtr = 4392 Address(CGF.EmitLoadOfScalar( 4393 CGF.EmitLValueForField( 4394 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 4395 KmpTaskTShareds)), 4396 Loc), 4397 CGM.getNaturalTypeAlignment(SharedsTy)); 4398 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4399 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4400 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4401 } 4402 // Emit initial values for private copies (if any). 
4403 TaskResultTy Result; 4404 if (!Privates.empty()) { 4405 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4406 SharedsTy, SharedsPtrTy, Data, Privates, 4407 /*ForDup=*/false); 4408 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4409 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4410 Result.TaskDupFn = emitTaskDupFunction( 4411 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4412 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4413 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4414 } 4415 } 4416 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 4417 enum { Priority = 0, Destructors = 1 }; 4418 // Provide pointer to function with destructors for privates. 4419 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4420 const RecordDecl *KmpCmplrdataUD = 4421 (*FI)->getType()->getAsUnionType()->getDecl(); 4422 if (NeedsCleanup) { 4423 llvm::Value *DestructorFn = emitDestructorsFunction( 4424 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4425 KmpTaskTWithPrivatesQTy); 4426 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4427 LValue DestructorsLV = CGF.EmitLValueForField( 4428 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4429 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4430 DestructorFn, KmpRoutineEntryPtrTy), 4431 DestructorsLV); 4432 } 4433 // Set priority. 4434 if (Data.Priority.getInt()) { 4435 LValue Data2LV = CGF.EmitLValueForField( 4436 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4437 LValue PriorityLV = CGF.EmitLValueForField( 4438 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4439 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4440 } 4441 Result.NewTask = NewTask; 4442 Result.TaskEntry = TaskEntry; 4443 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4444 Result.TDBase = TDBase; 4445 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4446 return Result; 4447 } 4448 4449 namespace { 4450 /// Dependence kind for RTL. 4451 enum RTLDependenceKindTy { 4452 DepIn = 0x01, 4453 DepInOut = 0x3, 4454 DepMutexInOutSet = 0x4 4455 }; 4456 /// Fields ids in kmp_depend_info record. 4457 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 4458 } // namespace 4459 4460 /// Translates internal dependency kind into the runtime kind. 4461 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 4462 RTLDependenceKindTy DepKind; 4463 switch (K) { 4464 case OMPC_DEPEND_in: 4465 DepKind = DepIn; 4466 break; 4467 // Out and InOut dependencies must use the same code. 4468 case OMPC_DEPEND_out: 4469 case OMPC_DEPEND_inout: 4470 DepKind = DepInOut; 4471 break; 4472 case OMPC_DEPEND_mutexinoutset: 4473 DepKind = DepMutexInOutSet; 4474 break; 4475 case OMPC_DEPEND_source: 4476 case OMPC_DEPEND_sink: 4477 case OMPC_DEPEND_depobj: 4478 case OMPC_DEPEND_unknown: 4479 llvm_unreachable("Unknown task dependence type"); 4480 } 4481 return DepKind; 4482 } 4483 4484 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 
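/// A sketch of the implicit record built below; the C-level field types are
/// assumed from the addFieldToRecordDecl calls (intptr-sized base address,
/// size_t length, bool-width flags):
/// \code
/// struct kmp_depend_info {
///   intptr_t base_addr;
///   size_t len;
///   unsigned char flags;
/// };
/// \endcode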
4485 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4486 QualType &FlagsTy) { 4487 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4488 if (KmpDependInfoTy.isNull()) { 4489 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4490 KmpDependInfoRD->startDefinition(); 4491 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4492 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4493 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4494 KmpDependInfoRD->completeDefinition(); 4495 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4496 } 4497 } 4498 4499 std::pair<llvm::Value *, LValue> 4500 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4501 SourceLocation Loc) { 4502 ASTContext &C = CGM.getContext(); 4503 QualType FlagsTy; 4504 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4505 RecordDecl *KmpDependInfoRD = 4506 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4507 LValue Base = CGF.EmitLoadOfPointerLValue( 4508 DepobjLVal.getAddress(CGF), 4509 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4510 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4511 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4512 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4513 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4514 Base.getTBAAInfo()); 4515 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4516 Addr.getPointer(), 4517 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4518 LValue NumDepsBase = CGF.MakeAddrLValue( 4519 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4520 Base.getBaseInfo(), Base.getTBAAInfo()); 4521 // NumDeps = deps[i].base_addr; 4522 LValue BaseAddrLVal = CGF.EmitLValueForField( 4523 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4524 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4525 return std::make_pair(NumDeps, Base); 4526 } 4527 4528 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4529 llvm::PointerUnion<unsigned *, LValue *> Pos, 4530 const OMPTaskDataTy::DependData &Data, 4531 Address DependenciesArray) { 4532 CodeGenModule &CGM = CGF.CGM; 4533 ASTContext &C = CGM.getContext(); 4534 QualType FlagsTy; 4535 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4536 RecordDecl *KmpDependInfoRD = 4537 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4538 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4539 4540 OMPIteratorGeneratorScope IteratorScope( 4541 CGF, cast_or_null<OMPIteratorExpr>( 4542 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts()
4543 : nullptr));
4544 for (const Expr *E : Data.DepExprs) {
4545 llvm::Value *Addr;
4546 llvm::Value *Size;
4547 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4548 LValue Base;
4549 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4550 Base = CGF.MakeAddrLValue(
4551 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4552 } else {
4553 LValue &PosLVal = *Pos.get<LValue *>();
4554 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4555 Base = CGF.MakeAddrLValue(
4556 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
4557 DependenciesArray.getAlignment()),
4558 KmpDependInfoTy);
4559 }
4560 // deps[i].base_addr = &<Dependencies[i].second>;
4561 LValue BaseAddrLVal = CGF.EmitLValueForField(
4562 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4563 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4564 BaseAddrLVal);
4565 // deps[i].len = sizeof(<Dependencies[i].second>);
4566 LValue LenLVal = CGF.EmitLValueForField(
4567 Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4568 CGF.EmitStoreOfScalar(Size, LenLVal);
4569 // deps[i].flags = <Dependencies[i].first>;
4570 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4571 LValue FlagsLVal = CGF.EmitLValueForField(
4572 Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4573 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4574 FlagsLVal);
4575 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4576 ++(*P);
4577 } else {
4578 LValue &PosLVal = *Pos.get<LValue *>();
4579 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4580 Idx = CGF.Builder.CreateNUWAdd(Idx,
4581 llvm::ConstantInt::get(Idx->getType(), 1));
4582 CGF.EmitStoreOfScalar(Idx, PosLVal);
4583 }
4584 }
4585 }
4586
4587 static SmallVector<llvm::Value *, 4>
4588 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4589 const OMPTaskDataTy::DependData &Data) {
4590 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4591 "Expected depobj dependency kind.");
4592 SmallVector<llvm::Value *, 4> Sizes;
4593 SmallVector<LValue, 4> SizeLVals;
4594 ASTContext &C = CGF.getContext();
4595 QualType FlagsTy;
4596 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4597 RecordDecl *KmpDependInfoRD =
4598 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4599 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4600 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4601 {
4602 OMPIteratorGeneratorScope IteratorScope(
4603 CGF, cast_or_null<OMPIteratorExpr>(
4604 Data.IteratorExpr ?
Data.IteratorExpr->IgnoreParenImpCasts()
4605 : nullptr));
4606 for (const Expr *E : Data.DepExprs) {
4607 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4608 LValue Base = CGF.EmitLoadOfPointerLValue(
4609 DepobjLVal.getAddress(CGF),
4610 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4611 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4612 Base.getAddress(CGF), KmpDependInfoPtrT);
4613 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4614 Base.getTBAAInfo());
4615 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4616 Addr.getPointer(),
4617 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4618 LValue NumDepsBase = CGF.MakeAddrLValue(
4619 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4620 Base.getBaseInfo(), Base.getTBAAInfo());
4621 // NumDeps = deps[i].base_addr;
4622 LValue BaseAddrLVal = CGF.EmitLValueForField(
4623 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4624 llvm::Value *NumDeps =
4625 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4626 LValue NumLVal = CGF.MakeAddrLValue(
4627 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4628 C.getUIntPtrType());
4629 CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4630 llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4631 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4632 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4633 CGF.EmitStoreOfScalar(Add, NumLVal);
4634 SizeLVals.push_back(NumLVal);
4635 }
4636 }
4637 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4638 llvm::Value *Size =
4639 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4640 Sizes.push_back(Size);
4641 }
4642 return Sizes;
4643 }
4644
4645 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4646 LValue PosLVal,
4647 const OMPTaskDataTy::DependData &Data,
4648 Address DependenciesArray) {
4649 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4650 "Expected depobj dependency kind.");
4651 ASTContext &C = CGF.getContext();
4652 QualType FlagsTy;
4653 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4654 RecordDecl *KmpDependInfoRD =
4655 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4656 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4657 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4658 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4659 {
4660 OMPIteratorGeneratorScope IteratorScope(
4661 CGF, cast_or_null<OMPIteratorExpr>(
4662 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4663 : nullptr));
4664 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4665 const Expr *E = Data.DepExprs[I];
4666 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4667 LValue Base = CGF.EmitLoadOfPointerLValue(
4668 DepobjLVal.getAddress(CGF),
4669 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4670 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4671 Base.getAddress(CGF), KmpDependInfoPtrT);
4672 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4673 Base.getTBAAInfo());
4674
4675 // Get number of elements in a single depobj.
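// The count was stored one record before the depobj's first element (see
// emitDepobjDependClause below), so indexing at -1 reads it back.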
4676 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4677 Addr.getPointer(),
4678 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4679 LValue NumDepsBase = CGF.MakeAddrLValue(
4680 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4681 Base.getBaseInfo(), Base.getTBAAInfo());
4682 // NumDeps = deps[i].base_addr;
4683 LValue BaseAddrLVal = CGF.EmitLValueForField(
4684 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4685 llvm::Value *NumDeps =
4686 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4687
4688 // Memcpy the dependency data.
4689 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4690 ElSize,
4691 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4692 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4693 Address DepAddr =
4694 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4695 DependenciesArray.getAlignment());
4696 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4697
4698 // Increase pos.
4699 // pos += num_deps;
4700 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4701 CGF.EmitStoreOfScalar(Add, PosLVal);
4702 }
4703 }
4704 }
4705
4706 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4707 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4708 SourceLocation Loc) {
4709 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4710 return D.DepExprs.empty();
4711 }))
4712 return std::make_pair(nullptr, Address::invalid());
4713 // Process list of dependencies.
4714 ASTContext &C = CGM.getContext();
4715 Address DependenciesArray = Address::invalid();
4716 llvm::Value *NumOfElements = nullptr;
4717 unsigned NumDependencies = std::accumulate(
4718 Dependencies.begin(), Dependencies.end(), 0,
4719 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4720 return D.DepKind == OMPC_DEPEND_depobj
4721 ? V
4722 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4723 });
4724 QualType FlagsTy;
4725 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4726 bool HasDepobjDeps = false;
4727 bool HasRegularWithIterators = false;
4728 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4729 llvm::Value *NumOfRegularWithIterators =
4730 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4731 // Calculate the number of depobj dependencies and regular deps with iterators.
4732 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4733 if (D.DepKind == OMPC_DEPEND_depobj) {
4734 SmallVector<llvm::Value *, 4> Sizes =
4735 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4736 for (llvm::Value *Size : Sizes) {
4737 NumOfDepobjElements =
4738 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4739 }
4740 HasDepobjDeps = true;
4741 continue;
4742 }
4743 // Include number of iterations, if any.
4744 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4745 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4746 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4747 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4748 NumOfRegularWithIterators =
4749 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4750 }
4751 HasRegularWithIterators = true;
4752 continue;
4753 }
4754 }
4755
4756 QualType KmpDependInfoArrayTy;
4757 if (HasDepobjDeps || HasRegularWithIterators) {
4758 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4759 /*isSigned=*/false);
4760 if (HasDepobjDeps) {
4761 NumOfElements =
4762 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4763 }
4764 if (HasRegularWithIterators) {
4765 NumOfElements =
4766 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4767 }
4768 OpaqueValueExpr OVE(Loc,
4769 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4770 VK_RValue);
4771 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4772 RValue::get(NumOfElements));
4773 KmpDependInfoArrayTy =
4774 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4775 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4776 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4777 // Properly emit variable-sized array.
4778 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4779 ImplicitParamDecl::Other);
4780 CGF.EmitVarDecl(*PD);
4781 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4782 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4783 /*isSigned=*/false);
4784 } else {
4785 KmpDependInfoArrayTy = C.getConstantArrayType(
4786 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4787 ArrayType::Normal, /*IndexTypeQuals=*/0);
4788 DependenciesArray =
4789 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4790 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4791 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4792 /*isSigned=*/false);
4793 }
4794 unsigned Pos = 0;
4795 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4796 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4797 Dependencies[I].IteratorExpr)
4798 continue;
4799 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4800 DependenciesArray);
4801 }
4802 // Copy regular dependencies with iterators.
4803 LValue PosLVal = CGF.MakeAddrLValue(
4804 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4805 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4806 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4807 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4808 !Dependencies[I].IteratorExpr)
4809 continue;
4810 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4811 DependenciesArray);
4812 }
4813 // Copy final depobj arrays without iterators.
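// E.g. (an illustrative mix) for 'depend(in : x) depend(depobj : o)' the
// regular entry for 'x' is written first, and the records held by 'o' are
// then memcpy'd in at the running position below.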
4814 if (HasDepobjDeps) { 4815 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4816 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4817 continue; 4818 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4819 DependenciesArray); 4820 } 4821 } 4822 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4823 DependenciesArray, CGF.VoidPtrTy); 4824 return std::make_pair(NumOfElements, DependenciesArray); 4825 } 4826 4827 Address CGOpenMPRuntime::emitDepobjDependClause( 4828 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4829 SourceLocation Loc) { 4830 if (Dependencies.DepExprs.empty()) 4831 return Address::invalid(); 4832 // Process list of dependencies. 4833 ASTContext &C = CGM.getContext(); 4834 Address DependenciesArray = Address::invalid(); 4835 unsigned NumDependencies = Dependencies.DepExprs.size(); 4836 QualType FlagsTy; 4837 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4838 RecordDecl *KmpDependInfoRD = 4839 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4840 4841 llvm::Value *Size; 4842 // Define type kmp_depend_info[<Dependencies.size()>]; 4843 // For depobj reserve one extra element to store the number of elements. 4844 // It is required to handle depobj(x) update(in) construct. 4845 // kmp_depend_info[<Dependencies.size()>] deps; 4846 llvm::Value *NumDepsVal; 4847 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4848 if (const auto *IE = 4849 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4850 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4851 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4852 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4853 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4854 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4855 } 4856 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4857 NumDepsVal); 4858 CharUnits SizeInBytes = 4859 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4860 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4861 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4862 NumDepsVal = 4863 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4864 } else { 4865 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4866 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4867 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 4868 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4869 Size = CGM.getSize(Sz.alignTo(Align)); 4870 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4871 } 4872 // Need to allocate on the dynamic memory. 4873 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4874 // Use default allocator. 4875 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4876 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 4877 4878 llvm::Value *Addr = 4879 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4880 CGM.getModule(), OMPRTL___kmpc_alloc), 4881 Args, ".dep.arr.addr"); 4882 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4883 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 4884 DependenciesArray = Address(Addr, Align); 4885 // Write number of elements in the first element of array for depobj. 
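// A sketch of the resulting layout (from the stores below):
//   deps[0].base_addr = <num_deps>; // element count, read back via GEP at -1
//   deps[1 .. num_deps]             // the actual dependency records
// The address handed back to the caller is advanced past element 0.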
4886 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4887 // deps[i].base_addr = NumDependencies;
4888 LValue BaseAddrLVal = CGF.EmitLValueForField(
4889 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4890 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4891 llvm::PointerUnion<unsigned *, LValue *> Pos;
4892 unsigned Idx = 1;
4893 LValue PosLVal;
4894 if (Dependencies.IteratorExpr) {
4895 PosLVal = CGF.MakeAddrLValue(
4896 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4897 C.getSizeType());
4898 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4899 /*IsInit=*/true);
4900 Pos = &PosLVal;
4901 } else {
4902 Pos = &Idx;
4903 }
4904 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4905 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4906 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
4907 return DependenciesArray;
4908 }
4909
4910 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4911 SourceLocation Loc) {
4912 ASTContext &C = CGM.getContext();
4913 QualType FlagsTy;
4914 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4915 LValue Base = CGF.EmitLoadOfPointerLValue(
4916 DepobjLVal.getAddress(CGF),
4917 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4918 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4919 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4920 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4921 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4922 Addr.getPointer(),
4923 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4924 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4925 CGF.VoidPtrTy);
4926 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4927 // Use default allocator.
4928 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4929 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4930
4931 // __kmpc_free(gtid, addr, nullptr);
4932 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4933 CGM.getModule(), OMPRTL___kmpc_free),
4934 Args);
4935 }
4936
4937 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4938 OpenMPDependClauseKind NewDepKind,
4939 SourceLocation Loc) {
4940 ASTContext &C = CGM.getContext();
4941 QualType FlagsTy;
4942 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4943 RecordDecl *KmpDependInfoRD =
4944 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4945 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4946 llvm::Value *NumDeps;
4947 LValue Base;
4948 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4949
4950 Address Begin = Base.getAddress(CGF);
4951 // Cast from pointer to array type to pointer to single element.
4952 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
4953 // The basic structure here is a while-do loop.
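// In C terms, a sketch of the loop emitted below:
//   kmp_depend_info *p = begin;
//   do {
//     p->flags = <new_kind>;
//   } while (++p != end);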
4954 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 4955 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 4956 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 4957 CGF.EmitBlock(BodyBB); 4958 llvm::PHINode *ElementPHI = 4959 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 4960 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 4961 Begin = Address(ElementPHI, Begin.getAlignment()); 4962 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 4963 Base.getTBAAInfo()); 4964 // deps[i].flags = NewDepKind; 4965 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 4966 LValue FlagsLVal = CGF.EmitLValueForField( 4967 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 4968 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 4969 FlagsLVal); 4970 4971 // Shift the address forward by one element. 4972 Address ElementNext = 4973 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 4974 ElementPHI->addIncoming(ElementNext.getPointer(), 4975 CGF.Builder.GetInsertBlock()); 4976 llvm::Value *IsEmpty = 4977 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 4978 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 4979 // Done. 4980 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 4981 } 4982 4983 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 4984 const OMPExecutableDirective &D, 4985 llvm::Function *TaskFunction, 4986 QualType SharedsTy, Address Shareds, 4987 const Expr *IfCond, 4988 const OMPTaskDataTy &Data) { 4989 if (!CGF.HaveInsertPoint()) 4990 return; 4991 4992 TaskResultTy Result = 4993 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 4994 llvm::Value *NewTask = Result.NewTask; 4995 llvm::Function *TaskEntry = Result.TaskEntry; 4996 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 4997 LValue TDBase = Result.TDBase; 4998 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 4999 // Process list of dependences. 5000 Address DependenciesArray = Address::invalid(); 5001 llvm::Value *NumOfElements; 5002 std::tie(NumOfElements, DependenciesArray) = 5003 emitDependClause(CGF, Data.Dependences, Loc); 5004 5005 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5006 // libcall. 
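// For reference, an illustrative source form of what is lowered below:
//   #pragma omp task depend(in : x) depend(out : y) if (cond)
// With dependences present, the deferred path enqueues the task via
// __kmpc_omp_task_with_deps; the undeferred path first waits on the
// dependences and then runs the task body inline between the
// task_begin_if0/task_complete_if0 calls.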
5007 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5008 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5009 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5010 // list is not empty 5011 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5012 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5013 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5014 llvm::Value *DepTaskArgs[7]; 5015 if (!Data.Dependences.empty()) { 5016 DepTaskArgs[0] = UpLoc; 5017 DepTaskArgs[1] = ThreadID; 5018 DepTaskArgs[2] = NewTask; 5019 DepTaskArgs[3] = NumOfElements; 5020 DepTaskArgs[4] = DependenciesArray.getPointer(); 5021 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5022 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5023 } 5024 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5025 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5026 if (!Data.Tied) { 5027 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5028 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5029 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5030 } 5031 if (!Data.Dependences.empty()) { 5032 CGF.EmitRuntimeCall( 5033 OMPBuilder.getOrCreateRuntimeFunction( 5034 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5035 DepTaskArgs); 5036 } else { 5037 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5038 CGM.getModule(), OMPRTL___kmpc_omp_task), 5039 TaskArgs); 5040 } 5041 // Check if parent region is untied and build return for untied task; 5042 if (auto *Region = 5043 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5044 Region->emitUntiedSwitch(CGF); 5045 }; 5046 5047 llvm::Value *DepWaitTaskArgs[6]; 5048 if (!Data.Dependences.empty()) { 5049 DepWaitTaskArgs[0] = UpLoc; 5050 DepWaitTaskArgs[1] = ThreadID; 5051 DepWaitTaskArgs[2] = NumOfElements; 5052 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5053 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5054 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5055 } 5056 auto &M = CGM.getModule(); 5057 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5058 TaskEntry, &Data, &DepWaitTaskArgs, 5059 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5060 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5061 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5062 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5063 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5064 // is specified. 
5065 if (!Data.Dependences.empty()) 5066 CGF.EmitRuntimeCall( 5067 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5068 DepWaitTaskArgs); 5069 // Call proxy_task_entry(gtid, new_task); 5070 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5071 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5072 Action.Enter(CGF); 5073 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5074 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5075 OutlinedFnArgs); 5076 }; 5077 5078 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5079 // kmp_task_t *new_task); 5080 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5081 // kmp_task_t *new_task); 5082 RegionCodeGenTy RCG(CodeGen); 5083 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5084 M, OMPRTL___kmpc_omp_task_begin_if0), 5085 TaskArgs, 5086 OMPBuilder.getOrCreateRuntimeFunction( 5087 M, OMPRTL___kmpc_omp_task_complete_if0), 5088 TaskArgs); 5089 RCG.setAction(Action); 5090 RCG(CGF); 5091 }; 5092 5093 if (IfCond) { 5094 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5095 } else { 5096 RegionCodeGenTy ThenRCG(ThenCodeGen); 5097 ThenRCG(CGF); 5098 } 5099 } 5100 5101 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5102 const OMPLoopDirective &D, 5103 llvm::Function *TaskFunction, 5104 QualType SharedsTy, Address Shareds, 5105 const Expr *IfCond, 5106 const OMPTaskDataTy &Data) { 5107 if (!CGF.HaveInsertPoint()) 5108 return; 5109 TaskResultTy Result = 5110 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5111 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5112 // libcall. 5113 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5114 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5115 // sched, kmp_uint64 grainsize, void *task_dup); 5116 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5117 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5118 llvm::Value *IfVal; 5119 if (IfCond) { 5120 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5121 /*isSigned=*/true); 5122 } else { 5123 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5124 } 5125 5126 LValue LBLVal = CGF.EmitLValueForField( 5127 Result.TDBase, 5128 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5129 const auto *LBVar = 5130 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5131 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5132 LBLVal.getQuals(), 5133 /*IsInitializer=*/true); 5134 LValue UBLVal = CGF.EmitLValueForField( 5135 Result.TDBase, 5136 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5137 const auto *UBVar = 5138 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5139 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5140 UBLVal.getQuals(), 5141 /*IsInitializer=*/true); 5142 LValue StLVal = CGF.EmitLValueForField( 5143 Result.TDBase, 5144 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5145 const auto *StVar = 5146 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5147 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5148 StLVal.getQuals(), 5149 /*IsInitializer=*/true); 5150 // Store reductions address. 
5151 LValue RedLVal = CGF.EmitLValueForField( 5152 Result.TDBase, 5153 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5154 if (Data.Reductions) { 5155 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5156 } else { 5157 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5158 CGF.getContext().VoidPtrTy); 5159 } 5160 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5161 llvm::Value *TaskArgs[] = { 5162 UpLoc, 5163 ThreadID, 5164 Result.NewTask, 5165 IfVal, 5166 LBLVal.getPointer(CGF), 5167 UBLVal.getPointer(CGF), 5168 CGF.EmitLoadOfScalar(StLVal, Loc), 5169 llvm::ConstantInt::getSigned( 5170 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5171 llvm::ConstantInt::getSigned( 5172 CGF.IntTy, Data.Schedule.getPointer() 5173 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5174 : NoSchedule), 5175 Data.Schedule.getPointer() 5176 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5177 /*isSigned=*/false) 5178 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5179 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5180 Result.TaskDupFn, CGF.VoidPtrTy) 5181 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5182 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5183 CGM.getModule(), OMPRTL___kmpc_taskloop), 5184 TaskArgs); 5185 } 5186 5187 /// Emit reduction operation for each element of array (required for 5188 /// array sections) LHS op = RHS. 5189 /// \param Type Type of array. 5190 /// \param LHSVar Variable on the left side of the reduction operation 5191 /// (references element of array in original variable). 5192 /// \param RHSVar Variable on the right side of the reduction operation 5193 /// (references element of array in original variable). 5194 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5195 /// RHSVar. 5196 static void EmitOMPAggregateReduction( 5197 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5198 const VarDecl *RHSVar, 5199 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5200 const Expr *, const Expr *)> &RedOpGen, 5201 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5202 const Expr *UpExpr = nullptr) { 5203 // Perform element-by-element initialization. 5204 QualType ElementTy; 5205 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5206 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5207 5208 // Drill down to the base element type on both arrays. 5209 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5210 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5211 5212 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5213 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5214 // Cast from pointer to array type to pointer to single element. 5215 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5216 // The basic structure here is a while-do loop. 5217 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5218 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5219 llvm::Value *IsEmpty = 5220 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5221 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5222 5223 // Enter the loop body, making that address the current address. 
5224 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5225 CGF.EmitBlock(BodyBB); 5226 5227 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5228 5229 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5230 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5231 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5232 Address RHSElementCurrent = 5233 Address(RHSElementPHI, 5234 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5235 5236 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5237 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5238 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5239 Address LHSElementCurrent = 5240 Address(LHSElementPHI, 5241 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5242 5243 // Emit copy. 5244 CodeGenFunction::OMPPrivateScope Scope(CGF); 5245 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5246 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5247 Scope.Privatize(); 5248 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5249 Scope.ForceCleanup(); 5250 5251 // Shift the address forward by one element. 5252 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5253 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5254 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5255 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5256 // Check whether we've reached the end. 5257 llvm::Value *Done = 5258 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5259 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5260 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5261 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5262 5263 // Done. 5264 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5265 } 5266 5267 /// Emit reduction combiner. If the combiner is a simple expression emit it as 5268 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5269 /// UDR combiner function. 
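/// An illustrative user-defined reduction that takes the call-emission path
/// (a sketch, not from this file):
/// \code
/// #pragma omp declare reduction(merge : std::vector<int> : omp_out.insert( \
///     omp_out.end(), omp_in.begin(), omp_in.end()))
/// \endcode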
5270 static void emitReductionCombiner(CodeGenFunction &CGF, 5271 const Expr *ReductionOp) { 5272 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5273 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5274 if (const auto *DRE = 5275 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5276 if (const auto *DRD = 5277 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5278 std::pair<llvm::Function *, llvm::Function *> Reduction = 5279 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5280 RValue Func = RValue::get(Reduction.first); 5281 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5282 CGF.EmitIgnoredExpr(ReductionOp); 5283 return; 5284 } 5285 CGF.EmitIgnoredExpr(ReductionOp); 5286 } 5287 5288 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5289 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5290 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5291 ArrayRef<const Expr *> ReductionOps) { 5292 ASTContext &C = CGM.getContext(); 5293 5294 // void reduction_func(void *LHSArg, void *RHSArg); 5295 FunctionArgList Args; 5296 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5297 ImplicitParamDecl::Other); 5298 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5299 ImplicitParamDecl::Other); 5300 Args.push_back(&LHSArg); 5301 Args.push_back(&RHSArg); 5302 const auto &CGFI = 5303 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5304 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5305 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5306 llvm::GlobalValue::InternalLinkage, Name, 5307 &CGM.getModule()); 5308 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5309 Fn->setDoesNotRecurse(); 5310 CodeGenFunction CGF(CGM); 5311 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5312 5313 // Dst = (void*[n])(LHSArg); 5314 // Src = (void*[n])(RHSArg); 5315 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5316 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5317 ArgsType), CGF.getPointerAlign()); 5318 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5319 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5320 ArgsType), CGF.getPointerAlign()); 5321 5322 // ... 5323 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5324 // ... 5325 CodeGenFunction::OMPPrivateScope Scope(CGF); 5326 auto IPriv = Privates.begin(); 5327 unsigned Idx = 0; 5328 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5329 const auto *RHSVar = 5330 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5331 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5332 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5333 }); 5334 const auto *LHSVar = 5335 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5336 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5337 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5338 }); 5339 QualType PrivTy = (*IPriv)->getType(); 5340 if (PrivTy->isVariablyModifiedType()) { 5341 // Get array size and emit VLA type. 
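// The size was passed in an extra void* slot placed right after the item
// itself (see the RedList setup in emitReduction below), encoded via
// inttoptr; decode it back with ptrtoint.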
5342 ++Idx; 5343 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5344 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5345 const VariableArrayType *VLA = 5346 CGF.getContext().getAsVariableArrayType(PrivTy); 5347 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5348 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5349 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5350 CGF.EmitVariablyModifiedType(PrivTy); 5351 } 5352 } 5353 Scope.Privatize(); 5354 IPriv = Privates.begin(); 5355 auto ILHS = LHSExprs.begin(); 5356 auto IRHS = RHSExprs.begin(); 5357 for (const Expr *E : ReductionOps) { 5358 if ((*IPriv)->getType()->isArrayType()) { 5359 // Emit reduction for array section. 5360 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5361 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5362 EmitOMPAggregateReduction( 5363 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5364 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5365 emitReductionCombiner(CGF, E); 5366 }); 5367 } else { 5368 // Emit reduction for array subscript or single variable. 5369 emitReductionCombiner(CGF, E); 5370 } 5371 ++IPriv; 5372 ++ILHS; 5373 ++IRHS; 5374 } 5375 Scope.ForceCleanup(); 5376 CGF.FinishFunction(); 5377 return Fn; 5378 } 5379 5380 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5381 const Expr *ReductionOp, 5382 const Expr *PrivateRef, 5383 const DeclRefExpr *LHS, 5384 const DeclRefExpr *RHS) { 5385 if (PrivateRef->getType()->isArrayType()) { 5386 // Emit reduction for array section. 5387 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5388 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5389 EmitOMPAggregateReduction( 5390 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5391 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5392 emitReductionCombiner(CGF, ReductionOp); 5393 }); 5394 } else { 5395 // Emit reduction for array subscript or single variable. 5396 emitReductionCombiner(CGF, ReductionOp); 5397 } 5398 } 5399 5400 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5401 ArrayRef<const Expr *> Privates, 5402 ArrayRef<const Expr *> LHSExprs, 5403 ArrayRef<const Expr *> RHSExprs, 5404 ArrayRef<const Expr *> ReductionOps, 5405 ReductionOptionsTy Options) { 5406 if (!CGF.HaveInsertPoint()) 5407 return; 5408 5409 bool WithNowait = Options.WithNowait; 5410 bool SimpleReduction = Options.SimpleReduction; 5411 5412 // Next code should be emitted for reduction: 5413 // 5414 // static kmp_critical_name lock = { 0 }; 5415 // 5416 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5417 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5418 // ... 5419 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5420 // *(Type<n>-1*)rhs[<n>-1]); 5421 // } 5422 // 5423 // ... 5424 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5425 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5426 // RedList, reduce_func, &<lock>)) { 5427 // case 1: 5428 // ... 5429 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5430 // ... 5431 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5432 // break; 5433 // case 2: 5434 // ... 5435 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5436 // ... 
5437 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5438 // break; 5439 // default:; 5440 // } 5441 // 5442 // if SimpleReduction is true, only the next code is generated: 5443 // ... 5444 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5445 // ... 5446 5447 ASTContext &C = CGM.getContext(); 5448 5449 if (SimpleReduction) { 5450 CodeGenFunction::RunCleanupsScope Scope(CGF); 5451 auto IPriv = Privates.begin(); 5452 auto ILHS = LHSExprs.begin(); 5453 auto IRHS = RHSExprs.begin(); 5454 for (const Expr *E : ReductionOps) { 5455 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5456 cast<DeclRefExpr>(*IRHS)); 5457 ++IPriv; 5458 ++ILHS; 5459 ++IRHS; 5460 } 5461 return; 5462 } 5463 5464 // 1. Build a list of reduction variables. 5465 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5466 auto Size = RHSExprs.size(); 5467 for (const Expr *E : Privates) { 5468 if (E->getType()->isVariablyModifiedType()) 5469 // Reserve place for array size. 5470 ++Size; 5471 } 5472 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5473 QualType ReductionArrayTy = 5474 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5475 /*IndexTypeQuals=*/0); 5476 Address ReductionList = 5477 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5478 auto IPriv = Privates.begin(); 5479 unsigned Idx = 0; 5480 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5481 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5482 CGF.Builder.CreateStore( 5483 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5484 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5485 Elem); 5486 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5487 // Store array size. 5488 ++Idx; 5489 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5490 llvm::Value *Size = CGF.Builder.CreateIntCast( 5491 CGF.getVLASize( 5492 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5493 .NumElts, 5494 CGF.SizeTy, /*isSigned=*/false); 5495 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5496 Elem); 5497 } 5498 } 5499 5500 // 2. Emit reduce_func(). 5501 llvm::Function *ReductionFn = emitReductionFunction( 5502 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5503 LHSExprs, RHSExprs, ReductionOps); 5504 5505 // 3. Create static kmp_critical_name lock = { 0 }; 5506 std::string Name = getName({"reduction"}); 5507 llvm::Value *Lock = getCriticalRegionLock(Name); 5508 5509 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5510 // RedList, reduce_func, &<lock>); 5511 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5512 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5513 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5514 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5515 ReductionList.getPointer(), CGF.VoidPtrTy); 5516 llvm::Value *Args[] = { 5517 IdentTLoc, // ident_t *<loc> 5518 ThreadId, // i32 <gtid> 5519 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5520 ReductionArrayTySize, // size_type sizeof(RedList) 5521 RL, // void *RedList 5522 ReductionFn, // void (*) (void *, void *) <reduce_func> 5523 Lock // kmp_critical_name *&<lock> 5524 }; 5525 llvm::Value *Res = CGF.EmitRuntimeCall( 5526 OMPBuilder.getOrCreateRuntimeFunction( 5527 CGM.getModule(), 5528 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5529 Args); 5530 5531 // 5. 
Build switch(res) 5532 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5533 llvm::SwitchInst *SwInst = 5534 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5535 5536 // 6. Build case 1: 5537 // ... 5538 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5539 // ... 5540 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5541 // break; 5542 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5543 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5544 CGF.EmitBlock(Case1BB); 5545 5546 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5547 llvm::Value *EndArgs[] = { 5548 IdentTLoc, // ident_t *<loc> 5549 ThreadId, // i32 <gtid> 5550 Lock // kmp_critical_name *&<lock> 5551 }; 5552 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5553 CodeGenFunction &CGF, PrePostActionTy &Action) { 5554 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5555 auto IPriv = Privates.begin(); 5556 auto ILHS = LHSExprs.begin(); 5557 auto IRHS = RHSExprs.begin(); 5558 for (const Expr *E : ReductionOps) { 5559 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5560 cast<DeclRefExpr>(*IRHS)); 5561 ++IPriv; 5562 ++ILHS; 5563 ++IRHS; 5564 } 5565 }; 5566 RegionCodeGenTy RCG(CodeGen); 5567 CommonActionTy Action( 5568 nullptr, llvm::None, 5569 OMPBuilder.getOrCreateRuntimeFunction( 5570 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5571 : OMPRTL___kmpc_end_reduce), 5572 EndArgs); 5573 RCG.setAction(Action); 5574 RCG(CGF); 5575 5576 CGF.EmitBranch(DefaultBB); 5577 5578 // 7. Build case 2: 5579 // ... 5580 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5581 // ... 5582 // break; 5583 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5584 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5585 CGF.EmitBlock(Case2BB); 5586 5587 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5588 CodeGenFunction &CGF, PrePostActionTy &Action) { 5589 auto ILHS = LHSExprs.begin(); 5590 auto IRHS = RHSExprs.begin(); 5591 auto IPriv = Privates.begin(); 5592 for (const Expr *E : ReductionOps) { 5593 const Expr *XExpr = nullptr; 5594 const Expr *EExpr = nullptr; 5595 const Expr *UpExpr = nullptr; 5596 BinaryOperatorKind BO = BO_Comma; 5597 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5598 if (BO->getOpcode() == BO_Assign) { 5599 XExpr = BO->getLHS(); 5600 UpExpr = BO->getRHS(); 5601 } 5602 } 5603 // Try to emit update expression as a simple atomic. 5604 const Expr *RHSExpr = UpExpr; 5605 if (RHSExpr) { 5606 // Analyze RHS part of the whole expression. 5607 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5608 RHSExpr->IgnoreParenImpCasts())) { 5609 // If this is a conditional operator, analyze its condition for 5610 // min/max reduction operator. 
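// E.g. for 'reduction(min : x)' the update expression has the form
// 'x = x < e ? x : e', so the comparison opcode is recovered from the
// condition analyzed here.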
5611 RHSExpr = ACO->getCond(); 5612 } 5613 if (const auto *BORHS = 5614 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5615 EExpr = BORHS->getRHS(); 5616 BO = BORHS->getOpcode(); 5617 } 5618 } 5619 if (XExpr) { 5620 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5621 auto &&AtomicRedGen = [BO, VD, 5622 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5623 const Expr *EExpr, const Expr *UpExpr) { 5624 LValue X = CGF.EmitLValue(XExpr); 5625 RValue E; 5626 if (EExpr) 5627 E = CGF.EmitAnyExpr(EExpr); 5628 CGF.EmitOMPAtomicSimpleUpdateExpr( 5629 X, E, BO, /*IsXLHSInRHSPart=*/true, 5630 llvm::AtomicOrdering::Monotonic, Loc, 5631 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5632 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5633 PrivateScope.addPrivate( 5634 VD, [&CGF, VD, XRValue, Loc]() { 5635 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5636 CGF.emitOMPSimpleStore( 5637 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5638 VD->getType().getNonReferenceType(), Loc); 5639 return LHSTemp; 5640 }); 5641 (void)PrivateScope.Privatize(); 5642 return CGF.EmitAnyExpr(UpExpr); 5643 }); 5644 }; 5645 if ((*IPriv)->getType()->isArrayType()) { 5646 // Emit atomic reduction for array section. 5647 const auto *RHSVar = 5648 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5649 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5650 AtomicRedGen, XExpr, EExpr, UpExpr); 5651 } else { 5652 // Emit atomic reduction for array subscript or single variable. 5653 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5654 } 5655 } else { 5656 // Emit as a critical region. 5657 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5658 const Expr *, const Expr *) { 5659 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5660 std::string Name = RT.getName({"atomic_reduction"}); 5661 RT.emitCriticalRegion( 5662 CGF, Name, 5663 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5664 Action.Enter(CGF); 5665 emitReductionCombiner(CGF, E); 5666 }, 5667 Loc); 5668 }; 5669 if ((*IPriv)->getType()->isArrayType()) { 5670 const auto *LHSVar = 5671 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5672 const auto *RHSVar = 5673 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5674 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5675 CritRedGen); 5676 } else { 5677 CritRedGen(CGF, nullptr, nullptr, nullptr); 5678 } 5679 } 5680 ++ILHS; 5681 ++IRHS; 5682 ++IPriv; 5683 } 5684 }; 5685 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5686 if (!WithNowait) { 5687 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5688 llvm::Value *EndArgs[] = { 5689 IdentTLoc, // ident_t *<loc> 5690 ThreadId, // i32 <gtid> 5691 Lock // kmp_critical_name *&<lock> 5692 }; 5693 CommonActionTy Action(nullptr, llvm::None, 5694 OMPBuilder.getOrCreateRuntimeFunction( 5695 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5696 EndArgs); 5697 AtomicRCG.setAction(Action); 5698 AtomicRCG(CGF); 5699 } else { 5700 AtomicRCG(CGF); 5701 } 5702 5703 CGF.EmitBranch(DefaultBB); 5704 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5705 } 5706 5707 /// Generates unique name for artificial threadprivate variables. 5708 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5709 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5710 const Expr *Ref) { 5711 SmallString<256> Buffer; 5712 llvm::raw_svector_ostream Out(Buffer); 5713 const clang::DeclRefExpr *DE; 5714 const VarDecl *D = ::getBaseDecl(Ref, DE); 5715 if (!D) 5716 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5717 D = D->getCanonicalDecl(); 5718 std::string Name = CGM.getOpenMPRuntime().getName( 5719 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5720 Out << Prefix << Name << "_" 5721 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5722 return std::string(Out.str()); 5723 } 5724 5725 /// Emits reduction initializer function: 5726 /// \code 5727 /// void @.red_init(void* %arg, void* %orig) { 5728 /// %0 = bitcast void* %arg to <type>* 5729 /// store <type> <init>, <type>* %0 5730 /// ret void 5731 /// } 5732 /// \endcode 5733 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5734 SourceLocation Loc, 5735 ReductionCodeGen &RCG, unsigned N) { 5736 ASTContext &C = CGM.getContext(); 5737 QualType VoidPtrTy = C.VoidPtrTy; 5738 VoidPtrTy.addRestrict(); 5739 FunctionArgList Args; 5740 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5741 ImplicitParamDecl::Other); 5742 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5743 ImplicitParamDecl::Other); 5744 Args.emplace_back(&Param); 5745 Args.emplace_back(&ParamOrig); 5746 const auto &FnInfo = 5747 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5748 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5749 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5750 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5751 Name, &CGM.getModule()); 5752 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5753 Fn->setDoesNotRecurse(); 5754 CodeGenFunction CGF(CGM); 5755 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5756 Address PrivateAddr = CGF.EmitLoadOfPointer( 5757 CGF.GetAddrOfLocalVar(&Param), 5758 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5759 llvm::Value *Size = nullptr; 5760 // If the size of the reduction item is non-constant, load it from global 5761 // threadprivate variable. 
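  // Cross-reference (informational): emitTaskReductionFixups below stores this
  // dynamic size into the artificial threadprivate variable under the
  // "reduction_size" prefix produced by generateUniqueName.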
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If the initializer uses the initializer from the 'declare reduction'
  // construct, emit a pointer to the address of the original reduction item
  // (required by the reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
///   %lhs = bitcast void* %arg0 to <type>*
///   %rhs = bitcast void* %arg1 to <type>*
///   %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
///   store <type> %2, <type>* %lhs
///   ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
5833 if (RCG.getSizes(N).second) { 5834 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5835 CGF, CGM.getContext().getSizeType(), 5836 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5837 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5838 CGM.getContext().getSizeType(), Loc); 5839 } 5840 RCG.emitAggregateType(CGF, N, Size); 5841 // Remap lhs and rhs variables to the addresses of the function arguments. 5842 // %lhs = bitcast void* %arg0 to <type>* 5843 // %rhs = bitcast void* %arg1 to <type>* 5844 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5845 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 5846 // Pull out the pointer to the variable. 5847 Address PtrAddr = CGF.EmitLoadOfPointer( 5848 CGF.GetAddrOfLocalVar(&ParamInOut), 5849 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5850 return CGF.Builder.CreateElementBitCast( 5851 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 5852 }); 5853 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 5854 // Pull out the pointer to the variable. 5855 Address PtrAddr = CGF.EmitLoadOfPointer( 5856 CGF.GetAddrOfLocalVar(&ParamIn), 5857 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5858 return CGF.Builder.CreateElementBitCast( 5859 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 5860 }); 5861 PrivateScope.Privatize(); 5862 // Emit the combiner body: 5863 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5864 // store <type> %2, <type>* %lhs 5865 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5866 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5867 cast<DeclRefExpr>(RHS)); 5868 CGF.FinishFunction(); 5869 return Fn; 5870 } 5871 5872 /// Emits reduction finalizer function: 5873 /// \code 5874 /// void @.red_fini(void* %arg) { 5875 /// %0 = bitcast void* %arg to <type>* 5876 /// <destroy>(<type>* %0) 5877 /// ret void 5878 /// } 5879 /// \endcode 5880 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5881 SourceLocation Loc, 5882 ReductionCodeGen &RCG, unsigned N) { 5883 if (!RCG.needCleanups(N)) 5884 return nullptr; 5885 ASTContext &C = CGM.getContext(); 5886 FunctionArgList Args; 5887 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5888 ImplicitParamDecl::Other); 5889 Args.emplace_back(&Param); 5890 const auto &FnInfo = 5891 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5892 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5893 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 5894 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5895 Name, &CGM.getModule()); 5896 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5897 Fn->setDoesNotRecurse(); 5898 CodeGenFunction CGF(CGM); 5899 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5900 Address PrivateAddr = CGF.EmitLoadOfPointer( 5901 CGF.GetAddrOfLocalVar(&Param), 5902 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5903 llvm::Value *Size = nullptr; 5904 // If the size of the reduction item is non-constant, load it from global 5905 // threadprivate variable. 
5906 if (RCG.getSizes(N).second) { 5907 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5908 CGF, CGM.getContext().getSizeType(), 5909 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5910 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5911 CGM.getContext().getSizeType(), Loc); 5912 } 5913 RCG.emitAggregateType(CGF, N, Size); 5914 // Emit the finalizer body: 5915 // <destroy>(<type>* %0) 5916 RCG.emitCleanups(CGF, N, PrivateAddr); 5917 CGF.FinishFunction(Loc); 5918 return Fn; 5919 } 5920 5921 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 5922 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 5923 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 5924 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 5925 return nullptr; 5926 5927 // Build typedef struct: 5928 // kmp_taskred_input { 5929 // void *reduce_shar; // shared reduction item 5930 // void *reduce_orig; // original reduction item used for initialization 5931 // size_t reduce_size; // size of data item 5932 // void *reduce_init; // data initialization routine 5933 // void *reduce_fini; // data finalization routine 5934 // void *reduce_comb; // data combiner routine 5935 // kmp_task_red_flags_t flags; // flags for additional info from compiler 5936 // } kmp_taskred_input_t; 5937 ASTContext &C = CGM.getContext(); 5938 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 5939 RD->startDefinition(); 5940 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5941 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5942 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 5943 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5944 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5945 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5946 const FieldDecl *FlagsFD = addFieldToRecordDecl( 5947 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 5948 RD->completeDefinition(); 5949 QualType RDType = C.getRecordType(RD); 5950 unsigned Size = Data.ReductionVars.size(); 5951 llvm::APInt ArraySize(/*numBits=*/64, Size); 5952 QualType ArrayRDType = C.getConstantArrayType( 5953 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 5954 // kmp_task_red_input_t .rd_input.[Size]; 5955 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 5956 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 5957 Data.ReductionCopies, Data.ReductionOps); 5958 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 5959 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 5960 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 5961 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 5962 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 5963 TaskRedInput.getPointer(), Idxs, 5964 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 5965 ".rd_input.gep."); 5966 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 5967 // ElemLVal.reduce_shar = &Shareds[Cnt]; 5968 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 5969 RCG.emitSharedOrigLValue(CGF, Cnt); 5970 llvm::Value *CastedShared = 5971 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 5972 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 5973 // ElemLVal.reduce_orig = &Origs[Cnt]; 5974 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 5975 llvm::Value *CastedOrig = 5976 
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because the runtime does not provide a way to pass the
    // sizes of VLAs/array sections to the initializer/combiner/finalizer
    // functions. Instead, threadprivate global variables are used to store
    // these values and retrieve them inside those functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit a threadprivate global variable if the size is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
  // void *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.CreateTaskwait(CGF.Builder);
  } else {
    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    // Ignore return result until untied tasks are supported.
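    // Sketch of the emitted call (illustrative names, assuming the usual
    // ident_t location global and thread id):
    //   %res = call i32 @__kmpc_omp_taskwait(%struct.ident_t* @loc, i32 %gtid)
    // where %res is currently dropped, as noted above.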
6117 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6118 CGM.getModule(), OMPRTL___kmpc_omp_taskwait), 6119 Args); 6120 } 6121 6122 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6123 Region->emitUntiedSwitch(CGF); 6124 } 6125 6126 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6127 OpenMPDirectiveKind InnerKind, 6128 const RegionCodeGenTy &CodeGen, 6129 bool HasCancel) { 6130 if (!CGF.HaveInsertPoint()) 6131 return; 6132 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 6133 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6134 } 6135 6136 namespace { 6137 enum RTCancelKind { 6138 CancelNoreq = 0, 6139 CancelParallel = 1, 6140 CancelLoop = 2, 6141 CancelSections = 3, 6142 CancelTaskgroup = 4 6143 }; 6144 } // anonymous namespace 6145 6146 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6147 RTCancelKind CancelKind = CancelNoreq; 6148 if (CancelRegion == OMPD_parallel) 6149 CancelKind = CancelParallel; 6150 else if (CancelRegion == OMPD_for) 6151 CancelKind = CancelLoop; 6152 else if (CancelRegion == OMPD_sections) 6153 CancelKind = CancelSections; 6154 else { 6155 assert(CancelRegion == OMPD_taskgroup); 6156 CancelKind = CancelTaskgroup; 6157 } 6158 return CancelKind; 6159 } 6160 6161 void CGOpenMPRuntime::emitCancellationPointCall( 6162 CodeGenFunction &CGF, SourceLocation Loc, 6163 OpenMPDirectiveKind CancelRegion) { 6164 if (!CGF.HaveInsertPoint()) 6165 return; 6166 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6167 // global_tid, kmp_int32 cncl_kind); 6168 if (auto *OMPRegionInfo = 6169 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6170 // For 'cancellation point taskgroup', the task region info may not have a 6171 // cancel. This may instead happen in another adjacent task. 6172 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6173 llvm::Value *Args[] = { 6174 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6175 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6176 // Ignore return result until untied tasks are supported. 
6177 llvm::Value *Result = CGF.EmitRuntimeCall( 6178 OMPBuilder.getOrCreateRuntimeFunction( 6179 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6180 Args); 6181 // if (__kmpc_cancellationpoint()) { 6182 // exit from construct; 6183 // } 6184 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6185 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6186 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6187 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6188 CGF.EmitBlock(ExitBB); 6189 // exit from construct; 6190 CodeGenFunction::JumpDest CancelDest = 6191 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6192 CGF.EmitBranchThroughCleanup(CancelDest); 6193 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6194 } 6195 } 6196 } 6197 6198 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6199 const Expr *IfCond, 6200 OpenMPDirectiveKind CancelRegion) { 6201 if (!CGF.HaveInsertPoint()) 6202 return; 6203 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6204 // kmp_int32 cncl_kind); 6205 auto &M = CGM.getModule(); 6206 if (auto *OMPRegionInfo = 6207 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6208 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6209 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6210 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6211 llvm::Value *Args[] = { 6212 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6213 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6214 // Ignore return result until untied tasks are supported. 6215 llvm::Value *Result = CGF.EmitRuntimeCall( 6216 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6217 // if (__kmpc_cancel()) { 6218 // exit from construct; 6219 // } 6220 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6221 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6222 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6223 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6224 CGF.EmitBlock(ExitBB); 6225 // exit from construct; 6226 CodeGenFunction::JumpDest CancelDest = 6227 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6228 CGF.EmitBranchThroughCleanup(CancelDest); 6229 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6230 }; 6231 if (IfCond) { 6232 emitIfClause(CGF, IfCond, ThenGen, 6233 [](CodeGenFunction &, PrePostActionTy &) {}); 6234 } else { 6235 RegionCodeGenTy ThenRCG(ThenGen); 6236 ThenRCG(CGF); 6237 } 6238 } 6239 } 6240 6241 namespace { 6242 /// Cleanup action for uses_allocators support. 
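/// On entry to the region, each (allocator, allocator traits) pair from the
/// clause is initialized via emitUsesAllocatorsInit (a __kmpc_init_allocator
/// call); on exit, each allocator is released via emitUsesAllocatorsFini
/// (a __kmpc_destroy_allocator call).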
6243 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6244 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6245 6246 public: 6247 OMPUsesAllocatorsActionTy( 6248 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6249 : Allocators(Allocators) {} 6250 void Enter(CodeGenFunction &CGF) override { 6251 if (!CGF.HaveInsertPoint()) 6252 return; 6253 for (const auto &AllocatorData : Allocators) { 6254 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6255 CGF, AllocatorData.first, AllocatorData.second); 6256 } 6257 } 6258 void Exit(CodeGenFunction &CGF) override { 6259 if (!CGF.HaveInsertPoint()) 6260 return; 6261 for (const auto &AllocatorData : Allocators) { 6262 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6263 AllocatorData.first); 6264 } 6265 } 6266 }; 6267 } // namespace 6268 6269 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6270 const OMPExecutableDirective &D, StringRef ParentName, 6271 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6272 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6273 assert(!ParentName.empty() && "Invalid target region parent name!"); 6274 HasEmittedTargetRegion = true; 6275 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6276 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6277 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6278 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6279 if (!D.AllocatorTraits) 6280 continue; 6281 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6282 } 6283 } 6284 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6285 CodeGen.setAction(UsesAllocatorAction); 6286 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6287 IsOffloadEntry, CodeGen); 6288 } 6289 6290 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6291 const Expr *Allocator, 6292 const Expr *AllocatorTraits) { 6293 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6294 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6295 // Use default memspace handle. 6296 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6297 llvm::Value *NumTraits = llvm::ConstantInt::get( 6298 CGF.IntTy, cast<ConstantArrayType>( 6299 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6300 ->getSize() 6301 .getLimitedValue()); 6302 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6303 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6304 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6305 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6306 AllocatorTraitsLVal.getBaseInfo(), 6307 AllocatorTraitsLVal.getTBAAInfo()); 6308 llvm::Value *Traits = 6309 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6310 6311 llvm::Value *AllocatorVal = 6312 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6313 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6314 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6315 // Store to allocator. 
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the
  // current target region, so it only has to be unique and not necessarily
  // point to anything. It could be the pointer to the outlined function that
  // implements the target region, but we aren't using that so that the
  // compiler doesn't need to keep that, and could therefore inline the host
  // function if proven worthwhile during optimization. On the other hand, if
  // emitting code for the device, the ID has to be the function address so
  // that it can be retrieved from the offloading entry and launched by the
  // runtime library. We also mark the outlined function to have external
  // linkage in case we are emitting code for the device, because these
  // functions will be entry points to the device.
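  //
  // A simplified sketch of the two cases (illustrative only):
  //   device: OutlinedFnID is the outlined function itself, bitcast to i8*
  //           and given weak_any linkage.
  //   host:   OutlinedFnID is the address of a constant zero-initialized i8
  //           global, named from the entry name plus a "region_id" suffix,
  //           used purely as a unique key.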
6389 6390 if (CGM.getLangOpts().OpenMPIsDevice) { 6391 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6392 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6393 OutlinedFn->setDSOLocal(false); 6394 } else { 6395 std::string Name = getName({EntryFnName, "region_id"}); 6396 OutlinedFnID = new llvm::GlobalVariable( 6397 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6398 llvm::GlobalValue::WeakAnyLinkage, 6399 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6400 } 6401 6402 // Register the information for the entry associated with this target region. 6403 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6404 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6405 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6406 } 6407 6408 /// Checks if the expression is constant or does not have non-trivial function 6409 /// calls. 6410 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6411 // We can skip constant expressions. 6412 // We can skip expressions with trivial calls or simple expressions. 6413 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6414 !E->hasNonTrivialCall(Ctx)) && 6415 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6416 } 6417 6418 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6419 const Stmt *Body) { 6420 const Stmt *Child = Body->IgnoreContainers(); 6421 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6422 Child = nullptr; 6423 for (const Stmt *S : C->body()) { 6424 if (const auto *E = dyn_cast<Expr>(S)) { 6425 if (isTrivial(Ctx, E)) 6426 continue; 6427 } 6428 // Some of the statements can be ignored. 6429 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6430 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6431 continue; 6432 // Analyze declarations. 6433 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6434 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6435 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6436 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6437 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6438 isa<UsingDirectiveDecl>(D) || 6439 isa<OMPDeclareReductionDecl>(D) || 6440 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6441 return true; 6442 const auto *VD = dyn_cast<VarDecl>(D); 6443 if (!VD) 6444 return false; 6445 return VD->isConstexpr() || 6446 ((VD->getType().isTrivialType(Ctx) || 6447 VD->getType()->isReferenceType()) && 6448 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6449 })) 6450 continue; 6451 } 6452 // Found multiple children - cannot get the one child only. 6453 if (Child) 6454 return nullptr; 6455 Child = S; 6456 } 6457 if (Child) 6458 Child = Child->IgnoreContainers(); 6459 } 6460 return Child; 6461 } 6462 6463 /// Emit the number of teams for a target directive. Inspect the num_teams 6464 /// clause associated with a teams construct combined or closely nested 6465 /// with the target directive. 6466 /// 6467 /// Emit a team of size one for directives such as 'target parallel' that 6468 /// have no associated teams construct. 6469 /// 6470 /// Otherwise, return nullptr. 
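///
/// As an illustration (not exhaustive): for
/// \code
/// #pragma omp target teams num_teams(8)
/// \endcode
/// the emitted value is that of the num_teams expression; for a bare
/// 'target parallel' it is the constant 1; and for a plain 'target' whose
/// body cannot be analyzed this returns nullptr.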
6471 static llvm::Value * 6472 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6473 const OMPExecutableDirective &D) { 6474 assert(!CGF.getLangOpts().OpenMPIsDevice && 6475 "Clauses associated with the teams directive expected to be emitted " 6476 "only for the host!"); 6477 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6478 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6479 "Expected target-based executable directive."); 6480 CGBuilderTy &Bld = CGF.Builder; 6481 switch (DirectiveKind) { 6482 case OMPD_target: { 6483 const auto *CS = D.getInnermostCapturedStmt(); 6484 const auto *Body = 6485 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6486 const Stmt *ChildStmt = 6487 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6488 if (const auto *NestedDir = 6489 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6490 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6491 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6492 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6493 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6494 const Expr *NumTeams = 6495 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6496 llvm::Value *NumTeamsVal = 6497 CGF.EmitScalarExpr(NumTeams, 6498 /*IgnoreResultAssign*/ true); 6499 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6500 /*isSigned=*/true); 6501 } 6502 return Bld.getInt32(0); 6503 } 6504 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6505 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6506 return Bld.getInt32(1); 6507 return Bld.getInt32(0); 6508 } 6509 return nullptr; 6510 } 6511 case OMPD_target_teams: 6512 case OMPD_target_teams_distribute: 6513 case OMPD_target_teams_distribute_simd: 6514 case OMPD_target_teams_distribute_parallel_for: 6515 case OMPD_target_teams_distribute_parallel_for_simd: { 6516 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6517 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6518 const Expr *NumTeams = 6519 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6520 llvm::Value *NumTeamsVal = 6521 CGF.EmitScalarExpr(NumTeams, 6522 /*IgnoreResultAssign*/ true); 6523 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6524 /*isSigned=*/true); 6525 } 6526 return Bld.getInt32(0); 6527 } 6528 case OMPD_target_parallel: 6529 case OMPD_target_parallel_for: 6530 case OMPD_target_parallel_for_simd: 6531 case OMPD_target_simd: 6532 return Bld.getInt32(1); 6533 case OMPD_parallel: 6534 case OMPD_for: 6535 case OMPD_parallel_for: 6536 case OMPD_parallel_master: 6537 case OMPD_parallel_sections: 6538 case OMPD_for_simd: 6539 case OMPD_parallel_for_simd: 6540 case OMPD_cancel: 6541 case OMPD_cancellation_point: 6542 case OMPD_ordered: 6543 case OMPD_threadprivate: 6544 case OMPD_allocate: 6545 case OMPD_task: 6546 case OMPD_simd: 6547 case OMPD_sections: 6548 case OMPD_section: 6549 case OMPD_single: 6550 case OMPD_master: 6551 case OMPD_critical: 6552 case OMPD_taskyield: 6553 case OMPD_barrier: 6554 case OMPD_taskwait: 6555 case OMPD_taskgroup: 6556 case OMPD_atomic: 6557 case OMPD_flush: 6558 case OMPD_depobj: 6559 case OMPD_scan: 6560 case OMPD_teams: 6561 case OMPD_target_data: 6562 case OMPD_target_exit_data: 6563 case OMPD_target_enter_data: 6564 case OMPD_distribute: 6565 case OMPD_distribute_simd: 6566 case OMPD_distribute_parallel_for: 6567 case OMPD_distribute_parallel_for_simd: 6568 case OMPD_teams_distribute: 6569 case OMPD_teams_distribute_simd: 6570 case OMPD_teams_distribute_parallel_for: 
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle the 'if' clause. If an 'if' clause is present, the number of
      // threads is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of the num_threads clause only if the 'if' clause was
      // not specified or did not evaluate to false.
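      // (Illustrative: 'num_threads(N)' under an enclosing thread limit TL
      // yields min(N, TL) via the select below; the literal 0 means no
      // specific number was requested.)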
6643 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6644 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6645 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6646 const auto *NumThreadsClause = 6647 Dir->getSingleClause<OMPNumThreadsClause>(); 6648 CodeGenFunction::LexicalScope Scope( 6649 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6650 if (const auto *PreInit = 6651 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6652 for (const auto *I : PreInit->decls()) { 6653 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6654 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6655 } else { 6656 CodeGenFunction::AutoVarEmission Emission = 6657 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6658 CGF.EmitAutoVarCleanups(Emission); 6659 } 6660 } 6661 } 6662 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6663 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6664 /*isSigned=*/false); 6665 if (DefaultThreadLimitVal) 6666 NumThreads = CGF.Builder.CreateSelect( 6667 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6668 DefaultThreadLimitVal, NumThreads); 6669 } else { 6670 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6671 : CGF.Builder.getInt32(0); 6672 } 6673 // Process condition of the if clause. 6674 if (CondVal) { 6675 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6676 CGF.Builder.getInt32(1)); 6677 } 6678 return NumThreads; 6679 } 6680 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6681 return CGF.Builder.getInt32(1); 6682 return DefaultThreadLimitVal; 6683 } 6684 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6685 : CGF.Builder.getInt32(0); 6686 } 6687 6688 /// Emit the number of threads for a target directive. Inspect the 6689 /// thread_limit clause associated with a teams construct combined or closely 6690 /// nested with the target directive. 6691 /// 6692 /// Emit the num_threads clause for directives such as 'target parallel' that 6693 /// have no associated teams construct. 6694 /// 6695 /// Otherwise, return nullptr. 
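///
/// As a sketch (illustrative only): for
/// \code
/// #pragma omp target teams thread_limit(64)
/// \endcode
/// the emitted value derives from the thread_limit expression, while for
/// 'target simd' the result is the constant 1.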
6696 static llvm::Value * 6697 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6698 const OMPExecutableDirective &D) { 6699 assert(!CGF.getLangOpts().OpenMPIsDevice && 6700 "Clauses associated with the teams directive expected to be emitted " 6701 "only for the host!"); 6702 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6703 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6704 "Expected target-based executable directive."); 6705 CGBuilderTy &Bld = CGF.Builder; 6706 llvm::Value *ThreadLimitVal = nullptr; 6707 llvm::Value *NumThreadsVal = nullptr; 6708 switch (DirectiveKind) { 6709 case OMPD_target: { 6710 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6711 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6712 return NumThreads; 6713 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6714 CGF.getContext(), CS->getCapturedStmt()); 6715 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6716 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6717 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6718 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6719 const auto *ThreadLimitClause = 6720 Dir->getSingleClause<OMPThreadLimitClause>(); 6721 CodeGenFunction::LexicalScope Scope( 6722 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6723 if (const auto *PreInit = 6724 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6725 for (const auto *I : PreInit->decls()) { 6726 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6727 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6728 } else { 6729 CodeGenFunction::AutoVarEmission Emission = 6730 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6731 CGF.EmitAutoVarCleanups(Emission); 6732 } 6733 } 6734 } 6735 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6736 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6737 ThreadLimitVal = 6738 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6739 } 6740 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6741 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6742 CS = Dir->getInnermostCapturedStmt(); 6743 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6744 CGF.getContext(), CS->getCapturedStmt()); 6745 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6746 } 6747 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6748 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6749 CS = Dir->getInnermostCapturedStmt(); 6750 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6751 return NumThreads; 6752 } 6753 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6754 return Bld.getInt32(1); 6755 } 6756 return ThreadLimitVal ? 
                            ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle the 'if' clause. If an 'if' clause is present, the number of
    // threads is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
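    // E.g. (illustrative) '#pragma omp target parallel if(c) num_threads(n)'
    // yields c ? min(n, <thread limit, if any>) : 1, the min being the select
    // over ThreadLimitVal emitted below.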
6799 if (D.hasClausesOfKind<OMPIfClause>()) { 6800 const OMPIfClause *IfClause = nullptr; 6801 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6802 if (C->getNameModifier() == OMPD_unknown || 6803 C->getNameModifier() == OMPD_parallel) { 6804 IfClause = C; 6805 break; 6806 } 6807 } 6808 if (IfClause) { 6809 const Expr *Cond = IfClause->getCondition(); 6810 bool Result; 6811 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6812 if (!Result) 6813 return Bld.getInt32(1); 6814 } else { 6815 CodeGenFunction::RunCleanupsScope Scope(CGF); 6816 CondVal = CGF.EvaluateExprAsBool(Cond); 6817 } 6818 } 6819 } 6820 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6821 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6822 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6823 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6824 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6825 ThreadLimitVal = 6826 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6827 } 6828 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6829 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6830 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6831 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6832 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6833 NumThreadsVal = 6834 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 6835 ThreadLimitVal = ThreadLimitVal 6836 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6837 ThreadLimitVal), 6838 NumThreadsVal, ThreadLimitVal) 6839 : NumThreadsVal; 6840 } 6841 if (!ThreadLimitVal) 6842 ThreadLimitVal = Bld.getInt32(0); 6843 if (CondVal) 6844 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6845 return ThreadLimitVal; 6846 } 6847 case OMPD_target_teams_distribute_simd: 6848 case OMPD_target_simd: 6849 return Bld.getInt32(1); 6850 case OMPD_parallel: 6851 case OMPD_for: 6852 case OMPD_parallel_for: 6853 case OMPD_parallel_master: 6854 case OMPD_parallel_sections: 6855 case OMPD_for_simd: 6856 case OMPD_parallel_for_simd: 6857 case OMPD_cancel: 6858 case OMPD_cancellation_point: 6859 case OMPD_ordered: 6860 case OMPD_threadprivate: 6861 case OMPD_allocate: 6862 case OMPD_task: 6863 case OMPD_simd: 6864 case OMPD_sections: 6865 case OMPD_section: 6866 case OMPD_single: 6867 case OMPD_master: 6868 case OMPD_critical: 6869 case OMPD_taskyield: 6870 case OMPD_barrier: 6871 case OMPD_taskwait: 6872 case OMPD_taskgroup: 6873 case OMPD_atomic: 6874 case OMPD_flush: 6875 case OMPD_depobj: 6876 case OMPD_scan: 6877 case OMPD_teams: 6878 case OMPD_target_data: 6879 case OMPD_target_exit_data: 6880 case OMPD_target_enter_data: 6881 case OMPD_distribute: 6882 case OMPD_distribute_simd: 6883 case OMPD_distribute_parallel_for: 6884 case OMPD_distribute_parallel_for_simd: 6885 case OMPD_teams_distribute: 6886 case OMPD_teams_distribute_simd: 6887 case OMPD_teams_distribute_parallel_for: 6888 case OMPD_teams_distribute_parallel_for_simd: 6889 case OMPD_target_update: 6890 case OMPD_declare_simd: 6891 case OMPD_declare_variant: 6892 case OMPD_begin_declare_variant: 6893 case OMPD_end_declare_variant: 6894 case OMPD_declare_target: 6895 case OMPD_end_declare_target: 6896 case OMPD_declare_reduction: 6897 case OMPD_declare_mapper: 6898 case OMPD_taskloop: 6899 case OMPD_taskloop_simd: 6900 case OMPD_master_taskloop: 6901 case OMPD_master_taskloop_simd: 6902 case OMPD_parallel_master_taskloop: 6903 case OMPD_parallel_master_taskloop_simd: 
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have
    /// the use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is a member of
    /// some struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
  };

private:
  /// Information gathered for a single component list of a map-like clause,
  /// including how a device pointer (if any) has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer,
        bool IsImplicit, bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, that means we are using the whole
      // length of the base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizeof(base type) - lb * sizeof(element), clamped at zero.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
7136 /// a flag marking the map as a pointer if requested. Add a flag marking the
7137 /// map as the first one of a series of maps that relate to the same map
7138 /// expression.
7139 OpenMPOffloadMappingFlags getMapTypeBits(
7140 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7141 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7142 OpenMPOffloadMappingFlags Bits =
7143 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7144 switch (MapType) {
7145 case OMPC_MAP_alloc:
7146 case OMPC_MAP_release:
7147 // alloc and release are the default behavior in the runtime library, i.e.
7148 // if we don't pass any bits, alloc/release is what the runtime is going
7149 // to do. Therefore, we don't need to signal anything for these two map
7150 // types.
7151 break;
7152 case OMPC_MAP_to:
7153 Bits |= OMP_MAP_TO;
7154 break;
7155 case OMPC_MAP_from:
7156 Bits |= OMP_MAP_FROM;
7157 break;
7158 case OMPC_MAP_tofrom:
7159 Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7160 break;
7161 case OMPC_MAP_delete:
7162 Bits |= OMP_MAP_DELETE;
7163 break;
7164 case OMPC_MAP_unknown:
7165 llvm_unreachable("Unexpected map type!");
7166 }
7167 if (AddPtrFlag)
7168 Bits |= OMP_MAP_PTR_AND_OBJ;
7169 if (AddIsTargetParamFlag)
7170 Bits |= OMP_MAP_TARGET_PARAM;
7171 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7172 != MapModifiers.end())
7173 Bits |= OMP_MAP_ALWAYS;
7174 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7175 != MapModifiers.end())
7176 Bits |= OMP_MAP_CLOSE;
7177 return Bits;
7178 }
7179
7180 /// Return true if the provided expression is a final array section. A
7181 /// final array section is one whose length can't be proved to be one.
7182 bool isFinalArraySectionExpression(const Expr *E) const {
7183 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7184
7185 // It is not an array section and therefore not a unity-size one.
7186 if (!OASE)
7187 return false;
7188
7189 // An array section with no colon always refers to a single element.
7190 if (OASE->getColonLocFirst().isInvalid())
7191 return false;
7192
7193 const Expr *Length = OASE->getLength();
7194
7195 // If we don't have a length we have to check if the array has size 1
7196 // for this dimension. Also, we should always expect a length if the
7197 // base type is a pointer.
7198 if (!Length) {
7199 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7200 OASE->getBase()->IgnoreParenImpCasts())
7201 .getCanonicalType();
7202 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7203 return ATy->getSize().getSExtValue() != 1;
7204 // If we don't have a constant dimension length, we have to consider
7205 // the current section as having any size, so it is not necessarily
7206 // unitary. If it happens to be unity size, that's the user's fault.
7207 return true;
7208 }
7209
7210 // Check if the length evaluates to 1.
7211 Expr::EvalResult Result;
7212 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7213 return true; // Can have a size greater than 1.
7214
7215 llvm::APSInt ConstLength = Result.Val.getInt();
7216 return ConstLength.getSExtValue() != 1;
7217 }
7218
7219 /// Generate the base pointers, section pointers, sizes and map type
7220 /// bits for the provided map type, map modifier, and expression components.
7221 /// \a IsFirstComponent should be set to true if the provided set of
7222 /// components is the first associated with a capture.
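/// For example (following the scheme documented inside the function), for
/// map(to: s.p[:22]) the components are scanned from the base 's' up to the
/// complete expression 's.p[:22]', with the member 's.p' in between.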
7223 void generateInfoForComponentList( 7224 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7225 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7226 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 7227 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 7228 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 7229 bool IsImplicit, bool ForDeviceAddr = false, 7230 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7231 OverlappedElements = llvm::None) const { 7232 // The following summarizes what has to be generated for each map and the 7233 // types below. The generated information is expressed in this order: 7234 // base pointer, section pointer, size, flags 7235 // (to add to the ones that come from the map type and modifier). 7236 // 7237 // double d; 7238 // int i[100]; 7239 // float *p; 7240 // 7241 // struct S1 { 7242 // int i; 7243 // float f[50]; 7244 // } 7245 // struct S2 { 7246 // int i; 7247 // float f[50]; 7248 // S1 s; 7249 // double *p; 7250 // struct S2 *ps; 7251 // } 7252 // S2 s; 7253 // S2 *ps; 7254 // 7255 // map(d) 7256 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7257 // 7258 // map(i) 7259 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7260 // 7261 // map(i[1:23]) 7262 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7263 // 7264 // map(p) 7265 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7266 // 7267 // map(p[1:24]) 7268 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7269 // in unified shared memory mode or for local pointers 7270 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7271 // 7272 // map(s) 7273 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7274 // 7275 // map(s.i) 7276 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7277 // 7278 // map(s.s.f) 7279 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7280 // 7281 // map(s.p) 7282 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7283 // 7284 // map(to: s.p[:22]) 7285 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7286 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7287 // &(s.p), &(s.p[0]), 22*sizeof(double), 7288 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7289 // (*) alloc space for struct members, only this is a target parameter 7290 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7291 // optimizes this entry out, same in the examples below) 7292 // (***) map the pointee (map: to) 7293 // 7294 // map(s.ps) 7295 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7296 // 7297 // map(from: s.ps->s.i) 7298 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7299 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7300 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7301 // 7302 // map(to: s.ps->ps) 7303 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7304 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7305 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7306 // 7307 // map(s.ps->ps->ps) 7308 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7309 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7310 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7311 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7312 // 7313 // map(to: s.ps->ps->s.f[:22]) 7314 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7315 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7316 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7317 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 
22*sizeof(float), PTR_AND_OBJ | TO
7318 //
7319 // map(ps)
7320 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7321 //
7322 // map(ps->i)
7323 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7324 //
7325 // map(ps->s.f)
7326 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7327 //
7328 // map(from: ps->p)
7329 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7330 //
7331 // map(to: ps->p[:22])
7332 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7333 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7334 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7335 //
7336 // map(ps->ps)
7337 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7338 //
7339 // map(from: ps->ps->s.i)
7340 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7341 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7342 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7343 //
7344 // map(to: ps->ps->ps)
7345 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7346 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7347 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7348 //
7349 // map(ps->ps->ps->ps)
7350 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7351 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7352 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7353 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7354 //
7355 // map(to: ps->ps->ps->s.f[:22])
7356 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7357 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7358 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7359 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7360 //
7361 // map(to: s.f[:22]) map(from: s.p[:33])
7362 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7363 // sizeof(double*) (*), TARGET_PARAM
7364 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7365 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7366 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7367 // (*) allocate contiguous space needed to fit all mapped members even if
7368 // we allocate space for members not mapped (in this example,
7369 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7370 // them as well because they fall between &s.f[0] and &s.p)
7371 //
7372 // map(from: s.f[:22]) map(to: ps->p[:33])
7373 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7374 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7375 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7376 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7377 // (*) the struct this entry pertains to is the 2nd element in the list of
7378 // arguments, hence MEMBER_OF(2)
7379 //
7380 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7381 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7382 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7383 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7384 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7385 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7386 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7387 // (*) the struct this entry pertains to is the 4th element in the list
7388 // of arguments, hence MEMBER_OF(4)
7389
7390 // Track if the map information being generated is the first for a capture.
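// Only the first map generated for a capture is given the TARGET_PARAM flag
// (see the AddIsTargetParamFlag argument of getMapTypeBits above); later
// maps for the same capture are not.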
7391 bool IsCaptureFirstInfo = IsFirstComponentList;
7392 // When the variable is on a declare target link or in a to clause with
7393 // unified shared memory, a reference is needed to hold the host/device
7394 // address of the variable.
7395 bool RequiresReference = false;
7396
7397 // Scan the components from the base to the complete expression.
7398 auto CI = Components.rbegin();
7399 auto CE = Components.rend();
7400 auto I = CI;
7401
7402 // Track if the map information being generated is the first for a list of
7403 // components.
7404 bool IsExpressionFirstInfo = true;
7405 bool FirstPointerInComplexData = false;
7406 Address BP = Address::invalid();
7407 const Expr *AssocExpr = I->getAssociatedExpression();
7408 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7409 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7410 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7411
7412 if (isa<MemberExpr>(AssocExpr)) {
7413 // The base is the 'this' pointer. The content of the pointer is going
7414 // to be the base of the field being mapped.
7415 BP = CGF.LoadCXXThisAddress();
7416 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7417 (OASE &&
7418 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7419 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7420 } else if (OAShE &&
7421 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7422 BP = Address(
7423 CGF.EmitScalarExpr(OAShE->getBase()),
7424 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7425 } else {
7426 // The base is the reference to the variable.
7427 // BP = &Var.
7428 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7429 if (const auto *VD =
7430 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7431 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7432 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7433 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7434 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7435 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7436 RequiresReference = true;
7437 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7438 }
7439 }
7440 }
7441
7442 // If the variable is a pointer and is being dereferenced (i.e. is not
7443 // the last component), the base has to be the pointer itself, not its
7444 // reference. References are ignored for mapping purposes.
7445 QualType Ty =
7446 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7447 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7448 // No need to generate individual map information for the pointer; it
7449 // can be associated with the combined storage if shared memory mode is
7450 // active or the base declaration is not a global variable.
7451 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7452 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7453 !VD || VD->hasLocalStorage())
7454 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7455 else
7456 FirstPointerInComplexData = IsCaptureFirstInfo;
7457 ++I;
7458 }
7459 }
7460
7461 // Track whether a component of the list should be marked as MEMBER_OF some
7462 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7463 // in a component list should be marked as MEMBER_OF; all subsequent entries
7464 // do not belong to the base struct. E.g.
7465 // struct S2 s;
7466 // s.ps->ps->ps->f[:]
7467 // (1) (2) (3) (4)
7468 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7469 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7470 // is the pointee of ps(2) which is not a member of struct s, so it should
7471 // not be marked as such (it is still PTR_AND_OBJ).
7472 // The variable is initialized to false so that PTR_AND_OBJ entries which
7473 // are not struct members are not considered (e.g. array of pointers to
7474 // data).
7475 bool ShouldBeMemberOf = false;
7476
7477 // Variable keeping track of whether or not we have encountered a component
7478 // in the component list which is a member expression. Useful when we have a
7479 // pointer or a final array section, in which case it is the previous
7480 // component in the list which tells us whether we have a member expression.
7481 // E.g. X.f[:]
7482 // While processing the final array section "[:]" it is "f" which tells us
7483 // whether we are dealing with a member of a declared struct.
7484 const MemberExpr *EncounteredME = nullptr;
7485
7486 for (; I != CE; ++I) {
7487 // If the current component is a member of a struct (parent struct), mark it.
7488 if (!EncounteredME) {
7489 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7490 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7491 // as MEMBER_OF the parent struct.
7492 if (EncounteredME) {
7493 ShouldBeMemberOf = true;
7494 // Do not emit it as a complex pointer if this is actually not an
7495 // array-like expression.
7496 if (FirstPointerInComplexData) {
7497 QualType Ty = std::prev(I)
7498 ->getAssociatedDeclaration()
7499 ->getType()
7500 .getNonReferenceType();
7501 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7502 FirstPointerInComplexData = false;
7503 }
7504 }
7505 }
7506
7507 auto Next = std::next(I);
7508
7509 // We need to generate the addresses and sizes if this is the last
7510 // component, if the component is a pointer or if it is an array section
7511 // whose length can't be proved to be one. If this is a pointer, it
7512 // becomes the base address for the following components.
7513
7514 // A final array section is one whose length can't be proved to be one.
7515 bool IsFinalArraySection =
7516 isFinalArraySectionExpression(I->getAssociatedExpression());
7517
7518 // Get information on whether the element is a pointer. Array sections
7519 // need special treatment given that they are considered built-in
7520 // types.
7521 const auto *OASE =
7522 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7523 const auto *OAShE =
7524 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7525 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7526 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7527 bool IsPointer =
7528 OAShE ||
7529 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7530 .getCanonicalType()
7531 ->isAnyPointerType()) ||
7532 I->getAssociatedExpression()->getType()->isAnyPointerType();
7533 bool IsNonDerefPointer = IsPointer && !UO && !BO;
7534
7535 if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7536 // If this is not the last component, we expect the pointer to be
7537 // associated with an array expression or member expression.
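// E.g. for 'ps->ps->i' the non-dereferenced pointer component 'ps->ps' is
// followed by the member expression 'i'; for 'p[1:24]' the pointer 'p' is
// followed by the array section component.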
7538 assert((Next == CE ||
7539 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7540 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7541 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7542 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7543 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7544 "Unexpected expression");
7545
7546 Address LB = Address::invalid();
7547 if (OAShE) {
7548 LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7549 CGF.getContext().getTypeAlignInChars(
7550 OAShE->getBase()->getType()));
7551 } else {
7552 LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7553 .getAddress(CGF);
7554 }
7555
7556 // If this component is a pointer inside the base struct then we don't
7557 // need to create any entry for it; it will be combined with the object
7558 // it is pointing to into a single PTR_AND_OBJ entry.
7559 bool IsMemberPointerOrAddr =
7560 (IsPointer || ForDeviceAddr) && EncounteredME &&
7561 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7562 EncounteredME);
7563 if (!OverlappedElements.empty()) {
7564 // Handle the base element with the info for the overlapped elements.
7565 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7566 assert(Next == CE &&
7567 "Expected last element for the overlapped elements.");
7568 assert(!IsPointer &&
7569 "Unexpected base element with the pointer type.");
7570 // Mark the whole struct as the struct that requires allocation on the
7571 // device.
7572 PartialStruct.LowestElem = {0, LB};
7573 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7574 I->getAssociatedExpression()->getType());
7575 Address HB = CGF.Builder.CreateConstGEP(
7576 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7577 CGF.VoidPtrTy),
7578 TypeSize.getQuantity() - 1);
7579 PartialStruct.HighestElem = {
7580 std::numeric_limits<decltype(
7581 PartialStruct.HighestElem.first)>::max(),
7582 HB};
7583 PartialStruct.Base = BP;
7584 // Emit entries for the non-overlapped data.
7585 OpenMPOffloadMappingFlags Flags =
7586 OMP_MAP_MEMBER_OF |
7587 getMapTypeBits(MapType, MapModifiers, IsImplicit,
7588 /*AddPtrFlag=*/false,
7589 /*AddIsTargetParamFlag=*/false);
7590 LB = BP;
7591 llvm::Value *Size = nullptr;
7592 // Do a bitcopy of all the non-overlapped structure elements.
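// E.g. (illustrative) when 's' is mapped and 's.p[:22]' overlaps with it,
// the loop below emits the ranges [&s, &s.p) and [&s.p + 1, past the end
// of 's'), skipping the overlapped pointer member itself.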
7593 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7594 Component : OverlappedElements) { 7595 Address ComponentLB = Address::invalid(); 7596 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7597 Component) { 7598 if (MC.getAssociatedDeclaration()) { 7599 ComponentLB = 7600 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7601 .getAddress(CGF); 7602 Size = CGF.Builder.CreatePtrDiff( 7603 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7604 CGF.EmitCastToVoidPtr(LB.getPointer())); 7605 break; 7606 } 7607 } 7608 BasePointers.push_back(BP.getPointer()); 7609 Pointers.push_back(LB.getPointer()); 7610 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, 7611 /*isSigned=*/true)); 7612 Types.push_back(Flags); 7613 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7614 } 7615 BasePointers.push_back(BP.getPointer()); 7616 Pointers.push_back(LB.getPointer()); 7617 Size = CGF.Builder.CreatePtrDiff( 7618 CGF.EmitCastToVoidPtr( 7619 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7620 CGF.EmitCastToVoidPtr(LB.getPointer())); 7621 Sizes.push_back( 7622 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7623 Types.push_back(Flags); 7624 break; 7625 } 7626 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7627 if (!IsMemberPointerOrAddr) { 7628 BasePointers.push_back(BP.getPointer()); 7629 Pointers.push_back(LB.getPointer()); 7630 Sizes.push_back( 7631 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7632 7633 // We need to add a pointer flag for each map that comes from the 7634 // same expression except for the first one. We also need to signal 7635 // this map is the first one that relates with the current capture 7636 // (there is a set of entries for each capture). 7637 OpenMPOffloadMappingFlags Flags = 7638 getMapTypeBits(MapType, MapModifiers, IsImplicit, 7639 !IsExpressionFirstInfo || RequiresReference || 7640 FirstPointerInComplexData, 7641 IsCaptureFirstInfo && !RequiresReference); 7642 7643 if (!IsExpressionFirstInfo) { 7644 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7645 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7646 if (IsPointer) 7647 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7648 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7649 7650 if (ShouldBeMemberOf) { 7651 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7652 // should be later updated with the correct value of MEMBER_OF. 7653 Flags |= OMP_MAP_MEMBER_OF; 7654 // From now on, all subsequent PTR_AND_OBJ entries should not be 7655 // marked as MEMBER_OF. 7656 ShouldBeMemberOf = false; 7657 } 7658 } 7659 7660 Types.push_back(Flags); 7661 } 7662 7663 // If we have encountered a member expression so far, keep track of the 7664 // mapped member. If the parent is "*this", then the value declaration 7665 // is nullptr. 
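// E.g. for map(s.f[:22]) the encountered member expression is 's.f'; its
// field index within S2 drives the lowest/highest element bookkeeping
// below.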
7666 if (EncounteredME) {
7667 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7668 unsigned FieldIndex = FD->getFieldIndex();
7669
7670 // Update the info about the lowest and highest elements for this struct.
7671 if (!PartialStruct.Base.isValid()) {
7672 PartialStruct.LowestElem = {FieldIndex, LB};
7673 if (IsFinalArraySection) {
7674 Address HB =
7675 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7676 .getAddress(CGF);
7677 PartialStruct.HighestElem = {FieldIndex, HB};
7678 } else {
7679 PartialStruct.HighestElem = {FieldIndex, LB};
7680 }
7681 PartialStruct.Base = BP;
7682 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7683 PartialStruct.LowestElem = {FieldIndex, LB};
7684 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7685 PartialStruct.HighestElem = {FieldIndex, LB};
7686 }
7687 }
7688
7689 // If we have a final array section, we are done with this expression.
7690 if (IsFinalArraySection)
7691 break;
7692
7693 // The pointer becomes the base for the next element.
7694 if (Next != CE)
7695 BP = LB;
7696
7697 IsExpressionFirstInfo = false;
7698 IsCaptureFirstInfo = false;
7699 FirstPointerInComplexData = false;
7700 }
7701 }
7702 }
7703
7704 /// Return the adjusted map modifiers if the declaration a capture refers to
7705 /// appears in a firstprivate clause. This is expected to be used only with
7706 /// directives that start with 'target'.
7707 MappableExprsHandler::OpenMPOffloadMappingFlags
7708 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7709 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7710
7711 // A firstprivate variable captured by reference will use only the
7712 // 'private ptr' and 'map to' flags. Return the right flags if the captured
7713 // declaration is known as firstprivate in this handler.
7714 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7715 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7716 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7717 return MappableExprsHandler::OMP_MAP_ALWAYS |
7718 MappableExprsHandler::OMP_MAP_TO;
7719 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7720 return MappableExprsHandler::OMP_MAP_TO |
7721 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7722 return MappableExprsHandler::OMP_MAP_PRIVATE |
7723 MappableExprsHandler::OMP_MAP_TO;
7724 }
7725 return MappableExprsHandler::OMP_MAP_TO |
7726 MappableExprsHandler::OMP_MAP_FROM;
7727 }
7728
7729 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7730 // Shift by getFlagMemberOffset() bits (the MEMBER_OF field is 1-based, hence Position + 1).
7731 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7732 << getFlagMemberOffset());
7733 }
7734
7735 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7736 OpenMPOffloadMappingFlags MemberOfFlag) {
7737 // If the entry is PTR_AND_OBJ but has not been marked with the special
7738 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7739 // marked as MEMBER_OF.
7740 if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7741 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7742 return;
7743
7744 // Reset the placeholder value to prepare the flag for the assignment of the
7745 // proper MEMBER_OF value.
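// E.g. an entry that carries the placeholder MEMBER_OF=0xFFFF and belongs
// to the combined entry at position 1 (0-based) ends up with MEMBER_OF(2),
// i.e. the value (1 + 1) << getFlagMemberOffset().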
7746 Flags &= ~OMP_MAP_MEMBER_OF; 7747 Flags |= MemberOfFlag; 7748 } 7749 7750 void getPlainLayout(const CXXRecordDecl *RD, 7751 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7752 bool AsBase) const { 7753 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7754 7755 llvm::StructType *St = 7756 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7757 7758 unsigned NumElements = St->getNumElements(); 7759 llvm::SmallVector< 7760 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7761 RecordLayout(NumElements); 7762 7763 // Fill bases. 7764 for (const auto &I : RD->bases()) { 7765 if (I.isVirtual()) 7766 continue; 7767 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7768 // Ignore empty bases. 7769 if (Base->isEmpty() || CGF.getContext() 7770 .getASTRecordLayout(Base) 7771 .getNonVirtualSize() 7772 .isZero()) 7773 continue; 7774 7775 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7776 RecordLayout[FieldIndex] = Base; 7777 } 7778 // Fill in virtual bases. 7779 for (const auto &I : RD->vbases()) { 7780 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7781 // Ignore empty bases. 7782 if (Base->isEmpty()) 7783 continue; 7784 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7785 if (RecordLayout[FieldIndex]) 7786 continue; 7787 RecordLayout[FieldIndex] = Base; 7788 } 7789 // Fill in all the fields. 7790 assert(!RD->isUnion() && "Unexpected union."); 7791 for (const auto *Field : RD->fields()) { 7792 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7793 // will fill in later.) 7794 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 7795 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7796 RecordLayout[FieldIndex] = Field; 7797 } 7798 } 7799 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7800 &Data : RecordLayout) { 7801 if (Data.isNull()) 7802 continue; 7803 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7804 getPlainLayout(Base, Layout, /*AsBase=*/true); 7805 else 7806 Layout.push_back(Data.get<const FieldDecl *>()); 7807 } 7808 } 7809 7810 public: 7811 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 7812 : CurDir(&Dir), CGF(CGF) { 7813 // Extract firstprivate clause information. 7814 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 7815 for (const auto *D : C->varlists()) 7816 FirstPrivateDecls.try_emplace( 7817 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 7818 // Extract implicit firstprivates from uses_allocators clauses. 7819 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 7820 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 7821 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 7822 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 7823 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 7824 /*Implicit=*/true); 7825 else if (const auto *VD = dyn_cast<VarDecl>( 7826 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 7827 ->getDecl())) 7828 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 7829 } 7830 } 7831 // Extract device pointer clause information. 7832 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 7833 for (auto L : C->component_lists()) 7834 DevPointersMap[L.first].push_back(L.second); 7835 } 7836 7837 /// Constructor for the declare mapper directive. 
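/// E.g. reached for '#pragma omp declare mapper(id: struct S s) map(s.f)',
/// where the map clauses hang off the OMPDeclareMapperDecl rather than an
/// executable directive.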
7838 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 7839 : CurDir(&Dir), CGF(CGF) {} 7840 7841 /// Generate code for the combined entry if we have a partially mapped struct 7842 /// and take care of the mapping flags of the arguments corresponding to 7843 /// individual struct members. 7844 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 7845 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7846 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 7847 const StructRangeInfoTy &PartialStruct) const { 7848 // Base is the base of the struct 7849 BasePointers.push_back(PartialStruct.Base.getPointer()); 7850 // Pointer is the address of the lowest element 7851 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 7852 Pointers.push_back(LB); 7853 // Size is (addr of {highest+1} element) - (addr of lowest element) 7854 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 7855 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 7856 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 7857 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 7858 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 7859 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 7860 /*isSigned=*/false); 7861 Sizes.push_back(Size); 7862 // Map type is always TARGET_PARAM 7863 Types.push_back(OMP_MAP_TARGET_PARAM); 7864 // Remove TARGET_PARAM flag from the first element 7865 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 7866 7867 // All other current entries will be MEMBER_OF the combined entry 7868 // (except for PTR_AND_OBJ entries which do not have a placeholder value 7869 // 0xFFFF in the MEMBER_OF field). 7870 OpenMPOffloadMappingFlags MemberOfFlag = 7871 getMemberOfFlag(BasePointers.size() - 1); 7872 for (auto &M : CurTypes) 7873 setCorrectMemberOfFlag(M, MemberOfFlag); 7874 } 7875 7876 /// Generate all the base pointers, section pointers, sizes and map 7877 /// types for the extracted mappable expressions. Also, for each item that 7878 /// relates with a device pointer, a pair of the relevant declaration and 7879 /// index where it occurs is appended to the device pointers info array. 7880 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 7881 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7882 MapFlagsArrayTy &Types) const { 7883 // We have to process the component lists that relate with the same 7884 // declaration in a single chunk so that we can generate the map flags 7885 // correctly. Therefore, we organize all lists in a map. 7886 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 7887 7888 // Helper function to fill the information map for the different supported 7889 // clauses. 7890 auto &&InfoGen = 7891 [&Info](const ValueDecl *D, 7892 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 7893 OpenMPMapClauseKind MapType, 7894 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7895 bool ReturnDevicePointer, bool IsImplicit, 7896 bool ForDeviceAddr = false) { 7897 const ValueDecl *VD = 7898 D ? 
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7899 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7900 IsImplicit, ForDeviceAddr);
7901 };
7902
7903 assert(CurDir.is<const OMPExecutableDirective *>() &&
7904 "Expect an executable directive");
7905 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7906 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7907 for (const auto L : C->component_lists()) {
7908 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7909 /*ReturnDevicePointer=*/false, C->isImplicit());
7910 }
7911 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7912 for (const auto L : C->component_lists()) {
7913 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7914 /*ReturnDevicePointer=*/false, C->isImplicit());
7915 }
7916 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
7917 for (const auto L : C->component_lists()) {
7918 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7919 /*ReturnDevicePointer=*/false, C->isImplicit());
7920 }
7921
7922 // Look at the use_device_ptr clause information and mark the existing map
7923 // entries as such. If there is no map information for an entry in the
7924 // use_device_ptr list, we create one with map type 'alloc' and a zero-size
7925 // section. It is the user's fault if that was not mapped before. If there
7926 // is no map information and the pointer is a struct member, then we defer
7927 // the emission of that entry until the whole struct has been processed.
7928 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7929 DeferredInfo;
7930 MapBaseValuesArrayTy UseDevicePtrBasePointers;
7931 MapValuesArrayTy UseDevicePtrPointers;
7932 MapValuesArrayTy UseDevicePtrSizes;
7933 MapFlagsArrayTy UseDevicePtrTypes;
7934
7935 for (const auto *C :
7936 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
7937 for (const auto L : C->component_lists()) {
7938 assert(!L.second.empty() && "Not expecting empty list of components!");
7939 const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7940 VD = cast<ValueDecl>(VD->getCanonicalDecl());
7941 const Expr *IE = L.second.back().getAssociatedExpression();
7942 // If the first component is a member expression, we have to look into
7943 // 'this', which maps to null in the map of map information. Otherwise
7944 // look directly for the information.
7945 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7946
7947 // We potentially have map information for this declaration already.
7948 // Look for the first set of components that refer to it.
7949 if (It != Info.end()) {
7950 auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
7951 return MI.Components.back().getAssociatedDeclaration() == VD;
7952 });
7953 // If we found a map entry, signal that the pointer has to be returned
7954 // and move on to the next declaration. Exclude cases where the base
7955 // pointer is mapped as an array subscript, array section or array
7956 // shaping expression: the base address is then passed as a pointer to
7957 // the base and cannot be used as the base of a use_device_ptr list
7958 // item.
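// E.g. for 'map(p[0:10]) use_device_ptr(p)' where 'p' is a global and
// unified shared memory is off, the match found for 'p' ends in an array
// section, so we fall through and emit a separate RETURN_PARAM entry for
// 'p' below instead of tagging the existing map entry.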
7959 if (CI != It->second.end()) {
7960 auto PrevCI = std::next(CI->Components.rbegin());
7961 const auto *VarD = dyn_cast<VarDecl>(VD);
7962 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7963 isa<MemberExpr>(IE) ||
7964 !VD->getType().getNonReferenceType()->isPointerType() ||
7965 PrevCI == CI->Components.rend() ||
7966 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
7967 VarD->hasLocalStorage()) {
7968 CI->ReturnDevicePointer = true;
7969 continue;
7970 }
7971 }
7972 }
7973
7974 // We didn't find any match in our map information, so we generate a
7975 // zero-size array section. If the pointer is a struct member, we defer
7976 // this action until the whole struct has been processed.
7977 if (isa<MemberExpr>(IE)) {
7978 // Insert the pointer into Info to be processed by
7979 // generateInfoForComponentList. Because it is a member pointer
7980 // without a pointee, no entry will be generated for it, therefore
7981 // we need to generate one after the whole struct has been processed.
7982 // Nonetheless, generateInfoForComponentList must be called to take
7983 // the pointer into account for the calculation of the range of the
7984 // partial struct.
7985 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
7986 /*ReturnDevicePointer=*/false, C->isImplicit());
7987 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
7988 } else {
7989 llvm::Value *Ptr =
7990 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7991 UseDevicePtrBasePointers.emplace_back(Ptr, VD);
7992 UseDevicePtrPointers.push_back(Ptr);
7993 UseDevicePtrSizes.push_back(
7994 llvm::Constant::getNullValue(CGF.Int64Ty));
7995 UseDevicePtrTypes.push_back(OMP_MAP_RETURN_PARAM |
7996 OMP_MAP_TARGET_PARAM);
7997 }
7998 }
7999 }
8000
8001 // Look at the use_device_addr clause information and mark the existing map
8002 // entries as such. If there is no map information for an entry in the
8003 // use_device_addr list, we create one with map type 'alloc' and a zero-size
8004 // section. It is the user's fault if that was not mapped before. If there
8005 // is no map information and the pointer is a struct member, then we defer
8006 // the emission of that entry until the whole struct has been processed.
8007 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8008 for (const auto *C :
8009 CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
8010 for (const auto L : C->component_lists()) {
8011 assert(!L.second.empty() && "Not expecting empty list of components!");
8012 const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
8013 if (!Processed.insert(VD).second)
8014 continue;
8015 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8016 const Expr *IE = L.second.back().getAssociatedExpression();
8017 // If the first component is a member expression, we have to look into
8018 // 'this', which maps to null in the map of map information. Otherwise
8019 // look directly for the information.
8020 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8021
8022 // We potentially have map information for this declaration already.
8023 // Look for the first set of components that refer to it.
8024 if (It != Info.end()) {
8025 auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
8026 return MI.Components.back().getAssociatedDeclaration() == VD;
8027 });
8028 // If we found a map entry, signal that the pointer has to be returned
8029 // and move on to the next declaration.
8030 if (CI != It->second.end()) {
8031 CI->ReturnDevicePointer = true;
8032 continue;
8033 }
8034 }
8035
8036 // We didn't find any match in our map information, so we generate a
8037 // zero-size array section. If the pointer is a struct member, we defer
8038 // this action until the whole struct has been processed.
8039 if (isa<MemberExpr>(IE)) {
8040 // Insert the pointer into Info to be processed by
8041 // generateInfoForComponentList. Because it is a member pointer
8042 // without a pointee, no entry will be generated for it, therefore
8043 // we need to generate one after the whole struct has been processed.
8044 // Nonetheless, generateInfoForComponentList must be called to take
8045 // the pointer into account for the calculation of the range of the
8046 // partial struct.
8047 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8048 /*ReturnDevicePointer=*/false, C->isImplicit(),
8049 /*ForDeviceAddr=*/true);
8050 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8051 } else {
8052 llvm::Value *Ptr;
8053 if (IE->isGLValue())
8054 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8055 else
8056 Ptr = CGF.EmitScalarExpr(IE);
8057 UseDevicePtrBasePointers.emplace_back(Ptr, VD);
8058 UseDevicePtrPointers.push_back(Ptr);
8059 UseDevicePtrSizes.push_back(
8060 llvm::Constant::getNullValue(CGF.Int64Ty));
8061 UseDevicePtrTypes.push_back(OMP_MAP_RETURN_PARAM |
8062 OMP_MAP_TARGET_PARAM);
8063 }
8064 }
8065 }
8066
8067 for (const auto &M : Info) {
8068 // We need to know when we generate information for the first component
8069 // associated with a capture, because the mapping flags depend on it.
8070 bool IsFirstComponentList = true;
8071
8072 // Temporary versions of the arrays.
8073 MapBaseValuesArrayTy CurBasePointers;
8074 MapValuesArrayTy CurPointers;
8075 MapValuesArrayTy CurSizes;
8076 MapFlagsArrayTy CurTypes;
8077 StructRangeInfoTy PartialStruct;
8078
8079 for (const MapInfo &L : M.second) {
8080 assert(!L.Components.empty() &&
8081 "Not expecting declaration with no component lists.");
8082
8083 // Remember the current base pointer index.
8084 unsigned CurrentBasePointersIdx = CurBasePointers.size();
8085 generateInfoForComponentList(
8086 L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8087 CurPointers, CurSizes, CurTypes, PartialStruct,
8088 IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
8089
8090 // If this entry relates with a device pointer, set the relevant
8091 // declaration and add the 'return pointer' flag.
8092 if (L.ReturnDevicePointer) {
8093 assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8094 "Unexpected number of mapped base pointers.");
8095
8096 const ValueDecl *RelevantVD =
8097 L.Components.back().getAssociatedDeclaration();
8098 assert(RelevantVD &&
8099 "No relevant declaration related with device pointer??");
8100
8101 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8102 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8103 }
8104 IsFirstComponentList = false;
8105 }
8106
8107 // Append any pending zero-length pointers which are struct members and
8108 // are used with use_device_ptr or use_device_addr.
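// E.g. 'use_device_ptr(s.p)' with no matching map entry for 's.p' was
// recorded in DeferredInfo above; it is emitted here, once the enclosing
// struct 's' has been fully processed.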
8109 auto CI = DeferredInfo.find(M.first); 8110 if (CI != DeferredInfo.end()) { 8111 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8112 llvm::Value *BasePtr; 8113 llvm::Value *Ptr; 8114 if (L.ForDeviceAddr) { 8115 if (L.IE->isGLValue()) 8116 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8117 else 8118 Ptr = this->CGF.EmitScalarExpr(L.IE); 8119 BasePtr = Ptr; 8120 // Entry is RETURN_PARAM. Also, set the placeholder value 8121 // MEMBER_OF=FFFF so that the entry is later updated with the 8122 // correct value of MEMBER_OF. 8123 CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8124 } else { 8125 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8126 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8127 L.IE->getExprLoc()); 8128 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 8129 // value MEMBER_OF=FFFF so that the entry is later updated with the 8130 // correct value of MEMBER_OF. 8131 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8132 OMP_MAP_MEMBER_OF); 8133 } 8134 CurBasePointers.emplace_back(BasePtr, L.VD); 8135 CurPointers.push_back(Ptr); 8136 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8137 } 8138 } 8139 8140 // If there is an entry in PartialStruct it means we have a struct with 8141 // individual members mapped. Emit an extra combined entry. 8142 if (PartialStruct.Base.isValid()) 8143 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8144 PartialStruct); 8145 8146 // We need to append the results of this capture to what we already have. 8147 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8148 Pointers.append(CurPointers.begin(), CurPointers.end()); 8149 Sizes.append(CurSizes.begin(), CurSizes.end()); 8150 Types.append(CurTypes.begin(), CurTypes.end()); 8151 } 8152 // Append data for use_device_ptr clauses. 8153 BasePointers.append(UseDevicePtrBasePointers.begin(), 8154 UseDevicePtrBasePointers.end()); 8155 Pointers.append(UseDevicePtrPointers.begin(), UseDevicePtrPointers.end()); 8156 Sizes.append(UseDevicePtrSizes.begin(), UseDevicePtrSizes.end()); 8157 Types.append(UseDevicePtrTypes.begin(), UseDevicePtrTypes.end()); 8158 } 8159 8160 /// Generate all the base pointers, section pointers, sizes and map types for 8161 /// the extracted map clauses of user-defined mapper. 8162 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, 8163 MapValuesArrayTy &Pointers, 8164 MapValuesArrayTy &Sizes, 8165 MapFlagsArrayTy &Types) const { 8166 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8167 "Expect a declare mapper directive"); 8168 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8169 // We have to process the component lists that relate with the same 8170 // declaration in a single chunk so that we can generate the map flags 8171 // correctly. Therefore, we organize all lists in a map. 8172 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8173 8174 // Helper function to fill the information map for the different supported 8175 // clauses. 8176 auto &&InfoGen = [&Info]( 8177 const ValueDecl *D, 8178 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8179 OpenMPMapClauseKind MapType, 8180 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8181 bool ReturnDevicePointer, bool IsImplicit) { 8182 const ValueDecl *VD = 8183 D ? 
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8184 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 8185 IsImplicit); 8186 }; 8187 8188 for (const auto *C : CurMapperDir->clauselists()) { 8189 const auto *MC = cast<OMPMapClause>(C); 8190 for (const auto L : MC->component_lists()) { 8191 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), 8192 /*ReturnDevicePointer=*/false, MC->isImplicit()); 8193 } 8194 } 8195 8196 for (const auto &M : Info) { 8197 // We need to know when we generate information for the first component 8198 // associated with a capture, because the mapping flags depend on it. 8199 bool IsFirstComponentList = true; 8200 8201 // Temporary versions of arrays 8202 MapBaseValuesArrayTy CurBasePointers; 8203 MapValuesArrayTy CurPointers; 8204 MapValuesArrayTy CurSizes; 8205 MapFlagsArrayTy CurTypes; 8206 StructRangeInfoTy PartialStruct; 8207 8208 for (const MapInfo &L : M.second) { 8209 assert(!L.Components.empty() && 8210 "Not expecting declaration with no component lists."); 8211 generateInfoForComponentList( 8212 L.MapType, L.MapModifiers, L.Components, CurBasePointers, 8213 CurPointers, CurSizes, CurTypes, PartialStruct, 8214 IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr); 8215 IsFirstComponentList = false; 8216 } 8217 8218 // If there is an entry in PartialStruct it means we have a struct with 8219 // individual members mapped. Emit an extra combined entry. 8220 if (PartialStruct.Base.isValid()) 8221 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8222 PartialStruct); 8223 8224 // We need to append the results of this capture to what we already have. 8225 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8226 Pointers.append(CurPointers.begin(), CurPointers.end()); 8227 Sizes.append(CurSizes.begin(), CurSizes.end()); 8228 Types.append(CurTypes.begin(), CurTypes.end()); 8229 } 8230 } 8231 8232 /// Emit capture info for lambdas for variables captured by reference. 
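/// E.g. (illustrative) for a lambda '[&x]() { x += 1; }' reaching a target
/// region, this emits the capture field holding the address of 'x' together
/// with 'x' itself as a PTR_AND_OBJ pair.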
8233 void generateInfoForLambdaCaptures( 8234 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, 8235 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8236 MapFlagsArrayTy &Types, 8237 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8238 const auto *RD = VD->getType() 8239 .getCanonicalType() 8240 .getNonReferenceType() 8241 ->getAsCXXRecordDecl(); 8242 if (!RD || !RD->isLambda()) 8243 return; 8244 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8245 LValue VDLVal = CGF.MakeAddrLValue( 8246 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8247 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8248 FieldDecl *ThisCapture = nullptr; 8249 RD->getCaptureFields(Captures, ThisCapture); 8250 if (ThisCapture) { 8251 LValue ThisLVal = 8252 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8253 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8254 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 8255 VDLVal.getPointer(CGF)); 8256 BasePointers.push_back(ThisLVal.getPointer(CGF)); 8257 Pointers.push_back(ThisLValVal.getPointer(CGF)); 8258 Sizes.push_back( 8259 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8260 CGF.Int64Ty, /*isSigned=*/true)); 8261 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8262 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8263 } 8264 for (const LambdaCapture &LC : RD->captures()) { 8265 if (!LC.capturesVariable()) 8266 continue; 8267 const VarDecl *VD = LC.getCapturedVar(); 8268 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8269 continue; 8270 auto It = Captures.find(VD); 8271 assert(It != Captures.end() && "Found lambda capture without field."); 8272 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8273 if (LC.getCaptureKind() == LCK_ByRef) { 8274 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8275 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8276 VDLVal.getPointer(CGF)); 8277 BasePointers.push_back(VarLVal.getPointer(CGF)); 8278 Pointers.push_back(VarLValVal.getPointer(CGF)); 8279 Sizes.push_back(CGF.Builder.CreateIntCast( 8280 CGF.getTypeSize( 8281 VD->getType().getCanonicalType().getNonReferenceType()), 8282 CGF.Int64Ty, /*isSigned=*/true)); 8283 } else { 8284 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8285 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8286 VDLVal.getPointer(CGF)); 8287 BasePointers.push_back(VarLVal.getPointer(CGF)); 8288 Pointers.push_back(VarRVal.getScalarVal()); 8289 Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8290 } 8291 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8292 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8293 } 8294 } 8295 8296 /// Set correct indices for lambdas captures. 8297 void adjustMemberOfForLambdaCaptures( 8298 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8299 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8300 MapFlagsArrayTy &Types) const { 8301 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8302 // Set correct member_of idx for all implicit lambda captures. 
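// Such captures are recognized by the exact flag combination emitted for
// them in generateInfoForLambdaCaptures above; any other entry is left
// untouched.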
8303 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8304 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8305 continue;
8306 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8307 assert(BasePtr && "Unable to find base lambda address.");
8308 int TgtIdx = -1;
8309 for (unsigned J = I; J > 0; --J) {
8310 unsigned Idx = J - 1;
8311 if (Pointers[Idx] != BasePtr)
8312 continue;
8313 TgtIdx = Idx;
8314 break;
8315 }
8316 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8317 // All other current entries will be MEMBER_OF the combined entry
8318 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8319 // 0xFFFF in the MEMBER_OF field).
8320 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8321 setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8322 }
8323 }
8324
8325 /// Generate the base pointers, section pointers, sizes and map types
8326 /// associated with a given capture.
8327 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8328 llvm::Value *Arg,
8329 MapBaseValuesArrayTy &BasePointers,
8330 MapValuesArrayTy &Pointers,
8331 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
8332 StructRangeInfoTy &PartialStruct) const {
8333 assert(!Cap->capturesVariableArrayType() &&
8334 "Not expecting to generate map info for a variable array type!");
8335
8336 // We need to know when we are generating information for the first component, because the mapping flags depend on it.
8337 const ValueDecl *VD = Cap->capturesThis()
8338 ? nullptr
8339 : Cap->getCapturedVar()->getCanonicalDecl();
8340
8341 // If this declaration appears in an is_device_ptr clause, we just have to
8342 // pass the pointer by value. If it is a reference to a declaration, we just
8343 // pass its value.
8344 if (DevPointersMap.count(VD)) {
8345 BasePointers.emplace_back(Arg, VD);
8346 Pointers.push_back(Arg);
8347 Sizes.push_back(
8348 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8349 CGF.Int64Ty, /*isSigned=*/true));
8350 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
8351 return;
8352 }
8353
8354 using MapData =
8355 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8356 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
8357 SmallVector<MapData, 4> DeclComponentLists;
8358 assert(CurDir.is<const OMPExecutableDirective *>() &&
8359 "Expect an executable directive");
8360 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8361 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8362 for (const auto L : C->decl_component_lists(VD)) {
8363 assert(L.first == VD &&
8364 "We got information for the wrong declaration??");
8365 assert(!L.second.empty() &&
8366 "Not expecting declaration with no component lists.");
8367 DeclComponentLists.emplace_back(L.second, C->getMapType(),
8368 C->getMapTypeModifiers(),
8369 C->isImplicit());
8370 }
8371 }
8372
8373 // Find overlapping elements (including the offset from the base element).
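// E.g. map(s.s) and map(s.s.f[:10]) overlap: both component lists share
// the prefix 's', 's.s', so the shorter list becomes the base and the
// longer one is recorded as one of its overlapped elements.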
8374 llvm::SmallDenseMap<
8375 const MapData *,
8376 llvm::SmallVector<
8377 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8378 4>
8379 OverlappedData;
8380 size_t Count = 0;
8381 for (const MapData &L : DeclComponentLists) {
8382 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8383 OpenMPMapClauseKind MapType;
8384 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8385 bool IsImplicit;
8386 std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8387 ++Count;
8388 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8389 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8390 std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
8391 auto CI = Components.rbegin();
8392 auto CE = Components.rend();
8393 auto SI = Components1.rbegin();
8394 auto SE = Components1.rend();
8395 for (; CI != CE && SI != SE; ++CI, ++SI) {
8396 if (CI->getAssociatedExpression()->getStmtClass() !=
8397 SI->getAssociatedExpression()->getStmtClass())
8398 break;
8399 // Are we dealing with different variables/fields?
8400 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8401 break;
8402 }
8403 // We found an overlap if, for at least one of the lists, we reached the
8404 // end of that list (one component list is a prefix of the other).
8405 if (CI == CE || SI == SE) {
8406 assert((CI != CE || SI != SE) &&
8407 "Unexpected full match of the mapping components.");
8408 const MapData &BaseData = CI == CE ? L : L1;
8409 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8410 SI == SE ? Components : Components1;
8411 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8412 OverlappedElements.getSecond().push_back(SubData);
8413 }
8414 }
8415 }
8416 // Sort the overlapped elements for each item.
8417 llvm::SmallVector<const FieldDecl *, 4> Layout;
8418 if (!OverlappedData.empty()) {
8419 if (const auto *CRD =
8420 VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8421 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8422 else {
8423 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8424 Layout.append(RD->field_begin(), RD->field_end());
8425 }
8426 }
8427 for (auto &Pair : OverlappedData) {
8428 llvm::sort(
8429 Pair.getSecond(),
8430 [&Layout](
8431 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8432 OMPClauseMappableExprCommon::MappableExprComponentListRef
8433 Second) {
8434 auto CI = First.rbegin();
8435 auto CE = First.rend();
8436 auto SI = Second.rbegin();
8437 auto SE = Second.rend();
8438 for (; CI != CE && SI != SE; ++CI, ++SI) {
8439 if (CI->getAssociatedExpression()->getStmtClass() !=
8440 SI->getAssociatedExpression()->getStmtClass())
8441 break;
8442 // Are we dealing with different variables/fields?
8443 if (CI->getAssociatedDeclaration() !=
8444 SI->getAssociatedDeclaration())
8445 break;
8446 }
8447
8448 // Lists contain the same elements.
8449 if (CI == CE && SI == SE)
8450 return false;
8451
8452 // A list with fewer elements is ordered before a list with more elements.
8453 if (CI == CE || SI == SE)
8454 return CI == CE;
8455
8456 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8457 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8458 if (FD1->getParent() == FD2->getParent())
8459 return FD1->getFieldIndex() < FD2->getFieldIndex();
8460 const auto It =
8461 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8462 return FD == FD1 || FD == FD2;
8463 });
8464 return *It == FD1;
8465 });
8466 }
8467
8468 // The mapping flags depend on which component list is emitted first for
8469 // the capture. Go through all of the elements with overlapped elements first.
8470 for (const auto &Pair : OverlappedData) {
8471 const MapData &L = *Pair.getFirst();
8472 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8473 OpenMPMapClauseKind MapType;
8474 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8475 bool IsImplicit;
8476 std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8477 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8478 OverlappedComponents = Pair.getSecond();
8479 bool IsFirstComponentList = true;
8480 generateInfoForComponentList(
8481 MapType, MapModifiers, Components, BasePointers, Pointers, Sizes,
8482 Types, PartialStruct, IsFirstComponentList, IsImplicit,
8483 /*ForDeviceAddr=*/false, OverlappedComponents);
8484 }
8485 // Then go through the other elements, which have no overlapped elements.
8486 bool IsFirstComponentList = OverlappedData.empty();
8487 for (const MapData &L : DeclComponentLists) {
8488 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8489 OpenMPMapClauseKind MapType;
8490 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8491 bool IsImplicit;
8492 std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8493 auto It = OverlappedData.find(&L);
8494 if (It == OverlappedData.end())
8495 generateInfoForComponentList(MapType, MapModifiers, Components,
8496 BasePointers, Pointers, Sizes, Types,
8497 PartialStruct, IsFirstComponentList,
8498 IsImplicit);
8499 IsFirstComponentList = false;
8500 }
8501 }
8502
8503 /// Generate the base pointers, section pointers, sizes and map types
8504 /// associated with the declare target link variables.
8505 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8506 MapValuesArrayTy &Pointers,
8507 MapValuesArrayTy &Sizes,
8508 MapFlagsArrayTy &Types) const {
8509 assert(CurDir.is<const OMPExecutableDirective *>() &&
8510 "Expect an executable directive");
8511 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8512 // Map other list items in the map clause which are not captured variables
8513 // but "declare target link" global variables.
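// E.g. a global 'int g;' declared under '#pragma omp declare target link(g)'
// and named in a map clause is emitted here: it is not captured by the
// outlined region, so it is mapped through its declare-target reference.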
8514     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8515       for (const auto L : C->component_lists()) {
8516         if (!L.first)
8517           continue;
8518         const auto *VD = dyn_cast<VarDecl>(L.first);
8519         if (!VD)
8520           continue;
8521         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8522             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8523         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8524             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8525           continue;
8526         StructRangeInfoTy PartialStruct;
8527         generateInfoForComponentList(
8528             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8529             Pointers, Sizes, Types, PartialStruct,
8530             /*IsFirstComponentList=*/true, C->isImplicit());
8531         assert(!PartialStruct.Base.isValid() &&
8532                "No partial structs for declare target link expected.");
8533       }
8534     }
8535   }
8536
8537   /// Generate the default map information for a given capture \a CI,
8538   /// record field declaration \a RI and captured value \a CV.
8539   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8540                               const FieldDecl &RI, llvm::Value *CV,
8541                               MapBaseValuesArrayTy &CurBasePointers,
8542                               MapValuesArrayTy &CurPointers,
8543                               MapValuesArrayTy &CurSizes,
8544                               MapFlagsArrayTy &CurMapTypes) const {
8545     bool IsImplicit = true;
8546     // Do the default mapping.
8547     if (CI.capturesThis()) {
8548       CurBasePointers.push_back(CV);
8549       CurPointers.push_back(CV);
8550       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8551       CurSizes.push_back(
8552           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8553                                     CGF.Int64Ty, /*isSigned=*/true));
8554       // Default map type.
8555       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8556     } else if (CI.capturesVariableByCopy()) {
8557       CurBasePointers.push_back(CV);
8558       CurPointers.push_back(CV);
8559       if (!RI.getType()->isAnyPointerType()) {
8560         // Captures passed by value that are not pointers have to be signaled
8561         // to the runtime as literals.
8562         CurMapTypes.push_back(OMP_MAP_LITERAL);
8563         CurSizes.push_back(CGF.Builder.CreateIntCast(
8564             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8565       } else {
8566         // Pointers are implicitly mapped with a zero size and no flags
8567         // (other than the first map, which is added for all implicit maps).
8568         CurMapTypes.push_back(OMP_MAP_NONE);
8569         CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8570       }
8571       const VarDecl *VD = CI.getCapturedVar();
8572       auto I = FirstPrivateDecls.find(VD);
8573       if (I != FirstPrivateDecls.end())
8574         IsImplicit = I->getSecond();
8575     } else {
8576       assert(CI.capturesVariable() && "Expected captured reference.");
8577       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8578       QualType ElementType = PtrTy->getPointeeType();
8579       CurSizes.push_back(CGF.Builder.CreateIntCast(
8580           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8581       // The default map type for a scalar/complex type is 'to' because by
8582       // default the value doesn't have to be retrieved. For an aggregate
8583       // type, the default is 'tofrom'.
8584       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8585       const VarDecl *VD = CI.getCapturedVar();
8586       auto I = FirstPrivateDecls.find(VD);
8587       if (I != FirstPrivateDecls.end() &&
8588           VD->getType().isConstant(CGF.getContext())) {
8589         llvm::Constant *Addr =
8590             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8591         // Copy the value of the original variable to the new global copy.
8592         CGF.Builder.CreateMemCpy(
8593             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
8594             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8595             CurSizes.back(), /*IsVolatile=*/false);
8596         // Use the new global copy as both base pointer and pointer.
8597         CurBasePointers.push_back(Addr);
8598         CurPointers.push_back(Addr);
8599       } else {
8600         CurBasePointers.push_back(CV);
8601         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8602           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8603               CV, ElementType, CGF.getContext().getDeclAlign(VD),
8604               AlignmentSource::Decl));
8605           CurPointers.push_back(PtrAddr.getPointer());
8606         } else {
8607           CurPointers.push_back(CV);
8608         }
8609       }
8610       if (I != FirstPrivateDecls.end())
8611         IsImplicit = I->getSecond();
8612     }
8613     // Every default map produces a single argument which is a target parameter.
8614     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8615
8616     // Add flag stating this is an implicit map.
8617     if (IsImplicit)
8618       CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8619   }
8620 };
8621 } // anonymous namespace
8622
8623 /// Emit the arrays used to pass the captures and map information to the
8624 /// offloading runtime library. If there is no map or capture information,
8625 /// return nullptr by reference.
8626 static void
8627 emitOffloadingArrays(CodeGenFunction &CGF,
8628                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8629                      MappableExprsHandler::MapValuesArrayTy &Pointers,
8630                      MappableExprsHandler::MapValuesArrayTy &Sizes,
8631                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8632                      CGOpenMPRuntime::TargetDataInfo &Info) {
8633   CodeGenModule &CGM = CGF.CGM;
8634   ASTContext &Ctx = CGF.getContext();
8635
8636   // Reset the array information.
8637   Info.clearArrayInfo();
8638   Info.NumberOfPtrs = BasePointers.size();
8639
8640   if (Info.NumberOfPtrs) {
8641     // Detect if we have any capture size requiring runtime evaluation of the
8642     // size so that a constant array can eventually be used.
8643     bool hasRuntimeEvaluationCaptureSize = false;
8644     for (llvm::Value *S : Sizes)
8645       if (!isa<llvm::Constant>(S)) {
8646         hasRuntimeEvaluationCaptureSize = true;
8647         break;
8648       }
8649
8650     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8651     QualType PointerArrayType = Ctx.getConstantArrayType(
8652         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
8653         /*IndexTypeQuals=*/0);
8654
8655     Info.BasePointersArray =
8656         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8657     Info.PointersArray =
8658         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8659
8660     // If we don't have any VLA types or other types that require runtime
8661     // evaluation, we can use a constant array for the map sizes, otherwise we
8662     // need to fill up the arrays as we do for the pointers.
8663     QualType Int64Ty =
8664         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8665     if (hasRuntimeEvaluationCaptureSize) {
8666       QualType SizeArrayType = Ctx.getConstantArrayType(
8667           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
8668           /*IndexTypeQuals=*/0);
8669       Info.SizesArray =
8670           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8671     } else {
8672       // We expect all the sizes to be constant, so we collect them to create
8673       // a constant array.
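      // For illustration, with two constant-size captures (an int and a
      // double) the resulting global would look roughly like this IR sketch
      // (the exact symbol name comes from getName below):
      //
      //   @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 4, i64 8]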
8674       SmallVector<llvm::Constant *, 16> ConstSizes;
8675       for (llvm::Value *S : Sizes)
8676         ConstSizes.push_back(cast<llvm::Constant>(S));
8677
8678       auto *SizesArrayInit = llvm::ConstantArray::get(
8679           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8680       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8681       auto *SizesArrayGbl = new llvm::GlobalVariable(
8682           CGM.getModule(), SizesArrayInit->getType(),
8683           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8684           SizesArrayInit, Name);
8685       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8686       Info.SizesArray = SizesArrayGbl;
8687     }
8688
8689     // The map types are always constant so we don't need to generate code to
8690     // fill arrays. Instead, we create an array constant.
8691     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8692     llvm::copy(MapTypes, Mapping.begin());
8693     llvm::Constant *MapTypesArrayInit =
8694         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8695     std::string MaptypesName =
8696         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8697     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8698         CGM.getModule(), MapTypesArrayInit->getType(),
8699         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8700         MapTypesArrayInit, MaptypesName);
8701     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8702     Info.MapTypesArray = MapTypesArrayGbl;
8703
8704     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8705       llvm::Value *BPVal = *BasePointers[I];
8706       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8707           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8708           Info.BasePointersArray, 0, I);
8709       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8710           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8711       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8712       CGF.Builder.CreateStore(BPVal, BPAddr);
8713
8714       if (Info.requiresDevicePointerInfo())
8715         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8716           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8717
8718       llvm::Value *PVal = Pointers[I];
8719       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8720           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8721           Info.PointersArray, 0, I);
8722       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8723           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8724       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8725       CGF.Builder.CreateStore(PVal, PAddr);
8726
8727       if (hasRuntimeEvaluationCaptureSize) {
8728         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8729             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8730             Info.SizesArray,
8731             /*Idx0=*/0,
8732             /*Idx1=*/I);
8733         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8734         CGF.Builder.CreateStore(
8735             CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8736             SAddr);
8737       }
8738     }
8739   }
8740 }
8741
8742 /// Emit the arguments to be passed to the runtime library based on the
8743 /// arrays of pointers, sizes and map types.
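/// For illustration, a sketch of what the emitted arguments amount to,
/// assuming a region with at least one mapped capture (the names follow the
/// arrays created in emitOffloadingArrays above):
/// \code
/// void **base_ptrs = &.offload_baseptrs[0];
/// void **ptrs      = &.offload_ptrs[0];
/// int64_t *sizes   = &.offload_sizes[0];
/// int64_t *types   = &.offload_maptypes[0];
/// \endcode
/// With no mapped pointers, null pointers of the matching types are passed
/// instead.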
8744 static void emitOffloadingArraysArgument(
8745     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8746     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8747     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8748   CodeGenModule &CGM = CGF.CGM;
8749   if (Info.NumberOfPtrs) {
8750     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8751         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8752         Info.BasePointersArray,
8753         /*Idx0=*/0, /*Idx1=*/0);
8754     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8755         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8756         Info.PointersArray,
8757         /*Idx0=*/0,
8758         /*Idx1=*/0);
8759     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8760         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8761         /*Idx0=*/0, /*Idx1=*/0);
8762     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8763         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8764         Info.MapTypesArray,
8765         /*Idx0=*/0,
8766         /*Idx1=*/0);
8767   } else {
8768     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8769     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8770     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8771     MapTypesArrayArg =
8772         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8773   }
8774 }
8775
8776 /// Check for inner distribute directive.
8777 static const OMPExecutableDirective *
8778 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8779   const auto *CS = D.getInnermostCapturedStmt();
8780   const auto *Body =
8781       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8782   const Stmt *ChildStmt =
8783       CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
8784
8785   if (const auto *NestedDir =
8786           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8787     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8788     switch (D.getDirectiveKind()) {
8789     case OMPD_target:
8790       if (isOpenMPDistributeDirective(DKind))
8791         return NestedDir;
8792       if (DKind == OMPD_teams) {
8793         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8794             /*IgnoreCaptured=*/true);
8795         if (!Body)
8796           return nullptr;
8797         ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
8798         if (const auto *NND =
8799                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8800           DKind = NND->getDirectiveKind();
8801           if (isOpenMPDistributeDirective(DKind))
8802             return NND;
8803         }
8804       }
8805       return nullptr;
8806     case OMPD_target_teams:
8807       if (isOpenMPDistributeDirective(DKind))
8808         return NestedDir;
8809       return nullptr;
8810     case OMPD_target_parallel:
8811     case OMPD_target_simd:
8812     case OMPD_target_parallel_for:
8813     case OMPD_target_parallel_for_simd:
8814       return nullptr;
8815     case OMPD_target_teams_distribute:
8816     case OMPD_target_teams_distribute_simd:
8817     case OMPD_target_teams_distribute_parallel_for:
8818     case OMPD_target_teams_distribute_parallel_for_simd:
8819     case OMPD_parallel:
8820     case OMPD_for:
8821     case OMPD_parallel_for:
8822     case OMPD_parallel_master:
8823     case OMPD_parallel_sections:
8824     case OMPD_for_simd:
8825     case OMPD_parallel_for_simd:
8826     case OMPD_cancel:
8827     case OMPD_cancellation_point:
8828     case OMPD_ordered:
8829     case OMPD_threadprivate:
8830     case OMPD_allocate:
8831     case OMPD_task:
8832     case OMPD_simd:
8833     case OMPD_sections:
8834     case OMPD_section:
8835     case OMPD_single:
8836     case OMPD_master:
8837     case OMPD_critical:
8838     case
OMPD_taskyield: 8839 case OMPD_barrier: 8840 case OMPD_taskwait: 8841 case OMPD_taskgroup: 8842 case OMPD_atomic: 8843 case OMPD_flush: 8844 case OMPD_depobj: 8845 case OMPD_scan: 8846 case OMPD_teams: 8847 case OMPD_target_data: 8848 case OMPD_target_exit_data: 8849 case OMPD_target_enter_data: 8850 case OMPD_distribute: 8851 case OMPD_distribute_simd: 8852 case OMPD_distribute_parallel_for: 8853 case OMPD_distribute_parallel_for_simd: 8854 case OMPD_teams_distribute: 8855 case OMPD_teams_distribute_simd: 8856 case OMPD_teams_distribute_parallel_for: 8857 case OMPD_teams_distribute_parallel_for_simd: 8858 case OMPD_target_update: 8859 case OMPD_declare_simd: 8860 case OMPD_declare_variant: 8861 case OMPD_begin_declare_variant: 8862 case OMPD_end_declare_variant: 8863 case OMPD_declare_target: 8864 case OMPD_end_declare_target: 8865 case OMPD_declare_reduction: 8866 case OMPD_declare_mapper: 8867 case OMPD_taskloop: 8868 case OMPD_taskloop_simd: 8869 case OMPD_master_taskloop: 8870 case OMPD_master_taskloop_simd: 8871 case OMPD_parallel_master_taskloop: 8872 case OMPD_parallel_master_taskloop_simd: 8873 case OMPD_requires: 8874 case OMPD_unknown: 8875 default: 8876 llvm_unreachable("Unexpected directive."); 8877 } 8878 } 8879 8880 return nullptr; 8881 } 8882 8883 /// Emit the user-defined mapper function. The code generation follows the 8884 /// pattern in the example below. 8885 /// \code 8886 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 8887 /// void *base, void *begin, 8888 /// int64_t size, int64_t type) { 8889 /// // Allocate space for an array section first. 8890 /// if (size > 1 && !maptype.IsDelete) 8891 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8892 /// size*sizeof(Ty), clearToFrom(type)); 8893 /// // Map members. 8894 /// for (unsigned i = 0; i < size; i++) { 8895 /// // For each component specified by this mapper: 8896 /// for (auto c : all_components) { 8897 /// if (c.hasMapper()) 8898 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 8899 /// c.arg_type); 8900 /// else 8901 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 8902 /// c.arg_begin, c.arg_size, c.arg_type); 8903 /// } 8904 /// } 8905 /// // Delete the array section. 8906 /// if (size > 1 && maptype.IsDelete) 8907 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8908 /// size*sizeof(Ty), clearToFrom(type)); 8909 /// } 8910 /// \endcode 8911 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 8912 CodeGenFunction *CGF) { 8913 if (UDMMap.count(D) > 0) 8914 return; 8915 ASTContext &C = CGM.getContext(); 8916 QualType Ty = D->getType(); 8917 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 8918 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 8919 auto *MapperVarDecl = 8920 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 8921 SourceLocation Loc = D->getLocation(); 8922 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 8923 8924 // Prepare mapper function arguments and attributes. 
8925   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8926                               C.VoidPtrTy, ImplicitParamDecl::Other);
8927   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
8928                             ImplicitParamDecl::Other);
8929   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8930                              C.VoidPtrTy, ImplicitParamDecl::Other);
8931   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8932                             ImplicitParamDecl::Other);
8933   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8934                             ImplicitParamDecl::Other);
8935   FunctionArgList Args;
8936   Args.push_back(&HandleArg);
8937   Args.push_back(&BaseArg);
8938   Args.push_back(&BeginArg);
8939   Args.push_back(&SizeArg);
8940   Args.push_back(&TypeArg);
8941   const CGFunctionInfo &FnInfo =
8942       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
8943   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
8944   SmallString<64> TyStr;
8945   llvm::raw_svector_ostream Out(TyStr);
8946   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
8947   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
8948   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
8949                                     Name, &CGM.getModule());
8950   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
8951   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
8952   // Start the mapper function code generation.
8953   CodeGenFunction MapperCGF(CGM);
8954   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
8955   // Compute the starting and end addresses of array elements.
8956   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
8957       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
8958       C.getPointerType(Int64Ty), Loc);
8959   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
8960       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
8961       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
8962   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
8963   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
8964       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
8965       C.getPointerType(Int64Ty), Loc);
8966   // Prepare common arguments for array initialization and deletion.
8967   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
8968       MapperCGF.GetAddrOfLocalVar(&HandleArg),
8969       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8970   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
8971       MapperCGF.GetAddrOfLocalVar(&BaseArg),
8972       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8973   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
8974       MapperCGF.GetAddrOfLocalVar(&BeginArg),
8975       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8976
8977   // Emit array initialization if this is an array section and \p MapType
8978   // indicates that memory allocation is required.
8979   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
8980   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
8981                              ElementSize, HeadBB, /*IsInit=*/true);
8982
8983   // Emit a for loop to iterate through SizeArg elements and map all of them.
8984
8985   // Emit the loop header block.
8986   MapperCGF.EmitBlock(HeadBB);
8987   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
8988   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
8989   // Evaluate whether the initial condition is satisfied.
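  // Together with the body emitted below, the check forms roughly this loop
  // (IR sketch; the block names match the basic blocks created here):
  //
  //   omp.arraymap.head:
  //     %isempty = icmp eq %begin, %end
  //     br i1 %isempty, label %omp.done, label %omp.arraymap.body
  //   omp.arraymap.body:
  //     %cur = phi [ %begin, %head ], [ %next, %omp.arraymap.body ]
  //     ... push mapper components for the element at %cur ...
  //     %next = getelementptr %cur, 1
  //     %isdone = icmp eq %next, %end
  //     br i1 %isdone, label %omp.arraymap.exit, label %omp.arraymap.body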
8990   llvm::Value *IsEmpty =
8991       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
8992   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8993   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
8994
8995   // Emit the loop body block.
8996   MapperCGF.EmitBlock(BodyBB);
8997   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
8998       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
8999   PtrPHI->addIncoming(PtrBegin, EntryBB);
9000   Address PtrCurrent =
9001       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9002                           .getAlignment()
9003                           .alignmentOfArrayElement(ElementSize));
9004   // Privatize the declared variable of the mapper to be the current array element.
9005   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9006   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
9007     return MapperCGF
9008         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
9009         .getAddress(MapperCGF);
9010   });
9011   (void)Scope.Privatize();
9012
9013   // Get map clause information. Fill up the arrays with all mapped variables.
9014   MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9015   MappableExprsHandler::MapValuesArrayTy Pointers;
9016   MappableExprsHandler::MapValuesArrayTy Sizes;
9017   MappableExprsHandler::MapFlagsArrayTy MapTypes;
9018   MappableExprsHandler MEHandler(*D, MapperCGF);
9019   MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
9020
9021   // Call the runtime API __tgt_mapper_num_components to get the number of
9022   // pre-existing components.
9023   llvm::Value *OffloadingArgs[] = {Handle};
9024   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9025       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9026                                             OMPRTL___tgt_mapper_num_components),
9027       OffloadingArgs);
9028   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9029       PreviousSize,
9030       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9031
9032   // Fill up the runtime mapper handle for all components.
9033   for (unsigned I = 0; I < BasePointers.size(); ++I) {
9034     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9035         *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9036     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9037         Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9038     llvm::Value *CurSizeArg = Sizes[I];
9039
9040     // Extract the MEMBER_OF field from the map type.
9041     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
9042     MapperCGF.EmitBlock(MemberBB);
9043     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
9044     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
9045         OriMapType,
9046         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
9047     llvm::BasicBlock *MemberCombineBB =
9048         MapperCGF.createBasicBlock("omp.member.combine");
9049     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
9050     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
9051     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
9052     // Add the number of pre-existing components to the MEMBER_OF field if it
9053     // is valid.
9054     MapperCGF.EmitBlock(MemberCombineBB);
9055     llvm::Value *CombinedMember =
9056         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9057     // Do nothing if it is not a member of previous components.
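    // For illustration: MEMBER_OF lives in the high bits of the map type
    // (see getFlagMemberOffset()). If the handle already holds, say, three
    // components, ShiftedPreviousSize is (3 << getFlagMemberOffset()), so
    // the add above rebases a member flag MEMBER_OF=n to MEMBER_OF=n+3.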
9058     MapperCGF.EmitBlock(TypeBB);
9059     llvm::PHINode *MemberMapType =
9060         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9061     MemberMapType->addIncoming(OriMapType, MemberBB);
9062     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9063
9064     // Combine the map type inherited from user-defined mapper with that
9065     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9066     // bits of the \a MapType, which is the input argument of the mapper
9067     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9068     // bits of MemberMapType.
9069     // [OpenMP 5.0], 1.2.6. map-type decay.
9070     //        | alloc |  to   | from  | tofrom | release | delete
9071     // ----------------------------------------------------------
9072     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9073     // to     | alloc | to    | alloc | to     | release | delete
9074     // from   | alloc | alloc | from  | from   | release | delete
9075     // tofrom | alloc | to    | from  | tofrom | release | delete
9076     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9077         MapType,
9078         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9079                                    MappableExprsHandler::OMP_MAP_FROM));
9080     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9081     llvm::BasicBlock *AllocElseBB =
9082         MapperCGF.createBasicBlock("omp.type.alloc.else");
9083     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9084     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9085     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9086     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9087     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9088     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9089     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9090     MapperCGF.EmitBlock(AllocBB);
9091     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9092         MemberMapType,
9093         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9094                                      MappableExprsHandler::OMP_MAP_FROM)));
9095     MapperCGF.Builder.CreateBr(EndBB);
9096     MapperCGF.EmitBlock(AllocElseBB);
9097     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9098         LeftToFrom,
9099         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9100     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9101     // In case of to, clear OMP_MAP_FROM.
9102     MapperCGF.EmitBlock(ToBB);
9103     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9104         MemberMapType,
9105         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9106     MapperCGF.Builder.CreateBr(EndBB);
9107     MapperCGF.EmitBlock(ToElseBB);
9108     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9109         LeftToFrom,
9110         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9111     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9112     // In case of from, clear OMP_MAP_TO.
9113     MapperCGF.EmitBlock(FromBB);
9114     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9115         MemberMapType,
9116         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9117     // In case of tofrom, do nothing.
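    // Worked example of the decay table above: a member declared 'tofrom'
    // in the mapper that is reached through a 'to'-mapped parent decays to
    // 'to'; the OMP_MAP_FROM bit is cleared on the ToBB path.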
9118     MapperCGF.EmitBlock(EndBB);
9119     llvm::PHINode *CurMapType =
9120         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9121     CurMapType->addIncoming(AllocMapType, AllocBB);
9122     CurMapType->addIncoming(ToMapType, ToBB);
9123     CurMapType->addIncoming(FromMapType, FromBB);
9124     CurMapType->addIncoming(MemberMapType, ToElseBB);
9125
9126     // TODO: call the corresponding mapper function if a user-defined mapper is
9127     // associated with this map clause.
9128     // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9129     // data structure.
9130     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9131                                      CurSizeArg, CurMapType};
9132     MapperCGF.EmitRuntimeCall(
9133         OMPBuilder.getOrCreateRuntimeFunction(
9134             CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9135         OffloadingArgs);
9136   }
9137
9138   // Update the pointer to point to the next element that needs to be mapped,
9139   // and check whether we have mapped all elements.
9140   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9141       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9142   PtrPHI->addIncoming(PtrNext, BodyBB);
9143   llvm::Value *IsDone =
9144       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9145   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9146   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9147
9148   MapperCGF.EmitBlock(ExitBB);
9149   // Emit array deletion if this is an array section and \p MapType indicates
9150   // that deletion is required.
9151   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9152                              ElementSize, DoneBB, /*IsInit=*/false);
9153
9154   // Emit the function exit block.
9155   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9156   MapperCGF.FinishFunction();
9157   UDMMap.try_emplace(D, Fn);
9158   if (CGF) {
9159     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9160     Decls.second.push_back(D);
9161   }
9162 }
9163
9164 /// Emit the array initialization or deletion portion for user-defined mapper
9165 /// code generation. First, it evaluates whether an array section is mapped and
9166 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9167 /// true, and \a MapType indicates to not delete this array, array
9168 /// initialization code is generated. If \a IsInit is false, and \a MapType
9169 /// indicates to delete this array, array deletion code is generated.
9170 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9171     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9172     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9173     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9174   StringRef Prefix = IsInit ? ".init" : ".del";
9175
9176   // Evaluate if this is an array section.
9177   llvm::BasicBlock *IsDeleteBB =
9178       MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
9179   llvm::BasicBlock *BodyBB =
9180       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9181   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9182       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9183   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9184
9185   // Evaluate if we are going to delete this section.
9186   MapperCGF.EmitBlock(IsDeleteBB);
9187   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9188       MapType,
9189       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9190   llvm::Value *DeleteCond;
9191   if (IsInit) {
9192     DeleteCond = MapperCGF.Builder.CreateIsNull(
9193         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9194   } else {
9195     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9196         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9197   }
9198   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9199
9200   MapperCGF.EmitBlock(BodyBB);
9201   // Get the array size by multiplying element size and element number (i.e., \p
9202   // Size).
9203   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9204       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9205   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it is used
9206   // for memory allocation/deletion purposes only.
9207   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9208       MapType,
9209       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9210                                    MappableExprsHandler::OMP_MAP_FROM)));
9211   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9212   // data structure.
9213   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9214   MapperCGF.EmitRuntimeCall(
9215       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9216                                             OMPRTL___tgt_push_mapper_component),
9217       OffloadingArgs);
9218 }
9219
9220 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9221     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9222     llvm::Value *DeviceID,
9223     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9224                                      const OMPLoopDirective &D)>
9225         SizeEmitter) {
9226   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9227   const OMPExecutableDirective *TD = &D;
9228   // Get nested teams distribute kind directive, if any.
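  // For illustration, assuming hypothetical user code such as:
  //
  //   #pragma omp target
  //   #pragma omp teams distribute parallel for
  //   for (int i = 0; i < N; ++i) ...
  //
  // the nested distribute-based directive supplies the loop whose trip
  // count (N iterations) is passed to __kmpc_push_target_tripcount below.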
9229   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9230     TD = getNestedDistributeDirective(CGM.getContext(), D);
9231   if (!TD)
9232     return;
9233   const auto *LD = cast<OMPLoopDirective>(TD);
9234   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9235                                                      PrePostActionTy &) {
9236     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9237       llvm::Value *Args[] = {DeviceID, NumIterations};
9238       CGF.EmitRuntimeCall(
9239           OMPBuilder.getOrCreateRuntimeFunction(
9240               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9241           Args);
9242     }
9243   };
9244   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9245 }
9246
9247 void CGOpenMPRuntime::emitTargetCall(
9248     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9249     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9250     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9251     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9252                                      const OMPLoopDirective &D)>
9253         SizeEmitter) {
9254   if (!CGF.HaveInsertPoint())
9255     return;
9256
9257   assert(OutlinedFn && "Invalid outlined function!");
9258
9259   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
9260   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9261   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9262   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9263                                             PrePostActionTy &) {
9264     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9265   };
9266   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9267
9268   CodeGenFunction::OMPTargetDataInfo InputInfo;
9269   llvm::Value *MapTypesArray = nullptr;
9270   // Fill up the pointer arrays and transfer execution to the device.
9271   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9272                     &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
9273                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9274     if (Device.getInt() == OMPC_DEVICE_ancestor) {
9275       // Reverse offloading is not supported, so just execute on the host.
9276       if (RequiresOuterTask) {
9277         CapturedVars.clear();
9278         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9279       }
9280       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9281       return;
9282     }
9283
9284     // On top of the arrays that were filled up, the target offloading call
9285     // takes as arguments the device id as well as the host pointer. The host
9286     // pointer is used by the runtime library to identify the current target
9287     // region, so it only has to be unique and not necessarily point to
9288     // anything. It could be the pointer to the outlined function that
9289     // implements the target region, but we do not use that, so that the
9290     // compiler does not need to keep it alive and can therefore inline the
9291     // host function if proven worthwhile during optimization.
9292
9293     // From this point on, we need to have an ID of the target region defined.
9294     assert(OutlinedFnID && "Invalid outlined function ID!");
9295
9296     // Emit device ID if any.
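    // For illustration: with a 'device(2)' clause the emitted DeviceID is
    // the value 2 sign-extended to i64; without a device clause it is the
    // sentinel OMP_DEVICEID_UNDEF.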
9297     llvm::Value *DeviceID;
9298     if (Device.getPointer()) {
9299       assert((Device.getInt() == OMPC_DEVICE_unknown ||
9300               Device.getInt() == OMPC_DEVICE_device_num) &&
9301              "Expected device_num modifier.");
9302       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9303       DeviceID =
9304           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9305     } else {
9306       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9307     }
9308
9309     // Emit the number of elements in the offloading arrays.
9310     llvm::Value *PointerNum =
9311         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9312
9313     // Return value of the runtime offloading call.
9314     llvm::Value *Return;
9315
9316     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9317     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9318
9319     // Emit tripcount for the target loop-based directive.
9320     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
9321
9322     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9323     // The target region is an outlined function launched by the runtime
9324     // via calls to __tgt_target() or __tgt_target_teams().
9325     //
9326     // __tgt_target() launches a target region with one team and one thread,
9327     // executing a serial region. This master thread may in turn launch
9328     // more threads within its team upon encountering a parallel region;
9329     // however, no additional teams can be launched on the device.
9330     //
9331     // __tgt_target_teams() launches a target region with one or more teams,
9332     // each with one or more threads. This call is required for target
9333     // constructs such as:
9334     //   'target teams'
9335     //   'target' / 'teams'
9336     //   'target teams distribute parallel for'
9337     //   'target parallel'
9338     // and so on.
9339     //
9340     // Note that on the host and CPU targets, the runtime implementation of
9341     // these calls simply calls the outlined function without forking threads.
9342     // The outlined functions themselves have runtime calls to
9343     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9344     // the compiler in emitTeamsCall() and emitParallelCall().
9345     //
9346     // In contrast, on the NVPTX target, the implementation of
9347     // __tgt_target_teams() launches a GPU kernel with the requested number
9348     // of teams and threads so no additional calls to the runtime are required.
9349     if (NumTeams) {
9350       // If we have NumTeams defined this means that we have an enclosed teams
9351       // region. Therefore we also expect to have NumThreads defined. These two
9352       // values should be defined in the presence of a teams directive,
9353       // regardless of whether any clauses are associated. If the user uses
9354       // teams but no clauses, these two values will be the defaults that should
9355       // be passed to the runtime library: a 32-bit integer with the value zero.
9356       assert(NumThreads && "Thread limit expression should be available along "
9357                            "with number of teams.");
9358       llvm::Value *OffloadingArgs[] = {DeviceID,
9359                                        OutlinedFnID,
9360                                        PointerNum,
9361                                        InputInfo.BasePointersArray.getPointer(),
9362                                        InputInfo.PointersArray.getPointer(),
9363                                        InputInfo.SizesArray.getPointer(),
9364                                        MapTypesArray,
9365                                        NumTeams,
9366                                        NumThreads};
9367       Return = CGF.EmitRuntimeCall(
9368           OMPBuilder.getOrCreateRuntimeFunction(
9369               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_teams_nowait
9370                                          : OMPRTL___tgt_target_teams),
9371           OffloadingArgs);
9372     } else {
9373       llvm::Value *OffloadingArgs[] = {DeviceID,
9374                                        OutlinedFnID,
9375                                        PointerNum,
9376                                        InputInfo.BasePointersArray.getPointer(),
9377                                        InputInfo.PointersArray.getPointer(),
9378                                        InputInfo.SizesArray.getPointer(),
9379                                        MapTypesArray};
9380       Return = CGF.EmitRuntimeCall(
9381           OMPBuilder.getOrCreateRuntimeFunction(
9382               CGM.getModule(),
9383               HasNowait ? OMPRTL___tgt_target_nowait : OMPRTL___tgt_target),
9384           OffloadingArgs);
9385     }
9386
9387     // Check the error code and execute the host version if required.
9388     llvm::BasicBlock *OffloadFailedBlock =
9389         CGF.createBasicBlock("omp_offload.failed");
9390     llvm::BasicBlock *OffloadContBlock =
9391         CGF.createBasicBlock("omp_offload.cont");
9392     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
9393     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
9394
9395     CGF.EmitBlock(OffloadFailedBlock);
9396     if (RequiresOuterTask) {
9397       CapturedVars.clear();
9398       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9399     }
9400     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9401     CGF.EmitBranch(OffloadContBlock);
9402
9403     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
9404   };
9405
9406   // Notify that the host version must be executed.
9407   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
9408                     RequiresOuterTask](CodeGenFunction &CGF,
9409                                        PrePostActionTy &) {
9410     if (RequiresOuterTask) {
9411       CapturedVars.clear();
9412       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9413     }
9414     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9415   };
9416
9417   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
9418                           &CapturedVars, RequiresOuterTask,
9419                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
9420     // Fill up the arrays with all the captured variables.
9421     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9422     MappableExprsHandler::MapValuesArrayTy Pointers;
9423     MappableExprsHandler::MapValuesArrayTy Sizes;
9424     MappableExprsHandler::MapFlagsArrayTy MapTypes;
9425
9426     // Get mappable expression information.
9427     MappableExprsHandler MEHandler(D, CGF);
9428     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9429
9430     auto RI = CS.getCapturedRecordDecl()->field_begin();
9431     auto CV = CapturedVars.begin();
9432     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9433                                               CE = CS.capture_end();
9434          CI != CE; ++CI, ++RI, ++CV) {
9435       MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
9436       MappableExprsHandler::MapValuesArrayTy CurPointers;
9437       MappableExprsHandler::MapValuesArrayTy CurSizes;
9438       MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
9439       MappableExprsHandler::StructRangeInfoTy PartialStruct;
9440
9441       // VLA sizes are passed to the outlined region by copy and do not have map
9442       // information associated.
9443       if (CI->capturesVariableArrayType()) {
9444         CurBasePointers.push_back(*CV);
9445         CurPointers.push_back(*CV);
9446         CurSizes.push_back(CGF.Builder.CreateIntCast(
9447             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9448         // Copy to the device as an argument. No need to retrieve it.
9449         CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
9450                               MappableExprsHandler::OMP_MAP_TARGET_PARAM |
9451                               MappableExprsHandler::OMP_MAP_IMPLICIT);
9452       } else {
9453         // If we have any information in the map clause, we use it; otherwise we
9454         // just do a default mapping.
9455         MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
9456                                          CurSizes, CurMapTypes, PartialStruct);
9457         if (CurBasePointers.empty())
9458           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
9459                                            CurPointers, CurSizes, CurMapTypes);
9460         // Generate correct mapping for variables captured by reference in
9461         // lambdas.
9462         if (CI->capturesVariable())
9463           MEHandler.generateInfoForLambdaCaptures(
9464               CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
9465               CurMapTypes, LambdaPointers);
9466       }
9467       // We expect to have at least one element of information for this capture.
9468       assert(!CurBasePointers.empty() &&
9469              "Non-existing map pointer for capture!");
9470       assert(CurBasePointers.size() == CurPointers.size() &&
9471              CurBasePointers.size() == CurSizes.size() &&
9472              CurBasePointers.size() == CurMapTypes.size() &&
9473              "Inconsistent map information sizes!");
9474
9475       // If there is an entry in PartialStruct it means we have a struct with
9476       // individual members mapped. Emit an extra combined entry.
9477       if (PartialStruct.Base.isValid())
9478         MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
9479                                     CurMapTypes, PartialStruct);
9480
9481       // We need to append the results of this capture to what we already have.
9482       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
9483       Pointers.append(CurPointers.begin(), CurPointers.end());
9484       Sizes.append(CurSizes.begin(), CurSizes.end());
9485       MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
9486     }
9487     // Adjust MEMBER_OF flags for the lambdas captures.
9488     MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
9489                                               Pointers, MapTypes);
9490     // Map other list items in the map clause which are not captured variables
9491     // but "declare target link" global variables.
9492     MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
9493                                                MapTypes);
9494
9495     TargetDataInfo Info;
9496     // Fill up the arrays and create the arguments.
9497     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9498     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
9499                                  Info.PointersArray, Info.SizesArray,
9500                                  Info.MapTypesArray, Info);
9501     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9502     InputInfo.BasePointersArray =
9503         Address(Info.BasePointersArray, CGM.getPointerAlign());
9504     InputInfo.PointersArray =
9505         Address(Info.PointersArray, CGM.getPointerAlign());
9506     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
9507     MapTypesArray = Info.MapTypesArray;
9508     if (RequiresOuterTask)
9509       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9510     else
9511       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9512   };
9513
9514   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
9515                              CodeGenFunction &CGF, PrePostActionTy &) {
9516     if (RequiresOuterTask) {
9517       CodeGenFunction::OMPTargetDataInfo InputInfo;
9518       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9519     } else {
9520       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9521     }
9522   };
9523
9524   // If we have a target function ID, it means that we need to support
9525   // offloading; otherwise, just execute on the host. We need to execute on
9526   // the host regardless of the conditional in the if clause if, e.g., the
9527   // user does not specify target triples.
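  // In outline, the offloading branch emitted here has roughly this shape
  // (pseudo-code sketch; the argument setup is built by the lambdas above):
  //
  //   err = __tgt_target[_teams][_nowait](device_id, outlined_fn_id, n,
  //                                       baseptrs, ptrs, sizes, maptypes
  //                                       [, num_teams, thread_limit]);
  //   if (err != 0)
  //     <call the outlined host version with the captured variables>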
9528   if (OutlinedFnID) {
9529     if (IfCond) {
9530       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9531     } else {
9532       RegionCodeGenTy ThenRCG(TargetThenGen);
9533       ThenRCG(CGF);
9534     }
9535   } else {
9536     RegionCodeGenTy ElseRCG(TargetElseGen);
9537     ElseRCG(CGF);
9538   }
9539 }
9540
9541 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9542                                                     StringRef ParentName) {
9543   if (!S)
9544     return;
9545
9546   // Codegen OMP target directives that offload compute to the device.
9547   bool RequiresDeviceCodegen =
9548       isa<OMPExecutableDirective>(S) &&
9549       isOpenMPTargetExecutionDirective(
9550           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9551
9552   if (RequiresDeviceCodegen) {
9553     const auto &E = *cast<OMPExecutableDirective>(S);
9554     unsigned DeviceID;
9555     unsigned FileID;
9556     unsigned Line;
9557     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
9558                              FileID, Line);
9559
9560     // Is this a target region that should not be emitted as an entry point? If
9561     // so, just signal that we are done with this target region.
9562     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
9563                                                             ParentName, Line))
9564       return;
9565
9566     switch (E.getDirectiveKind()) {
9567     case OMPD_target:
9568       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9569                                                    cast<OMPTargetDirective>(E));
9570       break;
9571     case OMPD_target_parallel:
9572       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9573           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9574       break;
9575     case OMPD_target_teams:
9576       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9577           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9578       break;
9579     case OMPD_target_teams_distribute:
9580       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9581           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9582       break;
9583     case OMPD_target_teams_distribute_simd:
9584       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9585           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9586       break;
9587     case OMPD_target_parallel_for:
9588       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9589           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9590       break;
9591     case OMPD_target_parallel_for_simd:
9592       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9593           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9594       break;
9595     case OMPD_target_simd:
9596       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9597           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9598       break;
9599     case OMPD_target_teams_distribute_parallel_for:
9600       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9601           CGM, ParentName,
9602           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9603       break;
9604     case OMPD_target_teams_distribute_parallel_for_simd:
9605       CodeGenFunction::
9606           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9607               CGM, ParentName,
9608               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9609       break;
9610     case OMPD_parallel:
9611     case OMPD_for:
9612     case OMPD_parallel_for:
9613     case OMPD_parallel_master:
9614     case OMPD_parallel_sections:
9615     case OMPD_for_simd:
9616     case OMPD_parallel_for_simd:
9617     case OMPD_cancel:
9618     case OMPD_cancellation_point:
9619     case OMPD_ordered:
9620     case OMPD_threadprivate:
9621     case OMPD_allocate:
9622     case OMPD_task:
9623     case OMPD_simd:
9624     case OMPD_sections:
9625     case OMPD_section:
9626     case OMPD_single:
9627     case OMPD_master:
9628     case OMPD_critical:
9629     case OMPD_taskyield:
9630     case OMPD_barrier:
9631     case OMPD_taskwait:
9632     case OMPD_taskgroup:
9633     case OMPD_atomic:
9634     case OMPD_flush:
9635     case OMPD_depobj:
9636     case OMPD_scan:
9637     case OMPD_teams:
9638     case OMPD_target_data:
9639     case OMPD_target_exit_data:
9640     case OMPD_target_enter_data:
9641     case OMPD_distribute:
9642     case OMPD_distribute_simd:
9643     case OMPD_distribute_parallel_for:
9644     case OMPD_distribute_parallel_for_simd:
9645     case OMPD_teams_distribute:
9646     case OMPD_teams_distribute_simd:
9647     case OMPD_teams_distribute_parallel_for:
9648     case OMPD_teams_distribute_parallel_for_simd:
9649     case OMPD_target_update:
9650     case OMPD_declare_simd:
9651     case OMPD_declare_variant:
9652     case OMPD_begin_declare_variant:
9653     case OMPD_end_declare_variant:
9654     case OMPD_declare_target:
9655     case OMPD_end_declare_target:
9656     case OMPD_declare_reduction:
9657     case OMPD_declare_mapper:
9658     case OMPD_taskloop:
9659     case OMPD_taskloop_simd:
9660     case OMPD_master_taskloop:
9661     case OMPD_master_taskloop_simd:
9662     case OMPD_parallel_master_taskloop:
9663     case OMPD_parallel_master_taskloop_simd:
9664     case OMPD_requires:
9665     case OMPD_unknown:
9666     default:
9667       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9668     }
9669     return;
9670   }
9671
9672   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9673     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9674       return;
9675
9676     scanForTargetRegionsFunctions(
9677         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9678     return;
9679   }
9680
9681   // If this is a lambda function, look into its body.
9682   if (const auto *L = dyn_cast<LambdaExpr>(S))
9683     S = L->getBody();
9684
9685   // Keep looking for target regions recursively.
9686   for (const Stmt *II : S->children())
9687     scanForTargetRegionsFunctions(II, ParentName);
9688 }
9689
9690 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9691   // If emitting code for the host, we do not process FD here. Instead we do
9692   // the normal code generation.
9693   if (!CGM.getLangOpts().OpenMPIsDevice) {
9694     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9695       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9696           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9697       // Do not emit device_type(nohost) functions for the host.
9698       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9699         return true;
9700     }
9701     return false;
9702   }
9703
9704   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9705   // Try to detect target regions in the function.
9706   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9707     StringRef Name = CGM.getMangledName(GD);
9708     scanForTargetRegionsFunctions(FD->getBody(), Name);
9709     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9710         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9711     // Do not emit device_type(host) functions for the device.
9712     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9713       return true;
9714   }
9715
9716   // Do not emit the function if it is not marked as declare target.
9717   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9718          AlreadyEmittedTargetDecls.count(VD) == 0;
9719 }
9720
9721 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9722   if (!CGM.getLangOpts().OpenMPIsDevice)
9723     return false;
9724
9725   // Check if there are Ctors/Dtors in this declaration and look for target
9726   // regions in it. We use the complete variant to produce the kernel name
9727   // mangling.
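  // For illustration, assuming hypothetical user code such as:
  //
  //   struct S {
  //     S() {
  //   #pragma omp target
  //       { /* ... */ }
  //     }
  //   };
  //   S GlobalS;
  //
  // the target region inside S::S() is scanned with the mangled name of the
  // complete constructor as its parent name.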
9728   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9729   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9730     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9731       StringRef ParentName =
9732           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9733       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9734     }
9735     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9736       StringRef ParentName =
9737           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9738       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9739     }
9740   }
9741
9742   // Do not emit the variable if it is not marked as declare target.
9743   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9744       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9745           cast<VarDecl>(GD.getDecl()));
9746   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9747       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9748        HasRequiresUnifiedSharedMemory)) {
9749     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9750     return true;
9751   }
9752   return false;
9753 }
9754
9755 llvm::Constant *
9756 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9757                                                 const VarDecl *VD) {
9758   assert(VD->getType().isConstant(CGM.getContext()) &&
9759          "Expected constant variable.");
9760   StringRef VarName;
9761   llvm::Constant *Addr;
9762   llvm::GlobalValue::LinkageTypes Linkage;
9763   QualType Ty = VD->getType();
9764   SmallString<128> Buffer;
9765   {
9766     unsigned DeviceID;
9767     unsigned FileID;
9768     unsigned Line;
9769     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9770                              FileID, Line);
9771     llvm::raw_svector_ostream OS(Buffer);
9772     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9773        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9774     VarName = OS.str();
9775   }
9776   Linkage = llvm::GlobalValue::InternalLinkage;
9777   Addr =
9778       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9779                                   getDefaultFirstprivateAddressSpace());
9780   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9781   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9782   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9783   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9784       VarName, Addr, VarSize,
9785       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9786   return Addr;
9787 }
9788
9789 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9790                                                    llvm::Constant *Addr) {
9791   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9792       !CGM.getLangOpts().OpenMPIsDevice)
9793     return;
9794   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9795       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9796   if (!Res) {
9797     if (CGM.getLangOpts().OpenMPIsDevice) {
9798       // Register non-target variables being emitted in device code (debug info
9799       // may cause this).
9800       StringRef VarName = CGM.getMangledName(VD);
9801       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9802     }
9803     return;
9804   }
9805   // Register declare target variables.
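  // For illustration, assuming hypothetical user code such as:
  //
  //   int A;
  //   #pragma omp declare target to(A)   // entry records sizeof(A)
  //   int B;
  //   #pragma omp declare target link(B) // entry is pointer-sized
  //
  // Under 'requires unified_shared_memory', 'to' variables take the same
  // pointer-sized registration path as 'link' variables.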
9806   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9807   StringRef VarName;
9808   CharUnits VarSize;
9809   llvm::GlobalValue::LinkageTypes Linkage;
9810
9811   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9812       !HasRequiresUnifiedSharedMemory) {
9813     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9814     VarName = CGM.getMangledName(VD);
9815     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9816       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9817       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9818     } else {
9819       VarSize = CharUnits::Zero();
9820     }
9821     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9822     // Temporary solution to prevent optimizations of the internal variables.
9823     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9824       std::string RefName = getName({VarName, "ref"});
9825       if (!CGM.GetGlobalValue(RefName)) {
9826         llvm::Constant *AddrRef =
9827             getOrCreateInternalVariable(Addr->getType(), RefName);
9828         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9829         GVAddrRef->setConstant(/*Val=*/true);
9830         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9831         GVAddrRef->setInitializer(Addr);
9832         CGM.addCompilerUsedGlobal(GVAddrRef);
9833       }
9834     }
9835   } else {
9836     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9837             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9838              HasRequiresUnifiedSharedMemory)) &&
9839            "Declare target attribute must be 'link' or 'to' with unified memory.");
9840     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9841       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9842     else
9843       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9844
9845     if (CGM.getLangOpts().OpenMPIsDevice) {
9846       VarName = Addr->getName();
9847       Addr = nullptr;
9848     } else {
9849       VarName = getAddrOfDeclareTargetVar(VD).getName();
9850       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9851     }
9852     VarSize = CGM.getPointerSize();
9853     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9854   }
9855
9856   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9857       VarName, Addr, VarSize, Flags, Linkage);
9858 }
9859
9860 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9861   if (isa<FunctionDecl>(GD.getDecl()) ||
9862       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9863     return emitTargetFunctions(GD);
9864
9865   return emitTargetGlobalVariable(GD);
9866 }
9867
9868 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9869   for (const VarDecl *VD : DeferredGlobalVariables) {
9870     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9871         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9872     if (!Res)
9873       continue;
9874     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9875         !HasRequiresUnifiedSharedMemory) {
9876       CGM.EmitGlobal(VD);
9877     } else {
9878       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9879               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9880                HasRequiresUnifiedSharedMemory)) &&
9881              "Expected link clause or to clause with unified memory.");
9882       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9883     }
9884   }
9885 }
9886
9887 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9888     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9889   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9890          "Expected target-based directive.");
9891 }
9892
9893 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
9894   for (const OMPClause *Clause : D->clauselists()) {
*Clause : D->clauselists()) { 9895 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 9896 HasRequiresUnifiedSharedMemory = true; 9897 } else if (const auto *AC = 9898 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 9899 switch (AC->getAtomicDefaultMemOrderKind()) { 9900 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 9901 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 9902 break; 9903 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 9904 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 9905 break; 9906 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 9907 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 9908 break; 9909 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 9910 break; 9911 } 9912 } 9913 } 9914 } 9915 9916 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 9917 return RequiresAtomicOrdering; 9918 } 9919 9920 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 9921 LangAS &AS) { 9922 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 9923 return false; 9924 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 9925 switch (A->getAllocatorType()) { 9926 case OMPAllocateDeclAttr::OMPNullMemAlloc: 9927 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 9928 // Not supported, fall back to the default mem space. 9929 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 9930 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 9931 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 9932 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 9933 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 9934 case OMPAllocateDeclAttr::OMPConstMemAlloc: 9935 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 9936 AS = LangAS::Default; 9937 return true; 9938 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 9939 llvm_unreachable("Expected predefined allocator for variables with " 9940 "static storage."); 9941 } 9942 return false; 9943 } 9944 9945 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 9946 return HasRequiresUnifiedSharedMemory; 9947 } 9948 9949 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 9950 CodeGenModule &CGM) 9951 : CGM(CGM) { 9952 if (CGM.getLangOpts().OpenMPIsDevice) { 9953 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 9954 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 9955 } 9956 } 9957 9958 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 9959 if (CGM.getLangOpts().OpenMPIsDevice) 9960 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 9961 } 9962 9963 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 9964 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 9965 return true; 9966 9967 const auto *D = cast<FunctionDecl>(GD.getDecl()); 9968 // Do not emit the function if it is marked as declare target, as it was 9969 // already emitted. 9970 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 9971 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 9972 if (auto *F = dyn_cast_or_null<llvm::Function>( 9973 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 9974 return !F->isDeclaration(); 9975 return false; 9976 } 9977 return true; 9978 } 9979 9980 return !AlreadyEmittedTargetDecls.insert(D).second; 9981 } 9982 9983 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 9984 // If we don't have entries or if we are emitting code for the device, we 9985 // don't need to do anything.
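// Otherwise a registration function is emitted for the offloading machinery
// to run at startup; it boils down to a single call like (illustrative):
//   __tgt_register_requires(OMP_REQ_UNIFIED_SHARED_MEMORY);
// with OMP_REQ_NONE passed when no relevant requires clause was seen.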
9986 if (CGM.getLangOpts().OMPTargetTriples.empty() || 9987 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 9988 (OffloadEntriesInfoManager.empty() && 9989 !HasEmittedDeclareTargetRegion && 9990 !HasEmittedTargetRegion)) 9991 return nullptr; 9992 9993 // Create and register the function that handles the requires directives. 9994 ASTContext &C = CGM.getContext(); 9995 9996 llvm::Function *RequiresRegFn; 9997 { 9998 CodeGenFunction CGF(CGM); 9999 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10000 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10001 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10002 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 10003 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10004 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10005 // TODO: check for other requires clauses. 10006 // The requires directive takes effect only when a target region is 10007 // present in the compilation unit. Otherwise it is ignored and not 10008 // passed to the runtime. This prevents the runtime from throwing an 10009 // error for mismatched requires clauses across compilation units that 10010 // don't contain at least one target region. 10011 assert((HasEmittedTargetRegion || 10012 HasEmittedDeclareTargetRegion || 10013 !OffloadEntriesInfoManager.empty()) && 10014 "Target or declare target region expected."); 10015 if (HasRequiresUnifiedSharedMemory) 10016 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10017 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10018 CGM.getModule(), OMPRTL___tgt_register_requires), 10019 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10020 CGF.FinishFunction(); 10021 } 10022 return RequiresRegFn; 10023 } 10024 10025 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10026 const OMPExecutableDirective &D, 10027 SourceLocation Loc, 10028 llvm::Function *OutlinedFn, 10029 ArrayRef<llvm::Value *> CapturedVars) { 10030 if (!CGF.HaveInsertPoint()) 10031 return; 10032 10033 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10034 CodeGenFunction::RunCleanupsScope Scope(CGF); 10035 10036 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10037 llvm::Value *Args[] = { 10038 RTLoc, 10039 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10040 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10041 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10042 RealArgs.append(std::begin(Args), std::end(Args)); 10043 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10044 10045 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 10046 CGM.getModule(), OMPRTL___kmpc_fork_teams); 10047 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10048 } 10049 10050 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10051 const Expr *NumTeams, 10052 const Expr *ThreadLimit, 10053 SourceLocation Loc) { 10054 if (!CGF.HaveInsertPoint()) 10055 return; 10056 10057 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10058 10059 llvm::Value *NumTeamsVal = 10060 NumTeams 10061 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 10062 CGF.CGM.Int32Ty, /* isSigned = */ true) 10063 : CGF.Builder.getInt32(0); 10064 10065 llvm::Value *ThreadLimitVal = 10066 ThreadLimit 10067 ?
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10068 CGF.CGM.Int32Ty, /* isSigned = */ true) 10069 : CGF.Builder.getInt32(0); 10070 10071 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit) 10072 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10073 ThreadLimitVal}; 10074 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10075 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 10076 PushNumTeamsArgs); 10077 } 10078 10079 void CGOpenMPRuntime::emitTargetDataCalls( 10080 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10081 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10082 if (!CGF.HaveInsertPoint()) 10083 return; 10084 10085 // Action used to replace the default codegen action and turn privatization 10086 // off. 10087 PrePostActionTy NoPrivAction; 10088 10089 // Generate the code for the opening of the data environment. Capture all the 10090 // arguments of the runtime call by reference because they are used in the 10091 // closing of the region. 10092 auto &&BeginThenGen = [this, &D, Device, &Info, 10093 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10094 // Fill up the arrays with all the mapped variables. 10095 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10096 MappableExprsHandler::MapValuesArrayTy Pointers; 10097 MappableExprsHandler::MapValuesArrayTy Sizes; 10098 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10099 10100 // Get map clause information. 10101 MappableExprsHandler MCHandler(D, CGF); 10102 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10103 10104 // Fill up the arrays and create the arguments. 10105 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10106 10107 llvm::Value *BasePointersArrayArg = nullptr; 10108 llvm::Value *PointersArrayArg = nullptr; 10109 llvm::Value *SizesArrayArg = nullptr; 10110 llvm::Value *MapTypesArrayArg = nullptr; 10111 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10112 SizesArrayArg, MapTypesArrayArg, Info); 10113 10114 // Emit device ID if any. 10115 llvm::Value *DeviceID = nullptr; 10116 if (Device) { 10117 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10118 CGF.Int64Ty, /*isSigned=*/true); 10119 } else { 10120 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10121 } 10122 10123 // Emit the number of elements in the offloading arrays. 10124 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10125 10126 llvm::Value *OffloadingArgs[] = { 10127 DeviceID, PointerNum, BasePointersArrayArg, 10128 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10129 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10130 CGM.getModule(), OMPRTL___tgt_target_data_begin), 10131 OffloadingArgs); 10132 10133 // If device pointer privatization is required, emit the body of the region 10134 // here. It will have to be duplicated: with and without privatization. 10135 if (!Info.CaptureDeviceAddrMap.empty()) 10136 CodeGen(CGF); 10137 }; 10138 10139 // Generate code for the closing of the data region.
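// The closing mirrors the opening above: the same six arguments (device ID,
// pointer count and the four offloading arrays) are rebuilt and passed to
// __tgt_target_data_end instead of __tgt_target_data_begin.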
10140 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10141 PrePostActionTy &) { 10142 assert(Info.isValid() && "Invalid data environment closing arguments."); 10143 10144 llvm::Value *BasePointersArrayArg = nullptr; 10145 llvm::Value *PointersArrayArg = nullptr; 10146 llvm::Value *SizesArrayArg = nullptr; 10147 llvm::Value *MapTypesArrayArg = nullptr; 10148 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10149 SizesArrayArg, MapTypesArrayArg, Info); 10150 10151 // Emit device ID if any. 10152 llvm::Value *DeviceID = nullptr; 10153 if (Device) { 10154 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10155 CGF.Int64Ty, /*isSigned=*/true); 10156 } else { 10157 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10158 } 10159 10160 // Emit the number of elements in the offloading arrays. 10161 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10162 10163 llvm::Value *OffloadingArgs[] = { 10164 DeviceID, PointerNum, BasePointersArrayArg, 10165 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10166 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10167 CGM.getModule(), OMPRTL___tgt_target_data_end), 10168 OffloadingArgs); 10169 }; 10170 10171 // If we need device pointer privatization, we need to emit the body of the 10172 // region with no privatization in the 'else' branch of the conditional. 10173 // Otherwise, we don't have to do anything. 10174 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10175 PrePostActionTy &) { 10176 if (!Info.CaptureDeviceAddrMap.empty()) { 10177 CodeGen.setAction(NoPrivAction); 10178 CodeGen(CGF); 10179 } 10180 }; 10181 10182 // We don't have to do anything to close the region if the if clause evaluates 10183 // to false. 10184 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10185 10186 if (IfCond) { 10187 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10188 } else { 10189 RegionCodeGenTy RCG(BeginThenGen); 10190 RCG(CGF); 10191 } 10192 10193 // If we don't require privatization of device pointers, we emit the body in 10194 // between the runtime calls. This avoids duplicating the body code. 10195 if (Info.CaptureDeviceAddrMap.empty()) { 10196 CodeGen.setAction(NoPrivAction); 10197 CodeGen(CGF); 10198 } 10199 10200 if (IfCond) { 10201 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10202 } else { 10203 RegionCodeGenTy RCG(EndThenGen); 10204 RCG(CGF); 10205 } 10206 } 10207 10208 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10209 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10210 const Expr *Device) { 10211 if (!CGF.HaveInsertPoint()) 10212 return; 10213 10214 assert((isa<OMPTargetEnterDataDirective>(D) || 10215 isa<OMPTargetExitDataDirective>(D) || 10216 isa<OMPTargetUpdateDirective>(D)) && 10217 "Expecting either target enter, exit data, or update directives."); 10218 10219 CodeGenFunction::OMPTargetDataInfo InputInfo; 10220 llvm::Value *MapTypesArray = nullptr; 10221 // Generate the code for the opening of the data environment. 10222 auto &&ThenGen = [this, &D, Device, &InputInfo, 10223 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10224 // Emit device ID if any. 
10225 llvm::Value *DeviceID = nullptr; 10226 if (Device) { 10227 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10228 CGF.Int64Ty, /*isSigned=*/true); 10229 } else { 10230 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10231 } 10232 10233 // Emit the number of elements in the offloading arrays. 10234 llvm::Constant *PointerNum = 10235 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10236 10237 llvm::Value *OffloadingArgs[] = {DeviceID, 10238 PointerNum, 10239 InputInfo.BasePointersArray.getPointer(), 10240 InputInfo.PointersArray.getPointer(), 10241 InputInfo.SizesArray.getPointer(), 10242 MapTypesArray}; 10243 10244 // Select the right runtime function call for each expected standalone 10245 // directive. 10246 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10247 RuntimeFunction RTLFn; 10248 switch (D.getDirectiveKind()) { 10249 case OMPD_target_enter_data: 10250 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait 10251 : OMPRTL___tgt_target_data_begin; 10252 break; 10253 case OMPD_target_exit_data: 10254 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait 10255 : OMPRTL___tgt_target_data_end; 10256 break; 10257 case OMPD_target_update: 10258 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait 10259 : OMPRTL___tgt_target_data_update; 10260 break; 10261 case OMPD_parallel: 10262 case OMPD_for: 10263 case OMPD_parallel_for: 10264 case OMPD_parallel_master: 10265 case OMPD_parallel_sections: 10266 case OMPD_for_simd: 10267 case OMPD_parallel_for_simd: 10268 case OMPD_cancel: 10269 case OMPD_cancellation_point: 10270 case OMPD_ordered: 10271 case OMPD_threadprivate: 10272 case OMPD_allocate: 10273 case OMPD_task: 10274 case OMPD_simd: 10275 case OMPD_sections: 10276 case OMPD_section: 10277 case OMPD_single: 10278 case OMPD_master: 10279 case OMPD_critical: 10280 case OMPD_taskyield: 10281 case OMPD_barrier: 10282 case OMPD_taskwait: 10283 case OMPD_taskgroup: 10284 case OMPD_atomic: 10285 case OMPD_flush: 10286 case OMPD_depobj: 10287 case OMPD_scan: 10288 case OMPD_teams: 10289 case OMPD_target_data: 10290 case OMPD_distribute: 10291 case OMPD_distribute_simd: 10292 case OMPD_distribute_parallel_for: 10293 case OMPD_distribute_parallel_for_simd: 10294 case OMPD_teams_distribute: 10295 case OMPD_teams_distribute_simd: 10296 case OMPD_teams_distribute_parallel_for: 10297 case OMPD_teams_distribute_parallel_for_simd: 10298 case OMPD_declare_simd: 10299 case OMPD_declare_variant: 10300 case OMPD_begin_declare_variant: 10301 case OMPD_end_declare_variant: 10302 case OMPD_declare_target: 10303 case OMPD_end_declare_target: 10304 case OMPD_declare_reduction: 10305 case OMPD_declare_mapper: 10306 case OMPD_taskloop: 10307 case OMPD_taskloop_simd: 10308 case OMPD_master_taskloop: 10309 case OMPD_master_taskloop_simd: 10310 case OMPD_parallel_master_taskloop: 10311 case OMPD_parallel_master_taskloop_simd: 10312 case OMPD_target: 10313 case OMPD_target_simd: 10314 case OMPD_target_teams_distribute: 10315 case OMPD_target_teams_distribute_simd: 10316 case OMPD_target_teams_distribute_parallel_for: 10317 case OMPD_target_teams_distribute_parallel_for_simd: 10318 case OMPD_target_teams: 10319 case OMPD_target_parallel: 10320 case OMPD_target_parallel_for: 10321 case OMPD_target_parallel_for_simd: 10322 case OMPD_requires: 10323 case OMPD_unknown: 10324 default: 10325 llvm_unreachable("Unexpected standalone target data directive."); 10326 break; 10327 } 10328 CGF.EmitRuntimeCall( 10329 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 
RTLFn), 10330 OffloadingArgs); 10331 }; 10332 10333 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10334 CodeGenFunction &CGF, PrePostActionTy &) { 10335 // Fill up the arrays with all the mapped variables. 10336 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10337 MappableExprsHandler::MapValuesArrayTy Pointers; 10338 MappableExprsHandler::MapValuesArrayTy Sizes; 10339 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10340 10341 // Get map clause information. 10342 MappableExprsHandler MEHandler(D, CGF); 10343 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10344 10345 TargetDataInfo Info; 10346 // Fill up the arrays and create the arguments. 10347 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10348 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10349 Info.PointersArray, Info.SizesArray, 10350 Info.MapTypesArray, Info); 10351 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10352 InputInfo.BasePointersArray = 10353 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10354 InputInfo.PointersArray = 10355 Address(Info.PointersArray, CGM.getPointerAlign()); 10356 InputInfo.SizesArray = 10357 Address(Info.SizesArray, CGM.getPointerAlign()); 10358 MapTypesArray = Info.MapTypesArray; 10359 if (D.hasClausesOfKind<OMPDependClause>()) 10360 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10361 else 10362 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10363 }; 10364 10365 if (IfCond) { 10366 emitIfClause(CGF, IfCond, TargetThenGen, 10367 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10368 } else { 10369 RegionCodeGenTy ThenRCG(TargetThenGen); 10370 ThenRCG(CGF); 10371 } 10372 } 10373 10374 namespace { 10375 /// Kind of parameter in a function with 'declare simd' directive. 10376 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10377 /// Attribute set of the parameter. 10378 struct ParamAttrTy { 10379 ParamKindTy Kind = Vector; 10380 llvm::APSInt StrideOrArg; 10381 llvm::APSInt Alignment; 10382 }; 10383 } // namespace 10384 10385 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10386 ArrayRef<ParamAttrTy> ParamAttrs) { 10387 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10388 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10389 // of that clause. The VLEN value must be a power of 2. 10390 // Otherwise, the notion of the function's "characteristic data type" (CDT) 10391 // is used to compute the vector length. 10392 // CDT is defined in the following order: 10393 // a) For a non-void function, the CDT is the return type. 10394 // b) If the function has any non-uniform, non-linear parameters, then the 10395 // CDT is the type of the first such parameter. 10396 // c) If the CDT determined by a) or b) above is a struct, union, or class 10397 // type that is passed by value (except for the type that maps to the 10398 // built-in complex data type), the characteristic data type is int. 10399 // d) If none of the above three cases is applicable, the CDT is int. 10400 // The VLEN is then determined based on the CDT and the size of the vector 10401 // register of the ISA for which the current vector version is generated. The 10402 // VLEN is computed using the formula below: 10403 // VLEN = sizeof(vector_register) / sizeof(CDT), 10404 // where the vector register size is specified in section 3.2.1 "Registers 10405 // and the Stack Frame" of the original AMD64 ABI document.
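// Illustrative example (hypothetical declaration, not from the source):
//   #pragma omp declare simd uniform(n)
//   double add(double x, int n);
// Case a) applies: the CDT is the return type 'double', so this function
// returns 64, and, e.g., the AVX variant gets VLEN = 256 / 64 = 4 lanes.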
10406 QualType RetType = FD->getReturnType(); 10407 if (RetType.isNull()) 10408 return 0; 10409 ASTContext &C = FD->getASTContext(); 10410 QualType CDT; 10411 if (!RetType.isNull() && !RetType->isVoidType()) { 10412 CDT = RetType; 10413 } else { 10414 unsigned Offset = 0; 10415 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10416 if (ParamAttrs[Offset].Kind == Vector) 10417 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10418 ++Offset; 10419 } 10420 if (CDT.isNull()) { 10421 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10422 if (ParamAttrs[I + Offset].Kind == Vector) { 10423 CDT = FD->getParamDecl(I)->getType(); 10424 break; 10425 } 10426 } 10427 } 10428 } 10429 if (CDT.isNull()) 10430 CDT = C.IntTy; 10431 CDT = CDT->getCanonicalTypeUnqualified(); 10432 if (CDT->isRecordType() || CDT->isUnionType()) 10433 CDT = C.IntTy; 10434 return C.getTypeSize(CDT); 10435 } 10436 10437 static void 10438 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10439 const llvm::APSInt &VLENVal, 10440 ArrayRef<ParamAttrTy> ParamAttrs, 10441 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10442 struct ISADataTy { 10443 char ISA; 10444 unsigned VecRegSize; 10445 }; 10446 ISADataTy ISAData[] = { 10447 { 10448 'b', 128 10449 }, // SSE 10450 { 10451 'c', 256 10452 }, // AVX 10453 { 10454 'd', 256 10455 }, // AVX2 10456 { 10457 'e', 512 10458 }, // AVX512 10459 }; 10460 llvm::SmallVector<char, 2> Masked; 10461 switch (State) { 10462 case OMPDeclareSimdDeclAttr::BS_Undefined: 10463 Masked.push_back('N'); 10464 Masked.push_back('M'); 10465 break; 10466 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10467 Masked.push_back('N'); 10468 break; 10469 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10470 Masked.push_back('M'); 10471 break; 10472 } 10473 for (char Mask : Masked) { 10474 for (const ISADataTy &Data : ISAData) { 10475 SmallString<256> Buffer; 10476 llvm::raw_svector_ostream Out(Buffer); 10477 Out << "_ZGV" << Data.ISA << Mask; 10478 if (!VLENVal) { 10479 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10480 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10481 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10482 } else { 10483 Out << VLENVal; 10484 } 10485 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 10486 switch (ParamAttr.Kind) { 10487 case LinearWithVarStride: 10488 Out << 's' << ParamAttr.StrideOrArg; 10489 break; 10490 case Linear: 10491 Out << 'l'; 10492 if (ParamAttr.StrideOrArg != 1) 10493 Out << ParamAttr.StrideOrArg; 10494 break; 10495 case Uniform: 10496 Out << 'u'; 10497 break; 10498 case Vector: 10499 Out << 'v'; 10500 break; 10501 } 10502 if (!!ParamAttr.Alignment) 10503 Out << 'a' << ParamAttr.Alignment; 10504 } 10505 Out << '_' << Fn->getName(); 10506 Fn->addFnAttr(Out.str()); 10507 } 10508 } 10509 } 10510 10511 // These are the functions needed to mangle the names of the 10512 // vector functions generated by the compiler, according to the rules 10513 // defined in the "Vector Function ABI specifications for AArch64", 10514 // available at 10515 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10516 10517 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10518 /// 10519 /// TODO: Need to implement the behavior for references marked with a 10520 /// var or no linear modifiers (1.b in the section). For this, we 10521 /// need to extend ParamKindTy to support the linear modifiers.
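/// Illustrative example (hypothetical signature, not from the source): in
///   #pragma omp declare simd uniform(u) linear(l)
///   double f(double v, double *u, int l);
/// only 'v' maps to vector, so MTV is true for it and false for 'u' and 'l'.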
10522 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10523 QT = QT.getCanonicalType(); 10524 10525 if (QT->isVoidType()) 10526 return false; 10527 10528 if (Kind == ParamKindTy::Uniform) 10529 return false; 10530 10531 if (Kind == ParamKindTy::Linear) 10532 return false; 10533 10534 // TODO: Handle linear references with modifiers 10535 10536 if (Kind == ParamKindTy::LinearWithVarStride) 10537 return false; 10538 10539 return true; 10540 } 10541 10542 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10543 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10544 QT = QT.getCanonicalType(); 10545 unsigned Size = C.getTypeSize(QT); 10546 10547 // Only scalars and complex types at most 16 bytes wide set PBV to true. 10548 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10549 return false; 10550 10551 if (QT->isFloatingType()) 10552 return true; 10553 10554 if (QT->isIntegerType()) 10555 return true; 10556 10557 if (QT->isPointerType()) 10558 return true; 10559 10560 // TODO: Add support for complex types (section 3.1.2, item 2). 10561 10562 return false; 10563 } 10564 10565 /// Computes the lane size (LS) of a return type or of an input parameter, 10566 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10567 /// TODO: Add support for references, section 3.2.1, item 1. 10568 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10569 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10570 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10571 if (getAArch64PBV(PTy, C)) 10572 return C.getTypeSize(PTy); 10573 } 10574 if (getAArch64PBV(QT, C)) 10575 return C.getTypeSize(QT); 10576 10577 return C.getTypeSize(C.getUIntPtrType()); 10578 } 10579 10580 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10581 // signature of the scalar function, as defined in 3.2.2 of the 10582 // AAVFABI. 10583 static std::tuple<unsigned, unsigned, bool> 10584 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10585 QualType RetType = FD->getReturnType().getCanonicalType(); 10586 10587 ASTContext &C = FD->getASTContext(); 10588 10589 bool OutputBecomesInput = false; 10590 10591 llvm::SmallVector<unsigned, 8> Sizes; 10592 if (!RetType->isVoidType()) { 10593 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10594 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10595 OutputBecomesInput = true; 10596 } 10597 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10598 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10599 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10600 } 10601 10602 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10603 // The LS of a function parameter / return value can only be a power 10604 // of 2, starting from 8 bits, up to 128. 10605 assert(std::all_of(Sizes.begin(), Sizes.end(), 10606 [](unsigned Size) { 10607 return Size == 8 || Size == 16 || Size == 32 || 10608 Size == 64 || Size == 128; 10609 }) && 10610 "Invalid size"); 10611 10612 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10613 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10614 OutputBecomesInput); 10615 } 10616 10617 /// Mangle the parameter part of the vector function name according to 10618 /// the parameters' OpenMP classification. The mangling function is defined in 10619 /// section 3.5 of the AAVFABI.
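/// Illustrative example (hypothetical attribute set): parameters classified
/// as (uniform, linear with step 2, vector, vector aligned to 16 bytes)
/// mangle to "ul2vva16".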
10620 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10621 SmallString<256> Buffer; 10622 llvm::raw_svector_ostream Out(Buffer); 10623 for (const auto &ParamAttr : ParamAttrs) { 10624 switch (ParamAttr.Kind) { 10625 case LinearWithVarStride: 10626 Out << "ls" << ParamAttr.StrideOrArg; 10627 break; 10628 case Linear: 10629 Out << 'l'; 10630 // Don't print the step value if it is not present or if it is 10631 // equal to 1. 10632 if (ParamAttr.StrideOrArg != 1) 10633 Out << ParamAttr.StrideOrArg; 10634 break; 10635 case Uniform: 10636 Out << 'u'; 10637 break; 10638 case Vector: 10639 Out << 'v'; 10640 break; 10641 } 10642 10643 if (!!ParamAttr.Alignment) 10644 Out << 'a' << ParamAttr.Alignment; 10645 } 10646 10647 return std::string(Out.str()); 10648 } 10649 10650 // Function used to add the attribute. The parameter `VLEN` is 10651 // templated to allow the use of "x" when targeting scalable functions 10652 // for SVE. 10653 template <typename T> 10654 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10655 char ISA, StringRef ParSeq, 10656 StringRef MangledName, bool OutputBecomesInput, 10657 llvm::Function *Fn) { 10658 SmallString<256> Buffer; 10659 llvm::raw_svector_ostream Out(Buffer); 10660 Out << Prefix << ISA << LMask << VLEN; 10661 if (OutputBecomesInput) 10662 Out << "v"; 10663 Out << ParSeq << "_" << MangledName; 10664 Fn->addFnAttr(Out.str()); 10665 } 10666 10667 // Helper function to generate the Advanced SIMD names depending on 10668 // the value of the NDS when simdlen is not present. 10669 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10670 StringRef Prefix, char ISA, 10671 StringRef ParSeq, StringRef MangledName, 10672 bool OutputBecomesInput, 10673 llvm::Function *Fn) { 10674 switch (NDS) { 10675 case 8: 10676 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10677 OutputBecomesInput, Fn); 10678 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10679 OutputBecomesInput, Fn); 10680 break; 10681 case 16: 10682 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10683 OutputBecomesInput, Fn); 10684 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10685 OutputBecomesInput, Fn); 10686 break; 10687 case 32: 10688 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10689 OutputBecomesInput, Fn); 10690 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10691 OutputBecomesInput, Fn); 10692 break; 10693 case 64: 10694 case 128: 10695 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10696 OutputBecomesInput, Fn); 10697 break; 10698 default: 10699 llvm_unreachable("Scalar type is too wide."); 10700 } 10701 } 10702 10703 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 10704 static void emitAArch64DeclareSimdFunction( 10705 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10706 ArrayRef<ParamAttrTy> ParamAttrs, 10707 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10708 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10709 10710 // Get basic data for building the vector signature. 10711 const auto Data = getNDSWDS(FD, ParamAttrs); 10712 const unsigned NDS = std::get<0>(Data); 10713 const unsigned WDS = std::get<1>(Data); 10714 const bool OutputBecomesInput = std::get<2>(Data); 10715 10716 // Check the values provided via `simdlen` by the user. 10717 // 1. 
A `simdlen(1)` doesn't produce vector signatures. 10718 if (UserVLEN == 1) { 10719 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10720 DiagnosticsEngine::Warning, 10721 "The clause simdlen(1) has no effect when targeting aarch64."); 10722 CGM.getDiags().Report(SLoc, DiagID); 10723 return; 10724 } 10725 10726 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10727 // Advanced SIMD output. 10728 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10729 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10730 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10731 "power of 2 when targeting Advanced SIMD."); 10732 CGM.getDiags().Report(SLoc, DiagID); 10733 return; 10734 } 10735 10736 // 3. Section 3.4.1: SVE fixed-length vectors must obey the architectural 10737 // limits. 10738 if (ISA == 's' && UserVLEN != 0) { 10739 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10740 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10741 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10742 "lanes in the architectural constraints " 10743 "for SVE (min is 128-bit, max is " 10744 "2048-bit, by steps of 128-bit)"); 10745 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10746 return; 10747 } 10748 } 10749 10750 // Sort out the parameter sequence. 10751 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10752 StringRef Prefix = "_ZGV"; 10753 // Generate simdlen from user input (if any). 10754 if (UserVLEN) { 10755 if (ISA == 's') { 10756 // SVE generates only a masked function. 10757 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10758 OutputBecomesInput, Fn); 10759 } else { 10760 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10761 // Advanced SIMD generates one or two functions, depending on 10762 // the `[not]inbranch` clause. 10763 switch (State) { 10764 case OMPDeclareSimdDeclAttr::BS_Undefined: 10765 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10766 OutputBecomesInput, Fn); 10767 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10768 OutputBecomesInput, Fn); 10769 break; 10770 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10771 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10772 OutputBecomesInput, Fn); 10773 break; 10774 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10775 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10776 OutputBecomesInput, Fn); 10777 break; 10778 } 10779 } 10780 } else { 10781 // If no user simdlen is provided, follow the AAVFABI rules for 10782 // generating the vector length. 10783 if (ISA == 's') { 10784 // SVE, section 3.4.1, item 1. 10785 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10786 OutputBecomesInput, Fn); 10787 } else { 10788 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10789 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10790 // two vector names depending on the use of the clause 10791 // `[not]inbranch`.
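// Illustrative example (hypothetical function 'foo', NDS == 32, no branch
// clause): both the unmasked and masked 2- and 4-lane Advanced SIMD variants
// are added, i.e. _ZGVnN2..._foo, _ZGVnN4..._foo, _ZGVnM2..._foo and
// _ZGVnM4..._foo, where '...' stands for the mangled parameter sequence.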
10792 switch (State) { 10793 case OMPDeclareSimdDeclAttr::BS_Undefined: 10794 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10795 OutputBecomesInput, Fn); 10796 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10797 OutputBecomesInput, Fn); 10798 break; 10799 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10800 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10801 OutputBecomesInput, Fn); 10802 break; 10803 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10804 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10805 OutputBecomesInput, Fn); 10806 break; 10807 } 10808 } 10809 } 10810 } 10811 10812 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10813 llvm::Function *Fn) { 10814 ASTContext &C = CGM.getContext(); 10815 FD = FD->getMostRecentDecl(); 10816 // Map params to their positions in function decl. 10817 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10818 if (isa<CXXMethodDecl>(FD)) 10819 ParamPositions.try_emplace(FD, 0); 10820 unsigned ParamPos = ParamPositions.size(); 10821 for (const ParmVarDecl *P : FD->parameters()) { 10822 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10823 ++ParamPos; 10824 } 10825 while (FD) { 10826 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10827 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10828 // Mark uniform parameters. 10829 for (const Expr *E : Attr->uniforms()) { 10830 E = E->IgnoreParenImpCasts(); 10831 unsigned Pos; 10832 if (isa<CXXThisExpr>(E)) { 10833 Pos = ParamPositions[FD]; 10834 } else { 10835 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10836 ->getCanonicalDecl(); 10837 Pos = ParamPositions[PVD]; 10838 } 10839 ParamAttrs[Pos].Kind = Uniform; 10840 } 10841 // Get alignment info. 10842 auto NI = Attr->alignments_begin(); 10843 for (const Expr *E : Attr->aligneds()) { 10844 E = E->IgnoreParenImpCasts(); 10845 unsigned Pos; 10846 QualType ParmTy; 10847 if (isa<CXXThisExpr>(E)) { 10848 Pos = ParamPositions[FD]; 10849 ParmTy = E->getType(); 10850 } else { 10851 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10852 ->getCanonicalDecl(); 10853 Pos = ParamPositions[PVD]; 10854 ParmTy = PVD->getType(); 10855 } 10856 ParamAttrs[Pos].Alignment = 10857 (*NI) 10858 ? (*NI)->EvaluateKnownConstInt(C) 10859 : llvm::APSInt::getUnsigned( 10860 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10861 .getQuantity()); 10862 ++NI; 10863 } 10864 // Mark linear parameters. 10865 auto SI = Attr->steps_begin(); 10866 auto MI = Attr->modifiers_begin(); 10867 for (const Expr *E : Attr->linears()) { 10868 E = E->IgnoreParenImpCasts(); 10869 unsigned Pos; 10870 // Rescaling factor needed to compute the linear parameter 10871 // value in the mangled name. 10872 unsigned PtrRescalingFactor = 1; 10873 if (isa<CXXThisExpr>(E)) { 10874 Pos = ParamPositions[FD]; 10875 } else { 10876 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10877 ->getCanonicalDecl(); 10878 Pos = ParamPositions[PVD]; 10879 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 10880 PtrRescalingFactor = CGM.getContext() 10881 .getTypeSizeInChars(P->getPointeeType()) 10882 .getQuantity(); 10883 } 10884 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10885 ParamAttr.Kind = Linear; 10886 // Assuming a stride of 1, for `linear` without modifiers. 
10887 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 10888 if (*SI) { 10889 Expr::EvalResult Result; 10890 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10891 if (const auto *DRE = 10892 dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10893 if (const auto *StridePVD = dyn_cast<ParmVarDecl>(DRE->getDecl())) { 10894 ParamAttr.Kind = LinearWithVarStride; 10895 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10896 ParamPositions[StridePVD->getCanonicalDecl()]); 10897 } 10898 } 10899 } else { 10900 ParamAttr.StrideOrArg = Result.Val.getInt(); 10901 } 10902 } 10903 // If we are using a linear clause on a pointer, we need to 10904 // rescale the value of linear_step with the byte size of the 10905 // pointee type. 10906 if (Linear == ParamAttr.Kind) 10907 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 10908 ++SI; 10909 ++MI; 10910 } 10911 llvm::APSInt VLENVal; 10912 SourceLocation ExprLoc; 10913 const Expr *VLENExpr = Attr->getSimdlen(); 10914 if (VLENExpr) { 10915 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 10916 ExprLoc = VLENExpr->getExprLoc(); 10917 } 10918 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 10919 if (CGM.getTriple().isX86()) { 10920 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 10921 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 10922 unsigned VLEN = VLENVal.getExtValue(); 10923 StringRef MangledName = Fn->getName(); 10924 if (CGM.getTarget().hasFeature("sve")) 10925 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10926 MangledName, 's', 128, Fn, ExprLoc); 10927 if (CGM.getTarget().hasFeature("neon")) 10928 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10929 MangledName, 'n', 128, Fn, ExprLoc); 10930 } 10931 } 10932 FD = FD->getPreviousDecl(); 10933 } 10934 } 10935 10936 namespace { 10937 /// Cleanup action for doacross support.
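/// Pushed by emitDoacrossInit below: when the loop region is exited
/// (normally or on the exception path), the cleanup emits the matching
/// __kmpc_doacross_fini(loc, gtid) call.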
10938 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 10939 public: 10940 static const int DoacrossFinArgs = 2; 10941 10942 private: 10943 llvm::FunctionCallee RTLFn; 10944 llvm::Value *Args[DoacrossFinArgs]; 10945 10946 public: 10947 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 10948 ArrayRef<llvm::Value *> CallArgs) 10949 : RTLFn(RTLFn) { 10950 assert(CallArgs.size() == DoacrossFinArgs); 10951 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10952 } 10953 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10954 if (!CGF.HaveInsertPoint()) 10955 return; 10956 CGF.EmitRuntimeCall(RTLFn, Args); 10957 } 10958 }; 10959 } // namespace 10960 10961 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 10962 const OMPLoopDirective &D, 10963 ArrayRef<Expr *> NumIterations) { 10964 if (!CGF.HaveInsertPoint()) 10965 return; 10966 10967 ASTContext &C = CGM.getContext(); 10968 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 10969 RecordDecl *RD; 10970 if (KmpDimTy.isNull()) { 10971 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 10972 // kmp_int64 lo; // lower 10973 // kmp_int64 up; // upper 10974 // kmp_int64 st; // stride 10975 // }; 10976 RD = C.buildImplicitRecord("kmp_dim"); 10977 RD->startDefinition(); 10978 addFieldToRecordDecl(C, RD, Int64Ty); 10979 addFieldToRecordDecl(C, RD, Int64Ty); 10980 addFieldToRecordDecl(C, RD, Int64Ty); 10981 RD->completeDefinition(); 10982 KmpDimTy = C.getRecordType(RD); 10983 } else { 10984 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 10985 } 10986 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 10987 QualType ArrayTy = 10988 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 10989 10990 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 10991 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 10992 enum { LowerFD = 0, UpperFD, StrideFD }; 10993 // Fill dims with data. 
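// Illustrative example: for '#pragma omp for ordered(2)' two kmp_dim entries
// are produced, each ending up as {lo = 0, up = <iteration count>, st = 1};
// 'lo' keeps the 0 from the null-initialization above.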
10994 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 10995 LValue DimsLVal = CGF.MakeAddrLValue( 10996 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 10997 // dims.upper = num_iterations; 10998 LValue UpperLVal = CGF.EmitLValueForField( 10999 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11000 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 11001 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 11002 Int64Ty, NumIterations[I]->getExprLoc()); 11003 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11004 // dims.stride = 1; 11005 LValue StrideLVal = CGF.EmitLValueForField( 11006 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11007 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11008 StrideLVal); 11009 } 11010 11011 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11012 // kmp_int32 num_dims, struct kmp_dim * dims); 11013 llvm::Value *Args[] = { 11014 emitUpdateLocation(CGF, D.getBeginLoc()), 11015 getThreadID(CGF, D.getBeginLoc()), 11016 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11017 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11018 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 11019 CGM.VoidPtrTy)}; 11020 11021 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11022 CGM.getModule(), OMPRTL___kmpc_doacross_init); 11023 CGF.EmitRuntimeCall(RTLFn, Args); 11024 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11025 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11026 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11027 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 11028 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11029 llvm::makeArrayRef(FiniArgs)); 11030 } 11031 11032 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11033 const OMPDependClause *C) { 11034 QualType Int64Ty = 11035 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11036 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11037 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11038 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 11039 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11040 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11041 const Expr *CounterVal = C->getLoopData(I); 11042 assert(CounterVal); 11043 llvm::Value *CntVal = CGF.EmitScalarConversion( 11044 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11045 CounterVal->getExprLoc()); 11046 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11047 /*Volatile=*/false, Int64Ty); 11048 } 11049 llvm::Value *Args[] = { 11050 emitUpdateLocation(CGF, C->getBeginLoc()), 11051 getThreadID(CGF, C->getBeginLoc()), 11052 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11053 llvm::FunctionCallee RTLFn; 11054 if (C->getDependencyKind() == OMPC_DEPEND_source) { 11055 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11056 OMPRTL___kmpc_doacross_post); 11057 } else { 11058 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 11059 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11060 OMPRTL___kmpc_doacross_wait); 11061 } 11062 CGF.EmitRuntimeCall(RTLFn, Args); 11063 } 11064 11065 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 11066 llvm::FunctionCallee Callee, 11067 ArrayRef<llvm::Value *> Args) const { 11068 assert(Loc.isValid() && "Outlined function call location 
must be valid."); 11069 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 11070 11071 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 11072 if (Fn->doesNotThrow()) { 11073 CGF.EmitNounwindRuntimeCall(Fn, Args); 11074 return; 11075 } 11076 } 11077 CGF.EmitRuntimeCall(Callee, Args); 11078 } 11079 11080 void CGOpenMPRuntime::emitOutlinedFunctionCall( 11081 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 11082 ArrayRef<llvm::Value *> Args) const { 11083 emitCall(CGF, Loc, OutlinedFn, Args); 11084 } 11085 11086 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 11087 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 11088 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 11089 HasEmittedDeclareTargetRegion = true; 11090 } 11091 11092 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 11093 const VarDecl *NativeParam, 11094 const VarDecl *TargetParam) const { 11095 return CGF.GetAddrOfLocalVar(NativeParam); 11096 } 11097 11098 namespace { 11099 /// Cleanup action for allocate support. 11100 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 11101 public: 11102 static const int CleanupArgs = 3; 11103 11104 private: 11105 llvm::FunctionCallee RTLFn; 11106 llvm::Value *Args[CleanupArgs]; 11107 11108 public: 11109 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 11110 ArrayRef<llvm::Value *> CallArgs) 11111 : RTLFn(RTLFn) { 11112 assert(CallArgs.size() == CleanupArgs && 11113 "Size of arguments does not match."); 11114 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11115 } 11116 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11117 if (!CGF.HaveInsertPoint()) 11118 return; 11119 CGF.EmitRuntimeCall(RTLFn, Args); 11120 } 11121 }; 11122 } // namespace 11123 11124 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11125 const VarDecl *VD) { 11126 if (!VD) 11127 return Address::invalid(); 11128 const VarDecl *CVD = VD->getCanonicalDecl(); 11129 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 11130 return Address::invalid(); 11131 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11132 // Use the default allocation. 11133 if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 11134 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 11135 !AA->getAllocator()) 11136 return Address::invalid(); 11137 llvm::Value *Size; 11138 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11139 if (CVD->getType()->isVariablyModifiedType()) { 11140 Size = CGF.getTypeSize(CVD->getType()); 11141 // Align the size: ((size + align - 1) / align) * align 11142 Size = CGF.Builder.CreateNUWAdd( 11143 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11144 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11145 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11146 } else { 11147 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11148 Size = CGM.getSize(Sz.alignTo(Align)); 11149 } 11150 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11151 assert(AA->getAllocator() && 11152 "Expected allocator expression for non-default allocator."); 11153 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11154 // According to the standard, the original allocator type is a enum (integer). 11155 // Convert to pointer type, if required. 
11156 if (Allocator->getType()->isIntegerTy()) 11157 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11158 else if (Allocator->getType()->isPointerTy()) 11159 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 11160 CGM.VoidPtrTy); 11161 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11162 11163 llvm::Value *Addr = 11164 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11165 CGM.getModule(), OMPRTL___kmpc_alloc), 11166 Args, getName({CVD->getName(), ".void.addr"})); 11167 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11168 Allocator}; 11169 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11170 CGM.getModule(), OMPRTL___kmpc_free); 11171 11172 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11173 llvm::makeArrayRef(FiniArgs)); 11174 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11175 Addr, 11176 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11177 getName({CVD->getName(), ".addr"})); 11178 return Address(Addr, Align); 11179 } 11180 11181 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11182 CodeGenModule &CGM, const OMPLoopDirective &S) 11183 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11184 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11185 if (!NeedToPush) 11186 return; 11187 NontemporalDeclsSet &DS = 11188 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11189 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11190 for (const Stmt *Ref : C->private_refs()) { 11191 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11192 const ValueDecl *VD; 11193 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11194 VD = DRE->getDecl(); 11195 } else { 11196 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11197 assert((ME->isImplicitCXXThis() || 11198 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11199 "Expected member of current class."); 11200 VD = ME->getMemberDecl(); 11201 } 11202 DS.insert(VD); 11203 } 11204 } 11205 } 11206 11207 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11208 if (!NeedToPush) 11209 return; 11210 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11211 } 11212 11213 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 11214 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11215 11216 return llvm::any_of( 11217 CGM.getOpenMPRuntime().NontemporalDeclsStack, 11218 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 11219 } 11220 11221 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 11222 const OMPExecutableDirective &S, 11223 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 11224 const { 11225 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 11226 // Vars in target/task regions must be excluded completely. 
11227 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 11228 isOpenMPTaskingDirective(S.getDirectiveKind())) { 11229 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11230 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 11231 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 11232 for (const CapturedStmt::Capture &Cap : CS->captures()) { 11233 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 11234 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 11235 } 11236 } 11237 // Exclude vars in private clauses. 11238 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 11239 for (const Expr *Ref : C->varlists()) { 11240 if (!Ref->getType()->isScalarType()) 11241 continue; 11242 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11243 if (!DRE) 11244 continue; 11245 NeedToCheckForLPCs.insert(DRE->getDecl()); 11246 } 11247 } 11248 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 11249 for (const Expr *Ref : C->varlists()) { 11250 if (!Ref->getType()->isScalarType()) 11251 continue; 11252 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11253 if (!DRE) 11254 continue; 11255 NeedToCheckForLPCs.insert(DRE->getDecl()); 11256 } 11257 } 11258 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11259 for (const Expr *Ref : C->varlists()) { 11260 if (!Ref->getType()->isScalarType()) 11261 continue; 11262 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11263 if (!DRE) 11264 continue; 11265 NeedToCheckForLPCs.insert(DRE->getDecl()); 11266 } 11267 } 11268 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 11269 for (const Expr *Ref : C->varlists()) { 11270 if (!Ref->getType()->isScalarType()) 11271 continue; 11272 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11273 if (!DRE) 11274 continue; 11275 NeedToCheckForLPCs.insert(DRE->getDecl()); 11276 } 11277 } 11278 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 11279 for (const Expr *Ref : C->varlists()) { 11280 if (!Ref->getType()->isScalarType()) 11281 continue; 11282 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11283 if (!DRE) 11284 continue; 11285 NeedToCheckForLPCs.insert(DRE->getDecl()); 11286 } 11287 } 11288 for (const Decl *VD : NeedToCheckForLPCs) { 11289 for (const LastprivateConditionalData &Data : 11290 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 11291 if (Data.DeclToUniqueName.count(VD) > 0) { 11292 if (!Data.Disabled) 11293 NeedToAddForLPCsAsDisabled.insert(VD); 11294 break; 11295 } 11296 } 11297 } 11298 } 11299 11300 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11301 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 11302 : CGM(CGF.CGM), 11303 Action((CGM.getLangOpts().OpenMP >= 50 && 11304 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 11305 [](const OMPLastprivateClause *C) { 11306 return C->getKind() == 11307 OMPC_LASTPRIVATE_conditional; 11308 })) 11309 ? 
ActionToDo::PushAsLastprivateConditional 11310 : ActionToDo::DoNotPush) { 11311 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11312 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 11313 return; 11314 assert(Action == ActionToDo::PushAsLastprivateConditional && 11315 "Expected a push action."); 11316 LastprivateConditionalData &Data = 11317 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11318 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11319 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 11320 continue; 11321 11322 for (const Expr *Ref : C->varlists()) { 11323 Data.DeclToUniqueName.insert(std::make_pair( 11324 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 11325 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 11326 } 11327 } 11328 Data.IVLVal = IVLVal; 11329 Data.Fn = CGF.CurFn; 11330 } 11331 11332 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11333 CodeGenFunction &CGF, const OMPExecutableDirective &S) 11334 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 11335 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11336 if (CGM.getLangOpts().OpenMP < 50) 11337 return; 11338 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 11339 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 11340 if (!NeedToAddForLPCsAsDisabled.empty()) { 11341 Action = ActionToDo::DisableLastprivateConditional; 11342 LastprivateConditionalData &Data = 11343 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11344 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 11345 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 11346 Data.Fn = CGF.CurFn; 11347 Data.Disabled = true; 11348 } 11349 } 11350 11351 CGOpenMPRuntime::LastprivateConditionalRAII 11352 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 11353 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 11354 return LastprivateConditionalRAII(CGF, S); 11355 } 11356 11357 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 11358 if (CGM.getLangOpts().OpenMP < 50) 11359 return; 11360 if (Action == ActionToDo::DisableLastprivateConditional) { 11361 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11362 "Expected list of disabled private vars."); 11363 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11364 } 11365 if (Action == ActionToDo::PushAsLastprivateConditional) { 11366 assert( 11367 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11368 "Expected list of lastprivate conditional vars."); 11369 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11370 } 11371 } 11372 11373 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 11374 const VarDecl *VD) { 11375 ASTContext &C = CGM.getContext(); 11376 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 11377 if (I == LastprivateConditionalToTypes.end()) 11378 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 11379 QualType NewType; 11380 const FieldDecl *VDField; 11381 const FieldDecl *FiredField; 11382 LValue BaseLVal; 11383 auto VI = I->getSecond().find(VD); 11384 if (VI == I->getSecond().end()) { 11385 RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional"); 11386 RD->startDefinition(); 11387 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 11388 FiredField = addFieldToRecordDecl(C, RD, C.CharTy); 11389 RD->completeDefinition(); 11390
    NewType = C.getRecordType(RD);
    Address Addr =
        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
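  // In source terms, this helper lowers the following pattern (priv_a, last_a
  // and last_iv are illustrative placeholders, not emitted identifiers):
  //
  //   #pragma omp for lastprivate(conditional: a)
  //   for (int iv = 0; iv < n; ++iv)
  //     if (iv % 5 == 0)
  //       a = iv; // every such store funnels through this update
  //
  // Two internal globals carry the result across threads: last_iv, the
  // largest iteration that wrote the variable so far, and last_a, the value
  // it wrote.
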
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };
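
  // Roughly, the lambda above emits (pseudo-IR; the global names are derived
  // from the unique name generated for the variable and are illustrative):
  //
  //   %last_iv = load i32, i32* @<uniq>.iv
  //   %cmp = icmp sle i32 %last_iv, %iv        ; ule for unsigned IVs
  //   br i1 %cmp, label %lp_cond_then, label %lp_cond_exit
  // lp_cond_then:
  //   store i32 %iv, i32* @<uniq>.iv
  //   store <ty> %priv_a, <ty>* @<uniq>
  //   br label %lp_cond_exit
  // lp_cond_exit: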

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }
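
  // The early-returning branch above handles cases like (illustrative only):
  //
  //   #pragma omp for lastprivate(conditional: a)
  //   for (int iv = 0; iv < n; ++iv) {
  //   #pragma omp parallel
  //     a = foo(iv); // the store is emitted in a nested outlined function
  //   }
  //
  // There, only the Fired flag of the owning region's struct is set; the
  // owning function later inspects the flag in
  // checkAndEmitSharedLastprivateConditional and does the actual bookkeeping.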

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}
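
// In source terms, emitLastprivateConditionalFinalUpdate below performs the
// final copy-out (priv_a and last_a are illustrative placeholders):
//
//   if (<the global last_a was ever created>) // created lazily on update
//     priv_a = last_a; // value from the last iteration that fired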

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
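
// Everything below implements CGOpenMPSIMDRuntime, the runtime variant used
// for -fopenmp-simd compilations. In that mode only simd-related directives
// are honored, so the entry points that would need the full libomp runtime
// are expected to be dead and trap via llvm_unreachable. For example
// (illustrative), under -fopenmp-simd
//
//   #pragma omp parallel for simd
//   for (int i = 0; i < N; ++i)
//     A[i] = B[i];
//
// keeps only the simd semantics: the loop is emitted as a plain,
// vectorizable loop and emitParallelCall is never reached.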

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
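
// Unlike its siblings, emitReduction above still generates code in SIMD-only
// mode: per the assert, only "simple" reductions (emitted inline, with no
// runtime calls) reach it, and those are delegated to the base
// implementation. An illustrative trigger:
//
//   #pragma omp simd reduction(+ : Sum)
//   for (int i = 0; i < N; ++i)
//     Sum += A[i];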
reduction is expected."); 11851 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 11852 ReductionOps, Options); 11853 } 11854 11855 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 11856 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 11857 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 11858 llvm_unreachable("Not supported in SIMD-only mode"); 11859 } 11860 11861 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 11862 SourceLocation Loc, 11863 bool IsWorksharingReduction) { 11864 llvm_unreachable("Not supported in SIMD-only mode"); 11865 } 11866 11867 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 11868 SourceLocation Loc, 11869 ReductionCodeGen &RCG, 11870 unsigned N) { 11871 llvm_unreachable("Not supported in SIMD-only mode"); 11872 } 11873 11874 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 11875 SourceLocation Loc, 11876 llvm::Value *ReductionsPtr, 11877 LValue SharedLVal) { 11878 llvm_unreachable("Not supported in SIMD-only mode"); 11879 } 11880 11881 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 11882 SourceLocation Loc) { 11883 llvm_unreachable("Not supported in SIMD-only mode"); 11884 } 11885 11886 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 11887 CodeGenFunction &CGF, SourceLocation Loc, 11888 OpenMPDirectiveKind CancelRegion) { 11889 llvm_unreachable("Not supported in SIMD-only mode"); 11890 } 11891 11892 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 11893 SourceLocation Loc, const Expr *IfCond, 11894 OpenMPDirectiveKind CancelRegion) { 11895 llvm_unreachable("Not supported in SIMD-only mode"); 11896 } 11897 11898 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 11899 const OMPExecutableDirective &D, StringRef ParentName, 11900 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 11901 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 11902 llvm_unreachable("Not supported in SIMD-only mode"); 11903 } 11904 11905 void CGOpenMPSIMDRuntime::emitTargetCall( 11906 CodeGenFunction &CGF, const OMPExecutableDirective &D, 11907 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 11908 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 11909 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 11910 const OMPLoopDirective &D)> 11911 SizeEmitter) { 11912 llvm_unreachable("Not supported in SIMD-only mode"); 11913 } 11914 11915 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 11916 llvm_unreachable("Not supported in SIMD-only mode"); 11917 } 11918 11919 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 11920 llvm_unreachable("Not supported in SIMD-only mode"); 11921 } 11922 11923 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 11924 return false; 11925 } 11926 11927 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 11928 const OMPExecutableDirective &D, 11929 SourceLocation Loc, 11930 llvm::Function *OutlinedFn, 11931 ArrayRef<llvm::Value *> CapturedVars) { 11932 llvm_unreachable("Not supported in SIMD-only mode"); 11933 } 11934 11935 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 11936 const Expr *NumTeams, 11937 const Expr *ThreadLimit, 11938 SourceLocation Loc) { 11939 llvm_unreachable("Not supported in SIMD-only mode"); 11940 } 11941 11942 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 11943 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}