//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
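        // For an untied task the runtime re-enters the outlined body with a
        // part id, and the switch built here resumes at the point where the
        // task last yielded; roughly:
        //   switch (*part_id) {
        //   default: goto .untied.done.;  // finished, leave through cleanups
        //   case 0:  goto .untied.jmp.0;  // initial entry
        //   case 1:  goto .untied.jmp.1;  // added by each emitUntiedSwitch()
        //   }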
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
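///
/// For example, a construct at test.c:10:5 inside function 'foo' is described
/// (with OMP_IDENT_KMPC, 0x02, set in the flags) roughly as:
///   ident_t loc = {0, 2, 0, 0, ";test.c;foo;10;5;;"};
/// where the psource fields are ";file;function;line;column;;" (a sketch; the
/// exact encoding is produced by the OpenMPIRBuilder helpers used below).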
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
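
// For instance, 'schedule(dynamic, 4)' maps to OMP_sch_dynamic_chunked and
// 'schedule(monotonic: static)' maps to OMP_sch_static with
// OMP_sch_modifier_monotonic OR'ed in; the chunk size travels separately as a
// runtime-call argument (a sketch of the mapping performed at loop emission).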

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
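  // A sketch of the emitted control flow:
  //   <entry>:            br (DestBegin == DestEnd), done, body
  //   omp.arrayinit.body: initialize one element, advance the pointers,
  //                       br (next == DestEnd), done, body
  //   omp.arrayinit.done: ...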
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
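  // E.g., for 'reduction(+ : a[1:n])' the section bounds give
  //   Size = (&a[n] - &a[1]) + 1 == n
  // elements and SizeInChars = Size * sizeof(element); otherwise SizeInChars
  // is taken from the type and the element count is recovered by the exact
  // division below (a sketch).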
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

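// When the reduction item is an array section or an array subscript, the
// private copy covers only the section, so the address handed back below is
// rebased to the underlying declaration. E.g. for 'reduction(+ : a[1:2])'
// (a sketch):
//   adjustment   = &a[0] - &a[1]              // base minus section start
//   private_base = private_section + adjustment
// so accesses through the base declaration 'a' reach the private storage.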
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

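// For example, for
//   #pragma omp declare reduction(mymin : int :
//       omp_out = omp_in < omp_out ? omp_in : omp_out)
// the combiner emitted below is roughly equivalent to
//   static void .omp_combiner.(int *omp_out, int *omp_in) {
//     *omp_out = *omp_in < *omp_out ? *omp_in : *omp_out;
//   }
// (a sketch; the parameters are bound to 'omp_out'/'omp_in' through the
// privatization scope set up inside emitCombinerOrInitializer).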
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

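// For '#pragma omp parallel' in a function 'foo', the helper emitted below is
// typically named 'foo.omp_outlined' (see getOutlinedHelperName) and follows
// the kmpc_micro convention:
//   void foo.omp_outlined(kmp_int32 *global_tid, kmp_int32 *bound_tid,
//                         <captured variables>...);
// (a sketch; the exact name and parameter list come from the helper name and
// the captured statement).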
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

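// The string built above has the layout ";file;function;line;column;;", e.g.
// ";test.c;main;4;1;;" for a construct at test.c:4:1 inside main().
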
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used, we need to use it for all thread id calls
  // as the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use the parameter to avoid a possible
  // crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
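  // The call is emitted at the function-level service insert point (right
  // after the allocas) so that the result dominates all later uses; roughly:
  //   entry:
  //     %gtid = call i32 @__kmpc_global_thread_num(ptr @loc)
  //     ...  ; every runtime call in this function reuses %gtid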
1448 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1449 if (!Elem.second.ServiceInsertPt) 1450 setLocThreadIdInsertPt(CGF); 1451 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1452 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1453 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 1454 llvm::CallInst *Call = CGF.Builder.CreateCall( 1455 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1456 OMPRTL___kmpc_global_thread_num), 1457 emitUpdateLocation(CGF, Loc)); 1458 Call->setCallingConv(CGF.getRuntimeCC()); 1459 Elem.second.ThreadID = Call; 1460 return Call; 1461 } 1462 1463 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1464 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1465 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1466 clearLocThreadIdInsertPt(CGF); 1467 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1468 } 1469 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1470 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1471 UDRMap.erase(D); 1472 FunctionUDRMap.erase(CGF.CurFn); 1473 } 1474 auto I = FunctionUDMMap.find(CGF.CurFn); 1475 if (I != FunctionUDMMap.end()) { 1476 for(const auto *D : I->second) 1477 UDMMap.erase(D); 1478 FunctionUDMMap.erase(I); 1479 } 1480 LastprivateConditionalToTypes.erase(CGF.CurFn); 1481 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1482 } 1483 1484 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1485 return OMPBuilder.IdentPtr; 1486 } 1487 1488 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1489 if (!Kmpc_MicroTy) { 1490 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 1491 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1492 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1493 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1494 } 1495 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1496 } 1497 1498 llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind 1499 convertDeviceClause(const VarDecl *VD) { 1500 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 1501 OMPDeclareTargetDeclAttr::getDeviceType(VD); 1502 if (!DevTy) 1503 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone; 1504 1505 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default 1506 case OMPDeclareTargetDeclAttr::DT_Host: 1507 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost; 1508 break; 1509 case OMPDeclareTargetDeclAttr::DT_NoHost: 1510 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost; 1511 break; 1512 case OMPDeclareTargetDeclAttr::DT_Any: 1513 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny; 1514 break; 1515 default: 1516 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone; 1517 break; 1518 } 1519 } 1520 1521 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind 1522 convertCaptureClause(const VarDecl *VD) { 1523 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType = 1524 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1525 if (!MapType) 1526 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone; 1527 switch ((int)*MapType) { // Avoid -Wcovered-switch-default 1528 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To: 1529 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo; 1530 break; 1531 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter: 1532 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter; 1533 break; 1534 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link: 1535 return 
llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink; 1536 break; 1537 default: 1538 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone; 1539 break; 1540 } 1541 } 1542 1543 static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc( 1544 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, 1545 SourceLocation BeginLoc, llvm::StringRef ParentName = "") { 1546 1547 auto FileInfoCallBack = [&]() { 1548 SourceManager &SM = CGM.getContext().getSourceManager(); 1549 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc); 1550 1551 llvm::sys::fs::UniqueID ID; 1552 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { 1553 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false); 1554 } 1555 1556 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine()); 1557 }; 1558 1559 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName); 1560 } 1561 1562 ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1563 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); }; 1564 1565 auto LinkageForVariable = [&VD, this]() { 1566 return CGM.getLLVMLinkageVarDefinition(VD); 1567 }; 1568 1569 std::vector<llvm::GlobalVariable *> GeneratedRefs; 1570 1571 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem( 1572 CGM.getContext().getPointerType(VD->getType())); 1573 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar( 1574 convertCaptureClause(VD), convertDeviceClause(VD), 1575 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly, 1576 VD->isExternallyVisible(), 1577 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, 1578 VD->getCanonicalDecl()->getBeginLoc()), 1579 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd, 1580 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal, 1581 LinkageForVariable); 1582 1583 if (!addr) 1584 return ConstantAddress::invalid(); 1585 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD)); 1586 } 1587 1588 llvm::Constant * 1589 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1590 assert(!CGM.getLangOpts().OpenMPUseTLS || 1591 !CGM.getContext().getTargetInfo().isTLSSupported()); 1592 // Lookup the entry, lazily creating it if necessary. 1593 std::string Suffix = getName({"cache", ""}); 1594 return OMPBuilder.getOrCreateInternalVariable( 1595 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str()); 1596 } 1597 1598 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1599 const VarDecl *VD, 1600 Address VDAddr, 1601 SourceLocation Loc) { 1602 if (CGM.getLangOpts().OpenMPUseTLS && 1603 CGM.getContext().getTargetInfo().isTLSSupported()) 1604 return VDAddr; 1605 1606 llvm::Type *VarTy = VDAddr.getElementType(); 1607 llvm::Value *Args[] = { 1608 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1609 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy), 1610 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1611 getOrCreateThreadPrivateCache(VD)}; 1612 return Address( 1613 CGF.EmitRuntimeCall( 1614 OMPBuilder.getOrCreateRuntimeFunction( 1615 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1616 Args), 1617 CGF.Int8Ty, VDAddr.getAlignment()); 1618 } 1619 1620 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1621 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1622 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1623 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1624 // library. 
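// As a sketch (with illustrative IR names), the two calls emitted below are:
//   call i32 @__kmpc_global_thread_num(ptr @<loc>)             ; result unused
//   call void @__kmpc_threadprivate_register(ptr @<loc>, ptr <var>, ptr <ctor>,
//                                            ptr null /*cctor*/, ptr <dtor>)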
1625 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1626 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1627 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1628 OMPLoc); 1629 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1630 // to register constructor/destructor for variable. 1631 llvm::Value *Args[] = { 1632 OMPLoc, 1633 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy), 1634 Ctor, CopyCtor, Dtor}; 1635 CGF.EmitRuntimeCall( 1636 OMPBuilder.getOrCreateRuntimeFunction( 1637 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1638 Args); 1639 } 1640 1641 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1642 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1643 bool PerformInit, CodeGenFunction *CGF) { 1644 if (CGM.getLangOpts().OpenMPUseTLS && 1645 CGM.getContext().getTargetInfo().isTLSSupported()) 1646 return nullptr; 1647 1648 VD = VD->getDefinition(CGM.getContext()); 1649 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1650 QualType ASTTy = VD->getType(); 1651 1652 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1653 const Expr *Init = VD->getAnyInitializer(); 1654 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1655 // Generate function that re-emits the declaration's initializer into the 1656 // threadprivate copy of the variable VD 1657 CodeGenFunction CtorCGF(CGM); 1658 FunctionArgList Args; 1659 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1660 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1661 ImplicitParamKind::Other); 1662 Args.push_back(&Dst); 1663 1664 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1665 CGM.getContext().VoidPtrTy, Args); 1666 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1667 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1668 llvm::Function *Fn = 1669 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1670 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1671 Args, Loc, Loc); 1672 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1673 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1674 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1675 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy), 1676 VDAddr.getAlignment()); 1677 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1678 /*IsInitializer=*/true); 1679 ArgVal = CtorCGF.EmitLoadOfScalar( 1680 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1681 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1682 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1683 CtorCGF.FinishFunction(); 1684 Ctor = Fn; 1685 } 1686 if (VD->getType().isDestructedType() != QualType::DK_none) { 1687 // Generate function that emits destructor call for the threadprivate copy 1688 // of the variable VD 1689 CodeGenFunction DtorCGF(CGM); 1690 FunctionArgList Args; 1691 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1692 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1693 ImplicitParamKind::Other); 1694 Args.push_back(&Dst); 1695 1696 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1697 CGM.getContext().VoidTy, Args); 1698 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1699 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1700 llvm::Function *Fn = 1701 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1702 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1703 DtorCGF.StartFunction(GlobalDecl(), 
CGM.getContext().VoidTy, Fn, FI, Args, 1704 Loc, Loc); 1705 // Create a scope with an artificial location for the body of this function. 1706 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1707 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1708 DtorCGF.GetAddrOfLocalVar(&Dst), 1709 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1710 DtorCGF.emitDestroy( 1711 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy, 1712 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1713 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1714 DtorCGF.FinishFunction(); 1715 Dtor = Fn; 1716 } 1717 // Do not emit the init function if it is not required. 1718 if (!Ctor && !Dtor) 1719 return nullptr; 1720 1721 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1722 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1723 /*isVarArg=*/false) 1724 ->getPointerTo(); 1725 // Copying constructor for the threadprivate variable. 1726 // Must be NULL: it is reserved by the runtime, which currently requires that 1727 // this parameter always be NULL and fires an assertion otherwise. 1728 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1729 if (Ctor == nullptr) { 1730 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1731 /*isVarArg=*/false) 1732 ->getPointerTo(); 1733 Ctor = llvm::Constant::getNullValue(CtorTy); 1734 } 1735 if (Dtor == nullptr) { 1736 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1737 /*isVarArg=*/false) 1738 ->getPointerTo(); 1739 Dtor = llvm::Constant::getNullValue(DtorTy); 1740 } 1741 if (!CGF) { 1742 auto *InitFunctionTy = 1743 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1744 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1745 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1746 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1747 CodeGenFunction InitCGF(CGM); 1748 FunctionArgList ArgList; 1749 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1750 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1751 Loc, Loc); 1752 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1753 InitCGF.FinishFunction(); 1754 return InitFunction; 1755 } 1756 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1757 } 1758 return nullptr; 1759 } 1760 1761 void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD, 1762 llvm::GlobalValue *GV) { 1763 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr = 1764 OMPDeclareTargetDeclAttr::getActiveAttr(FD); 1765 1766 // We only need to handle active 'indirect' declare target functions. 1767 if (!ActiveAttr || !(*ActiveAttr)->getIndirect()) 1768 return; 1769 1770 // Get a mangled name to store the new device global in. 1771 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc( 1772 CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName()); 1773 SmallString<128> Name; 1774 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo); 1775 1776 // We need to generate a new global to hold the address of the indirectly 1777 // called device function. Doing this allows us to keep the visibility and 1778 // linkage of the associated function unchanged while allowing the runtime to 1779 // access its value.
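// As an illustration, for a device function foo marked 'declare target
// indirect' the global generated below would look roughly like
//   @<entry name> = protected constant ptr @foo
// where the entry name is the mangled name computed above (sketch only).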
1780 llvm::GlobalValue *Addr = GV; 1781 if (CGM.getLangOpts().OpenMPIsTargetDevice) { 1782 Addr = new llvm::GlobalVariable( 1783 CGM.getModule(), CGM.VoidPtrTy, 1784 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name, 1785 nullptr, llvm::GlobalValue::NotThreadLocal, 1786 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace()); 1787 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility); 1788 } 1789 1790 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo( 1791 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(), 1792 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect, 1793 llvm::GlobalValue::WeakODRLinkage); 1794 } 1795 1796 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 1797 QualType VarType, 1798 StringRef Name) { 1799 std::string Suffix = getName({"artificial", ""}); 1800 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 1801 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable( 1802 VarLVType, Twine(Name).concat(Suffix).str()); 1803 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 1804 CGM.getTarget().isTLSSupported()) { 1805 GAddr->setThreadLocal(/*Val=*/true); 1806 return Address(GAddr, GAddr->getValueType(), 1807 CGM.getContext().getTypeAlignInChars(VarType)); 1808 } 1809 std::string CacheSuffix = getName({"cache", ""}); 1810 llvm::Value *Args[] = { 1811 emitUpdateLocation(CGF, SourceLocation()), 1812 getThreadID(CGF, SourceLocation()), 1813 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 1814 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 1815 /*isSigned=*/false), 1816 OMPBuilder.getOrCreateInternalVariable( 1817 CGM.VoidPtrPtrTy, 1818 Twine(Name).concat(Suffix).concat(CacheSuffix).str())}; 1819 return Address( 1820 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1821 CGF.EmitRuntimeCall( 1822 OMPBuilder.getOrCreateRuntimeFunction( 1823 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1824 Args), 1825 VarLVType->getPointerTo(/*AddrSpace=*/0)), 1826 VarLVType, CGM.getContext().getTypeAlignInChars(VarType)); 1827 } 1828 1829 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 1830 const RegionCodeGenTy &ThenGen, 1831 const RegionCodeGenTy &ElseGen) { 1832 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 1833 1834 // If the condition constant folds and can be elided, try to avoid emitting 1835 // the condition and the dead arm of the if/else. 1836 bool CondConstant; 1837 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 1838 if (CondConstant) 1839 ThenGen(CGF); 1840 else 1841 ElseGen(CGF); 1842 return; 1843 } 1844 1845 // Otherwise, the condition did not fold, or we couldn't elide it. Just 1846 // emit the conditional branch. 1847 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 1848 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 1849 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 1850 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 1851 1852 // Emit the 'then' code. 1853 CGF.EmitBlock(ThenBlock); 1854 ThenGen(CGF); 1855 CGF.EmitBranch(ContBlock); 1856 // Emit the 'else' code if present. 1857 // There is no need to emit line number for unconditional branch. 1858 (void)ApplyDebugLocation::CreateEmpty(CGF); 1859 CGF.EmitBlock(ElseBlock); 1860 ElseGen(CGF); 1861 // There is no need to emit line number for unconditional branch. 
1862 (void)ApplyDebugLocation::CreateEmpty(CGF); 1863 CGF.EmitBranch(ContBlock); 1864 // Emit the continuation block for code after the if. 1865 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 1866 } 1867 1868 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 1869 llvm::Function *OutlinedFn, 1870 ArrayRef<llvm::Value *> CapturedVars, 1871 const Expr *IfCond, 1872 llvm::Value *NumThreads) { 1873 if (!CGF.HaveInsertPoint()) 1874 return; 1875 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 1876 auto &M = CGM.getModule(); 1877 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 1878 this](CodeGenFunction &CGF, PrePostActionTy &) { 1879 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 1880 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 1881 llvm::Value *Args[] = { 1882 RTLoc, 1883 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 1884 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 1885 llvm::SmallVector<llvm::Value *, 16> RealArgs; 1886 RealArgs.append(std::begin(Args), std::end(Args)); 1887 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 1888 1889 llvm::FunctionCallee RTLFn = 1890 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 1891 CGF.EmitRuntimeCall(RTLFn, RealArgs); 1892 }; 1893 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 1894 this](CodeGenFunction &CGF, PrePostActionTy &) { 1895 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 1896 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 1897 // Build calls: 1898 // __kmpc_serialized_parallel(&Loc, GTid); 1899 llvm::Value *Args[] = {RTLoc, ThreadID}; 1900 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1901 M, OMPRTL___kmpc_serialized_parallel), 1902 Args); 1903 1904 // OutlinedFn(&gtid, &zero_bound, CapturedStruct); 1905 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 1906 RawAddress ZeroAddrBound = 1907 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 1908 /*Name=*/".bound.zero.addr"); 1909 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound); 1910 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 1911 // ThreadId for serialized parallels is 0. 1912 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF)); 1913 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 1914 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 1915 1916 // Ensure we do not inline the function. This is trivially true for the ones 1917 // passed to __kmpc_fork_call but the ones called in serialized regions 1918 // could be inlined. This is not perfect, but it is closer to the invariant 1919 // we want, namely, every data environment starts with a new function. 1920 // TODO: We should pass the if condition to the runtime function and do the 1921 // handling there. Much cleaner code.
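// Taken together, a sketch of the serialized path emitted by this lambda
// (IR names illustrative):
//   call void @__kmpc_serialized_parallel(ptr @<loc>, i32 %gtid)
//   call void @<outlined>(ptr %<gtid.addr>, ptr %.bound.zero.addr, <captures>)
//   call void @__kmpc_end_serialized_parallel(ptr @<loc>, i32 %gtid)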
1922 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline); 1923 OutlinedFn->addFnAttr(llvm::Attribute::NoInline); 1924 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 1925 1926 // __kmpc_end_serialized_parallel(&Loc, GTid); 1927 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 1928 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1929 M, OMPRTL___kmpc_end_serialized_parallel), 1930 EndArgs); 1931 }; 1932 if (IfCond) { 1933 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 1934 } else { 1935 RegionCodeGenTy ThenRCG(ThenGen); 1936 ThenRCG(CGF); 1937 } 1938 } 1939 1940 // If we're inside an (outlined) parallel region, use the region info's 1941 // thread-ID variable (it is passed as the first argument of the outlined 1942 // function, "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel 1943 // region but in a regular serial code region, get the thread ID by calling 1944 // kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash it in a temporary, 1945 // and return the address of that temporary. 1946 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 1947 SourceLocation Loc) { 1948 if (auto *OMPRegionInfo = 1949 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 1950 if (OMPRegionInfo->getThreadIDVariable()) 1951 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 1952 1953 llvm::Value *ThreadID = getThreadID(CGF, Loc); 1954 QualType Int32Ty = 1955 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 1956 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 1957 CGF.EmitStoreOfScalar(ThreadID, 1958 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 1959 1960 return ThreadIDTemp; 1961 } 1962 1963 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 1964 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 1965 std::string Name = getName({Prefix, "var"}); 1966 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name); 1967 } 1968 1969 namespace { 1970 /// Common pre(post)-action for different OpenMP constructs.
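/// On entry this emits a call to \p EnterCallee with \p EnterArgs and, when
/// \p Conditional is set, guards the region body on that call's result; on
/// exit it emits \p ExitCallee with \p ExitArgs. A sketch of the conditional
/// shape (block names as created below):
///   %res = call i32 @<enter>(...)
///   br i1 <%res != 0>, label %omp_if.then, label %omp_if.end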
1971 class CommonActionTy final : public PrePostActionTy { 1972 llvm::FunctionCallee EnterCallee; 1973 ArrayRef<llvm::Value *> EnterArgs; 1974 llvm::FunctionCallee ExitCallee; 1975 ArrayRef<llvm::Value *> ExitArgs; 1976 bool Conditional; 1977 llvm::BasicBlock *ContBlock = nullptr; 1978 1979 public: 1980 CommonActionTy(llvm::FunctionCallee EnterCallee, 1981 ArrayRef<llvm::Value *> EnterArgs, 1982 llvm::FunctionCallee ExitCallee, 1983 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 1984 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 1985 ExitArgs(ExitArgs), Conditional(Conditional) {} 1986 void Enter(CodeGenFunction &CGF) override { 1987 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 1988 if (Conditional) { 1989 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 1990 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 1991 ContBlock = CGF.createBasicBlock("omp_if.end"); 1992 // Generate the branch (If-stmt) 1993 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 1994 CGF.EmitBlock(ThenBlock); 1995 } 1996 } 1997 void Done(CodeGenFunction &CGF) { 1998 // Emit the rest of blocks/branches 1999 CGF.EmitBranch(ContBlock); 2000 CGF.EmitBlock(ContBlock, true); 2001 } 2002 void Exit(CodeGenFunction &CGF) override { 2003 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2004 } 2005 }; 2006 } // anonymous namespace 2007 2008 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2009 StringRef CriticalName, 2010 const RegionCodeGenTy &CriticalOpGen, 2011 SourceLocation Loc, const Expr *Hint) { 2012 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2013 // CriticalOpGen(); 2014 // __kmpc_end_critical(ident_t *, gtid, Lock); 2015 // Prepare arguments and build a call to __kmpc_critical 2016 if (!CGF.HaveInsertPoint()) 2017 return; 2018 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2019 getCriticalRegionLock(CriticalName)}; 2020 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2021 std::end(Args)); 2022 if (Hint) { 2023 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2024 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2025 } 2026 CommonActionTy Action( 2027 OMPBuilder.getOrCreateRuntimeFunction( 2028 CGM.getModule(), 2029 Hint ? 
OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), 2030 EnterArgs, 2031 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2032 OMPRTL___kmpc_end_critical), 2033 Args); 2034 CriticalOpGen.setAction(Action); 2035 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2036 } 2037 2038 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2039 const RegionCodeGenTy &MasterOpGen, 2040 SourceLocation Loc) { 2041 if (!CGF.HaveInsertPoint()) 2042 return; 2043 // if(__kmpc_master(ident_t *, gtid)) { 2044 // MasterOpGen(); 2045 // __kmpc_end_master(ident_t *, gtid); 2046 // } 2047 // Prepare arguments and build a call to __kmpc_master 2048 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2049 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2050 CGM.getModule(), OMPRTL___kmpc_master), 2051 Args, 2052 OMPBuilder.getOrCreateRuntimeFunction( 2053 CGM.getModule(), OMPRTL___kmpc_end_master), 2054 Args, 2055 /*Conditional=*/true); 2056 MasterOpGen.setAction(Action); 2057 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2058 Action.Done(CGF); 2059 } 2060 2061 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF, 2062 const RegionCodeGenTy &MaskedOpGen, 2063 SourceLocation Loc, const Expr *Filter) { 2064 if (!CGF.HaveInsertPoint()) 2065 return; 2066 // if(__kmpc_masked(ident_t *, gtid, filter)) { 2067 // MaskedOpGen(); 2068 // __kmpc_end_masked(ident_t *, gtid); 2069 // } 2070 // Prepare arguments and build a call to __kmpc_masked 2071 llvm::Value *FilterVal = Filter 2072 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty) 2073 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); 2074 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2075 FilterVal}; 2076 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc), 2077 getThreadID(CGF, Loc)}; 2078 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2079 CGM.getModule(), OMPRTL___kmpc_masked), 2080 Args, 2081 OMPBuilder.getOrCreateRuntimeFunction( 2082 CGM.getModule(), OMPRTL___kmpc_end_masked), 2083 ArgsEnd, 2084 /*Conditional=*/true); 2085 MaskedOpGen.setAction(Action); 2086 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen); 2087 Action.Done(CGF); 2088 } 2089 2090 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2091 SourceLocation Loc) { 2092 if (!CGF.HaveInsertPoint()) 2093 return; 2094 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2095 OMPBuilder.createTaskyield(CGF.Builder); 2096 } else { 2097 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2098 llvm::Value *Args[] = { 2099 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2100 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2101 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2102 CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2103 Args); 2104 } 2105 2106 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2107 Region->emitUntiedSwitch(CGF); 2108 } 2109 2110 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2111 const RegionCodeGenTy &TaskgroupOpGen, 2112 SourceLocation Loc) { 2113 if (!CGF.HaveInsertPoint()) 2114 return; 2115 // __kmpc_taskgroup(ident_t *, gtid); 2116 // TaskgroupOpGen(); 2117 // __kmpc_end_taskgroup(ident_t *, gtid); 2118 // Prepare arguments and build a call to __kmpc_taskgroup 2119 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2120 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2121 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2122 Args, 2123
OMPBuilder.getOrCreateRuntimeFunction( 2124 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2125 Args); 2126 TaskgroupOpGen.setAction(Action); 2127 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2128 } 2129 2130 /// Given an array of pointers to variables, project the address of a 2131 /// given variable. 2132 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2133 unsigned Index, const VarDecl *Var) { 2134 // Pull out the pointer to the variable. 2135 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2136 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2137 2138 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType()); 2139 return Address( 2140 CGF.Builder.CreateBitCast( 2141 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())), 2142 ElemTy, CGF.getContext().getDeclAlign(Var)); 2143 } 2144 2145 static llvm::Value *emitCopyprivateCopyFunction( 2146 CodeGenModule &CGM, llvm::Type *ArgsElemType, 2147 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2148 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2149 SourceLocation Loc) { 2150 ASTContext &C = CGM.getContext(); 2151 // void copy_func(void *LHSArg, void *RHSArg); 2152 FunctionArgList Args; 2153 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2154 ImplicitParamKind::Other); 2155 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2156 ImplicitParamKind::Other); 2157 Args.push_back(&LHSArg); 2158 Args.push_back(&RHSArg); 2159 const auto &CGFI = 2160 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2161 std::string Name = 2162 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2163 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2164 llvm::GlobalValue::InternalLinkage, Name, 2165 &CGM.getModule()); 2166 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2167 Fn->setDoesNotRecurse(); 2168 CodeGenFunction CGF(CGM); 2169 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2170 // Dest = (void*[n])(LHSArg); 2171 // Src = (void*[n])(RHSArg); 2172 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2173 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2174 ArgsElemType->getPointerTo()), 2175 ArgsElemType, CGF.getPointerAlign()); 2176 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2177 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2178 ArgsElemType->getPointerTo()), 2179 ArgsElemType, CGF.getPointerAlign()); 2180 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2181 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2182 // ... 
2183 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2184 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2185 const auto *DestVar = 2186 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2187 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2188 2189 const auto *SrcVar = 2190 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2191 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2192 2193 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2194 QualType Type = VD->getType(); 2195 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2196 } 2197 CGF.FinishFunction(); 2198 return Fn; 2199 } 2200 2201 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2202 const RegionCodeGenTy &SingleOpGen, 2203 SourceLocation Loc, 2204 ArrayRef<const Expr *> CopyprivateVars, 2205 ArrayRef<const Expr *> SrcExprs, 2206 ArrayRef<const Expr *> DstExprs, 2207 ArrayRef<const Expr *> AssignmentOps) { 2208 if (!CGF.HaveInsertPoint()) 2209 return; 2210 assert(CopyprivateVars.size() == SrcExprs.size() && 2211 CopyprivateVars.size() == DstExprs.size() && 2212 CopyprivateVars.size() == AssignmentOps.size()); 2213 ASTContext &C = CGM.getContext(); 2214 // int32 did_it = 0; 2215 // if(__kmpc_single(ident_t *, gtid)) { 2216 // SingleOpGen(); 2217 // __kmpc_end_single(ident_t *, gtid); 2218 // did_it = 1; 2219 // } 2220 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2221 // <copy_func>, did_it); 2222 2223 Address DidIt = Address::invalid(); 2224 if (!CopyprivateVars.empty()) { 2225 // int32 did_it = 0; 2226 QualType KmpInt32Ty = 2227 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2228 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2229 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2230 } 2231 // Prepare arguments and build a call to __kmpc_single 2232 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2233 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2234 CGM.getModule(), OMPRTL___kmpc_single), 2235 Args, 2236 OMPBuilder.getOrCreateRuntimeFunction( 2237 CGM.getModule(), OMPRTL___kmpc_end_single), 2238 Args, 2239 /*Conditional=*/true); 2240 SingleOpGen.setAction(Action); 2241 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2242 if (DidIt.isValid()) { 2243 // did_it = 1; 2244 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2245 } 2246 Action.Done(CGF); 2247 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2248 // <copy_func>, did_it); 2249 if (DidIt.isValid()) { 2250 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2251 QualType CopyprivateArrayTy = C.getConstantArrayType( 2252 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal, 2253 /*IndexTypeQuals=*/0); 2254 // Create a list of all private variables for copyprivate. 2255 Address CopyprivateList = 2256 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2257 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2258 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2259 CGF.Builder.CreateStore( 2260 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2261 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2262 CGF.VoidPtrTy), 2263 Elem); 2264 } 2265 // Build function that copies private values from single region to all other 2266 // threads in the corresponding parallel region. 
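    // As a sketch, the helper produced by emitCopyprivateCopyFunction has
    // roughly this shape (types and the exact mangled name are illustrative):
    //   void <omp.copyprivate.copy_func>(void *LHS, void *RHS) {
    //     *(T0 *)((void **)LHS)[0] = *(T0 *)((void **)RHS)[0];
    //     ... one copy (or copy-assignment operator call) per copyprivate var ...
    //   }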
2267 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2268 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars, 2269 SrcExprs, DstExprs, AssignmentOps, Loc); 2270 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2271 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2272 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty); 2273 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2274 llvm::Value *Args[] = { 2275 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2276 getThreadID(CGF, Loc), // i32 <gtid> 2277 BufSize, // size_t <buf_size> 2278 CL.emitRawPointer(CGF), // void *<copyprivate list> 2279 CpyFn, // void (*) (void *, void *) <copy_func> 2280 DidItVal // i32 did_it 2281 }; 2282 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2283 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2284 Args); 2285 } 2286 } 2287 2288 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2289 const RegionCodeGenTy &OrderedOpGen, 2290 SourceLocation Loc, bool IsThreads) { 2291 if (!CGF.HaveInsertPoint()) 2292 return; 2293 // __kmpc_ordered(ident_t *, gtid); 2294 // OrderedOpGen(); 2295 // __kmpc_end_ordered(ident_t *, gtid); 2296 // Prepare arguments and build a call to __kmpc_ordered 2297 if (IsThreads) { 2298 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2299 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2300 CGM.getModule(), OMPRTL___kmpc_ordered), 2301 Args, 2302 OMPBuilder.getOrCreateRuntimeFunction( 2303 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2304 Args); 2305 OrderedOpGen.setAction(Action); 2306 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2307 return; 2308 } 2309 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2310 } 2311 2312 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2313 unsigned Flags; 2314 if (Kind == OMPD_for) 2315 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2316 else if (Kind == OMPD_sections) 2317 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2318 else if (Kind == OMPD_single) 2319 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2320 else if (Kind == OMPD_barrier) 2321 Flags = OMP_IDENT_BARRIER_EXPL; 2322 else 2323 Flags = OMP_IDENT_BARRIER_IMPL; 2324 return Flags; 2325 } 2326 2327 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2328 CodeGenFunction &CGF, const OMPLoopDirective &S, 2329 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2330 // Check if the loop directive is actually a doacross loop directive. In this 2331 // case choose static, 1 schedule. 2332 if (llvm::any_of( 2333 S.getClausesOfKind<OMPOrderedClause>(), 2334 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2335 ScheduleKind = OMPC_SCHEDULE_static; 2336 // Chunk size is 1 in this case. 
2337 llvm::APInt ChunkSize(32, 1); 2338 ChunkExpr = IntegerLiteral::Create( 2339 CGF.getContext(), ChunkSize, 2340 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2341 SourceLocation()); 2342 } 2343 } 2344 2345 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2346 OpenMPDirectiveKind Kind, bool EmitChecks, 2347 bool ForceSimpleCall) { 2348 // Check if we should use the OMPBuilder 2349 auto *OMPRegionInfo = 2350 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2351 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2352 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2353 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2354 return; 2355 } 2356 2357 if (!CGF.HaveInsertPoint()) 2358 return; 2359 // Build call __kmpc_cancel_barrier(loc, thread_id); 2360 // Build call __kmpc_barrier(loc, thread_id); 2361 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2362 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2363 // thread_id); 2364 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2365 getThreadID(CGF, Loc)}; 2366 if (OMPRegionInfo) { 2367 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2368 llvm::Value *Result = CGF.EmitRuntimeCall( 2369 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2370 OMPRTL___kmpc_cancel_barrier), 2371 Args); 2372 if (EmitChecks) { 2373 // if (__kmpc_cancel_barrier()) { 2374 // exit from construct; 2375 // } 2376 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2377 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2378 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2379 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2380 CGF.EmitBlock(ExitBB); 2381 // exit from construct; 2382 CodeGenFunction::JumpDest CancelDestination = 2383 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2384 CGF.EmitBranchThroughCleanup(CancelDestination); 2385 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2386 } 2387 return; 2388 } 2389 } 2390 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2391 CGM.getModule(), OMPRTL___kmpc_barrier), 2392 Args); 2393 } 2394 2395 void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, 2396 Expr *ME, bool IsFatal) { 2397 llvm::Value *MVL = 2398 ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF) 2399 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 2400 // Build call void __kmpc_error(ident_t *loc, int severity, const char 2401 // *message) 2402 llvm::Value *Args[] = { 2403 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true), 2404 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1), 2405 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)}; 2406 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2407 CGM.getModule(), OMPRTL___kmpc_error), 2408 Args); 2409 } 2410 2411 /// Map the OpenMP loop schedule to the runtime enumeration. 2412 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2413 bool Chunked, bool Ordered) { 2414 switch (ScheduleKind) { 2415 case OMPC_SCHEDULE_static: 2416 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2417 : (Ordered ? OMP_ord_static : OMP_sch_static); 2418 case OMPC_SCHEDULE_dynamic: 2419 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2420 case OMPC_SCHEDULE_guided: 2421 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2422 case OMPC_SCHEDULE_runtime: 2423 return Ordered ? 
OMP_ord_runtime : OMP_sch_runtime; 2424 case OMPC_SCHEDULE_auto: 2425 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2426 case OMPC_SCHEDULE_unknown: 2427 assert(!Chunked && "chunk was specified but schedule kind not known"); 2428 return Ordered ? OMP_ord_static : OMP_sch_static; 2429 } 2430 llvm_unreachable("Unexpected runtime schedule"); 2431 } 2432 2433 /// Map the OpenMP distribute schedule to the runtime enumeration. 2434 static OpenMPSchedType 2435 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2436 // Only static is allowed for dist_schedule. 2437 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2438 } 2439 2440 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2441 bool Chunked) const { 2442 OpenMPSchedType Schedule = 2443 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2444 return Schedule == OMP_sch_static; 2445 } 2446 2447 bool CGOpenMPRuntime::isStaticNonchunked( 2448 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2449 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2450 return Schedule == OMP_dist_sch_static; 2451 } 2452 2453 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2454 bool Chunked) const { 2455 OpenMPSchedType Schedule = 2456 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2457 return Schedule == OMP_sch_static_chunked; 2458 } 2459 2460 bool CGOpenMPRuntime::isStaticChunked( 2461 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2462 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2463 return Schedule == OMP_dist_sch_static_chunked; 2464 } 2465 2466 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2467 OpenMPSchedType Schedule = 2468 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2469 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2470 return Schedule != OMP_sch_static; 2471 } 2472 2473 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2474 OpenMPScheduleClauseModifier M1, 2475 OpenMPScheduleClauseModifier M2) { 2476 int Modifier = 0; 2477 switch (M1) { 2478 case OMPC_SCHEDULE_MODIFIER_monotonic: 2479 Modifier = OMP_sch_modifier_monotonic; 2480 break; 2481 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2482 Modifier = OMP_sch_modifier_nonmonotonic; 2483 break; 2484 case OMPC_SCHEDULE_MODIFIER_simd: 2485 if (Schedule == OMP_sch_static_chunked) 2486 Schedule = OMP_sch_static_balanced_chunked; 2487 break; 2488 case OMPC_SCHEDULE_MODIFIER_last: 2489 case OMPC_SCHEDULE_MODIFIER_unknown: 2490 break; 2491 } 2492 switch (M2) { 2493 case OMPC_SCHEDULE_MODIFIER_monotonic: 2494 Modifier = OMP_sch_modifier_monotonic; 2495 break; 2496 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2497 Modifier = OMP_sch_modifier_nonmonotonic; 2498 break; 2499 case OMPC_SCHEDULE_MODIFIER_simd: 2500 if (Schedule == OMP_sch_static_chunked) 2501 Schedule = OMP_sch_static_balanced_chunked; 2502 break; 2503 case OMPC_SCHEDULE_MODIFIER_last: 2504 case OMPC_SCHEDULE_MODIFIER_unknown: 2505 break; 2506 } 2507 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description. 2508 // If the static schedule kind is specified or if the ordered clause is 2509 // specified, and if the nonmonotonic modifier is not specified, the effect is 2510 // as if the monotonic modifier is specified.
Otherwise, unless the monotonic 2511 // modifier is specified, the effect is as if the nonmonotonic modifier is 2512 // specified. 2513 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2514 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2515 Schedule == OMP_sch_static_balanced_chunked || 2516 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2517 Schedule == OMP_dist_sch_static_chunked || 2518 Schedule == OMP_dist_sch_static)) 2519 Modifier = OMP_sch_modifier_nonmonotonic; 2520 } 2521 return Schedule | Modifier; 2522 } 2523 2524 void CGOpenMPRuntime::emitForDispatchInit( 2525 CodeGenFunction &CGF, SourceLocation Loc, 2526 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2527 bool Ordered, const DispatchRTInput &DispatchValues) { 2528 if (!CGF.HaveInsertPoint()) 2529 return; 2530 OpenMPSchedType Schedule = getRuntimeSchedule( 2531 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2532 assert(Ordered || 2533 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2534 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2535 Schedule != OMP_sch_static_balanced_chunked)); 2536 // Call __kmpc_dispatch_init( 2537 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2538 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2539 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2540 2541 // If the Chunk was not specified in the clause - use default value 1. 2542 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2543 : CGF.Builder.getIntN(IVSize, 1); 2544 llvm::Value *Args[] = { 2545 emitUpdateLocation(CGF, Loc), 2546 getThreadID(CGF, Loc), 2547 CGF.Builder.getInt32(addMonoNonMonoModifier( 2548 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2549 DispatchValues.LB, // Lower 2550 DispatchValues.UB, // Upper 2551 CGF.Builder.getIntN(IVSize, 1), // Stride 2552 Chunk // Chunk 2553 }; 2554 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned), 2555 Args); 2556 } 2557 2558 void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF, 2559 SourceLocation Loc) { 2560 if (!CGF.HaveInsertPoint()) 2561 return; 2562 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid); 2563 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2564 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args); 2565 } 2566 2567 static void emitForStaticInitCall( 2568 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2569 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2570 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2571 const CGOpenMPRuntime::StaticRTInput &Values) { 2572 if (!CGF.HaveInsertPoint()) 2573 return; 2574 2575 assert(!Values.Ordered); 2576 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2577 Schedule == OMP_sch_static_balanced_chunked || 2578 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2579 Schedule == OMP_dist_sch_static || 2580 Schedule == OMP_dist_sch_static_chunked); 2581 2582 // Call __kmpc_for_static_init( 2583 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2584 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2585 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2586 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2587 llvm::Value *Chunk = Values.Chunk; 2588 if (Chunk == nullptr) { 2589 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2590 Schedule == 
OMP_dist_sch_static) && 2591 "expected static non-chunked schedule"); 2592 // If the Chunk was not specified in the clause - use default value 1. 2593 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2594 } else { 2595 assert((Schedule == OMP_sch_static_chunked || 2596 Schedule == OMP_sch_static_balanced_chunked || 2597 Schedule == OMP_ord_static_chunked || 2598 Schedule == OMP_dist_sch_static_chunked) && 2599 "expected static chunked schedule"); 2600 } 2601 llvm::Value *Args[] = { 2602 UpdateLocation, 2603 ThreadId, 2604 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2605 M2)), // Schedule type 2606 Values.IL.emitRawPointer(CGF), // &isLastIter 2607 Values.LB.emitRawPointer(CGF), // &LB 2608 Values.UB.emitRawPointer(CGF), // &UB 2609 Values.ST.emitRawPointer(CGF), // &Stride 2610 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2611 Chunk // Chunk 2612 }; 2613 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2614 } 2615 2616 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2617 SourceLocation Loc, 2618 OpenMPDirectiveKind DKind, 2619 const OpenMPScheduleTy &ScheduleKind, 2620 const StaticRTInput &Values) { 2621 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2622 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2623 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) && 2624 "Expected loop-based or sections-based directive."); 2625 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2626 isOpenMPLoopDirective(DKind) 2627 ? OMP_IDENT_WORK_LOOP 2628 : OMP_IDENT_WORK_SECTIONS); 2629 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2630 llvm::FunctionCallee StaticInitFunction = 2631 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned, 2632 false); 2633 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2634 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2635 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2636 } 2637 2638 void CGOpenMPRuntime::emitDistributeStaticInit( 2639 CodeGenFunction &CGF, SourceLocation Loc, 2640 OpenMPDistScheduleClauseKind SchedKind, 2641 const CGOpenMPRuntime::StaticRTInput &Values) { 2642 OpenMPSchedType ScheduleNum = 2643 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2644 llvm::Value *UpdatedLocation = 2645 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2646 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2647 llvm::FunctionCallee StaticInitFunction; 2648 bool isGPUDistribute = 2649 CGM.getLangOpts().OpenMPIsTargetDevice && 2650 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); 2651 StaticInitFunction = OMPBuilder.createForStaticInitFunction( 2652 Values.IVSize, Values.IVSigned, isGPUDistribute); 2653 2654 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2655 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2656 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2657 } 2658 2659 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2660 SourceLocation Loc, 2661 OpenMPDirectiveKind DKind) { 2662 assert((DKind == OMPD_distribute || DKind == OMPD_for || 2663 DKind == OMPD_sections) && 2664 "Expected distribute, for, or sections directive kind"); 2665 if (!CGF.HaveInsertPoint()) 2666 return; 2667 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2668 llvm::Value *Args[] = { 2669 emitUpdateLocation(CGF, Loc, 2670 isOpenMPDistributeDirective(DKind) || 2671 (DKind == OMPD_target_teams_loop) 2672 ? OMP_IDENT_WORK_DISTRIBUTE 2673 : isOpenMPLoopDirective(DKind) 2674 ? 
OMP_IDENT_WORK_LOOP 2675 : OMP_IDENT_WORK_SECTIONS), 2676 getThreadID(CGF, Loc)}; 2677 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2678 if (isOpenMPDistributeDirective(DKind) && 2679 CGM.getLangOpts().OpenMPIsTargetDevice && 2680 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX())) 2681 CGF.EmitRuntimeCall( 2682 OMPBuilder.getOrCreateRuntimeFunction( 2683 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini), 2684 Args); 2685 else 2686 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2687 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2688 Args); 2689 } 2690 2691 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2692 SourceLocation Loc, 2693 unsigned IVSize, 2694 bool IVSigned) { 2695 if (!CGF.HaveInsertPoint()) 2696 return; 2697 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2698 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2699 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned), 2700 Args); 2701 } 2702 2703 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2704 SourceLocation Loc, unsigned IVSize, 2705 bool IVSigned, Address IL, 2706 Address LB, Address UB, 2707 Address ST) { 2708 // Call __kmpc_dispatch_next( 2709 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2710 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2711 // kmp_int[32|64] *p_stride); 2712 llvm::Value *Args[] = { 2713 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2714 IL.emitRawPointer(CGF), // &isLastIter 2715 LB.emitRawPointer(CGF), // &Lower 2716 UB.emitRawPointer(CGF), // &Upper 2717 ST.emitRawPointer(CGF) // &Stride 2718 }; 2719 llvm::Value *Call = CGF.EmitRuntimeCall( 2720 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args); 2721 return CGF.EmitScalarConversion( 2722 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2723 CGF.getContext().BoolTy, Loc); 2724 } 2725 2726 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2727 llvm::Value *NumThreads, 2728 SourceLocation Loc) { 2729 if (!CGF.HaveInsertPoint()) 2730 return; 2731 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2732 llvm::Value *Args[] = { 2733 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2734 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2735 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2736 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2737 Args); 2738 } 2739 2740 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2741 ProcBindKind ProcBind, 2742 SourceLocation Loc) { 2743 if (!CGF.HaveInsertPoint()) 2744 return; 2745 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2746 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2747 llvm::Value *Args[] = { 2748 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2749 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2750 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2751 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2752 Args); 2753 } 2754 2755 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2756 SourceLocation Loc, llvm::AtomicOrdering AO) { 2757 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2758 OMPBuilder.createFlush(CGF.Builder); 2759 } else { 2760 if (!CGF.HaveInsertPoint()) 2761 return; 2762 // Build call void __kmpc_flush(ident_t *loc) 2763 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2764 
CGM.getModule(), OMPRTL___kmpc_flush), 2765 emitUpdateLocation(CGF, Loc)); 2766 } 2767 } 2768 2769 namespace { 2770 /// Indexes of fields for type kmp_task_t. 2771 enum KmpTaskTFields { 2772 /// List of shared variables. 2773 KmpTaskTShareds, 2774 /// Task routine. 2775 KmpTaskTRoutine, 2776 /// Partition id for the untied tasks. 2777 KmpTaskTPartId, 2778 /// Function with call of destructors for private variables. 2779 Data1, 2780 /// Task priority. 2781 Data2, 2782 /// (Taskloops only) Lower bound. 2783 KmpTaskTLowerBound, 2784 /// (Taskloops only) Upper bound. 2785 KmpTaskTUpperBound, 2786 /// (Taskloops only) Stride. 2787 KmpTaskTStride, 2788 /// (Taskloops only) Is last iteration flag. 2789 KmpTaskTLastIter, 2790 /// (Taskloops only) Reduction data. 2791 KmpTaskTReductions, 2792 }; 2793 } // anonymous namespace 2794 2795 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 2796 // If we are in simd mode or there are no entries, we don't need to do 2797 // anything. 2798 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty()) 2799 return; 2800 2801 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn = 2802 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind, 2803 const llvm::TargetRegionEntryInfo &EntryInfo) -> void { 2804 SourceLocation Loc; 2805 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) { 2806 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 2807 E = CGM.getContext().getSourceManager().fileinfo_end(); 2808 I != E; ++I) { 2809 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID && 2810 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) { 2811 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 2812 I->getFirst(), EntryInfo.Line, 1); 2813 break; 2814 } 2815 } 2816 } 2817 switch (Kind) { 2818 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: { 2819 unsigned DiagID = CGM.getDiags().getCustomDiagID( 2820 DiagnosticsEngine::Error, "Offloading entry for target region in " 2821 "%0 is incorrect: either the " 2822 "address or the ID is invalid."); 2823 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName; 2824 } break; 2825 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: { 2826 unsigned DiagID = CGM.getDiags().getCustomDiagID( 2827 DiagnosticsEngine::Error, "Offloading entry for declare target " 2828 "variable %0 is incorrect: the " 2829 "address is invalid."); 2830 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName; 2831 } break; 2832 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: { 2833 unsigned DiagID = CGM.getDiags().getCustomDiagID( 2834 DiagnosticsEngine::Error, 2835 "Offloading entry for declare target variable is incorrect: the " 2836 "address is invalid."); 2837 CGM.getDiags().Report(DiagID); 2838 } break; 2839 } 2840 }; 2841 2842 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn); 2843 } 2844 2845 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 2846 if (!KmpRoutineEntryPtrTy) { 2847 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 
2848 ASTContext &C = CGM.getContext(); 2849 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 2850 FunctionProtoType::ExtProtoInfo EPI; 2851 KmpRoutineEntryPtrQTy = C.getPointerType( 2852 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 2853 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 2854 } 2855 } 2856 2857 namespace { 2858 struct PrivateHelpersTy { 2859 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 2860 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 2861 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 2862 PrivateElemInit(PrivateElemInit) {} 2863 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 2864 const Expr *OriginalRef = nullptr; 2865 const VarDecl *Original = nullptr; 2866 const VarDecl *PrivateCopy = nullptr; 2867 const VarDecl *PrivateElemInit = nullptr; 2868 bool isLocalPrivate() const { 2869 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 2870 } 2871 }; 2872 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 2873 } // anonymous namespace 2874 2875 static bool isAllocatableDecl(const VarDecl *VD) { 2876 const VarDecl *CVD = VD->getCanonicalDecl(); 2877 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 2878 return false; 2879 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 2880 // Use the default allocation. 2881 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 2882 !AA->getAllocator()); 2883 } 2884 2885 static RecordDecl * 2886 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 2887 if (!Privates.empty()) { 2888 ASTContext &C = CGM.getContext(); 2889 // Build struct .kmp_privates_t. { 2890 // /* private vars */ 2891 // }; 2892 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 2893 RD->startDefinition(); 2894 for (const auto &Pair : Privates) { 2895 const VarDecl *VD = Pair.second.Original; 2896 QualType Type = VD->getType().getNonReferenceType(); 2897 // If the private variable is a local variable with lvalue ref type, 2898 // allocate the pointer instead of the pointee type. 
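    // For example, a task-local 'int &R' is stored as an 'int *' field, and
    // an allocatable local gains one extra level of indirection so the field
    // can refer to its separately allocated storage.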
2899 if (Pair.second.isLocalPrivate()) { 2900 if (VD->getType()->isLValueReferenceType()) 2901 Type = C.getPointerType(Type); 2902 if (isAllocatableDecl(VD)) 2903 Type = C.getPointerType(Type); 2904 } 2905 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 2906 if (VD->hasAttrs()) { 2907 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 2908 E(VD->getAttrs().end()); 2909 I != E; ++I) 2910 FD->addAttr(*I); 2911 } 2912 } 2913 RD->completeDefinition(); 2914 return RD; 2915 } 2916 return nullptr; 2917 } 2918 2919 static RecordDecl * 2920 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 2921 QualType KmpInt32Ty, 2922 QualType KmpRoutineEntryPointerQTy) { 2923 ASTContext &C = CGM.getContext(); 2924 // Build struct kmp_task_t { 2925 // void * shareds; 2926 // kmp_routine_entry_t routine; 2927 // kmp_int32 part_id; 2928 // kmp_cmplrdata_t data1; 2929 // kmp_cmplrdata_t data2; 2930 // For taskloops additional fields: 2931 // kmp_uint64 lb; 2932 // kmp_uint64 ub; 2933 // kmp_int64 st; 2934 // kmp_int32 liter; 2935 // void * reductions; 2936 // }; 2937 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union); 2938 UD->startDefinition(); 2939 addFieldToRecordDecl(C, UD, KmpInt32Ty); 2940 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 2941 UD->completeDefinition(); 2942 QualType KmpCmplrdataTy = C.getRecordType(UD); 2943 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 2944 RD->startDefinition(); 2945 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 2946 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 2947 addFieldToRecordDecl(C, RD, KmpInt32Ty); 2948 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 2949 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 2950 if (isOpenMPTaskLoopDirective(Kind)) { 2951 QualType KmpUInt64Ty = 2952 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 2953 QualType KmpInt64Ty = 2954 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 2955 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 2956 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 2957 addFieldToRecordDecl(C, RD, KmpInt64Ty); 2958 addFieldToRecordDecl(C, RD, KmpInt32Ty); 2959 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 2960 } 2961 RD->completeDefinition(); 2962 return RD; 2963 } 2964 2965 static RecordDecl * 2966 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 2967 ArrayRef<PrivateDataTy> Privates) { 2968 ASTContext &C = CGM.getContext(); 2969 // Build struct kmp_task_t_with_privates { 2970 // kmp_task_t task_data; 2971 // .kmp_privates_t. privates; 2972 // }; 2973 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 2974 RD->startDefinition(); 2975 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 2976 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 2977 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 2978 RD->completeDefinition(); 2979 return RD; 2980 } 2981 2982 /// Emit a proxy function which accepts kmp_task_t as the second 2983 /// argument. 
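/// The proxy's address is what task allocation stores in the kmp_task_t
/// 'routine' field, so the runtime gets a uniform entry point no matter what
/// the task captured: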
2984 /// \code 2985 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 2986 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 2987 /// For taskloops: 2988 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 2989 /// tt->reductions, tt->shareds); 2990 /// return 0; 2991 /// } 2992 /// \endcode 2993 static llvm::Function * 2994 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 2995 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 2996 QualType KmpTaskTWithPrivatesPtrQTy, 2997 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 2998 QualType SharedsPtrTy, llvm::Function *TaskFunction, 2999 llvm::Value *TaskPrivatesMap) { 3000 ASTContext &C = CGM.getContext(); 3001 FunctionArgList Args; 3002 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3003 ImplicitParamKind::Other); 3004 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3005 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3006 ImplicitParamKind::Other); 3007 Args.push_back(&GtidArg); 3008 Args.push_back(&TaskTypeArg); 3009 const auto &TaskEntryFnInfo = 3010 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3011 llvm::FunctionType *TaskEntryTy = 3012 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3013 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3014 auto *TaskEntry = llvm::Function::Create( 3015 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3016 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3017 TaskEntry->setDoesNotRecurse(); 3018 CodeGenFunction CGF(CGM); 3019 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3020 Loc, Loc); 3021 3022 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3023 // tt, 3024 // For taskloops: 3025 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3026 // tt->task_data.shareds); 3027 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3028 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3029 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3030 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3031 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3032 const auto *KmpTaskTWithPrivatesQTyRD = 3033 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3034 LValue Base = 3035 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3036 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3037 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3038 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3039 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3040 3041 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3042 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3043 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3044 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3045 CGF.ConvertTypeForMem(SharedsPtrTy)); 3046 3047 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3048 llvm::Value *PrivatesParam; 3049 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3050 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3051 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3052 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3053 } else { 3054 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 
3055 } 3056 3057 llvm::Value *CommonArgs[] = { 3058 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap, 3059 CGF.Builder 3060 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(), 3061 CGF.VoidPtrTy, CGF.Int8Ty) 3062 .emitRawPointer(CGF)}; 3063 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3064 std::end(CommonArgs)); 3065 if (isOpenMPTaskLoopDirective(Kind)) { 3066 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3067 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3068 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3069 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3070 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3071 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3072 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3073 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3074 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3075 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3076 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3077 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3078 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3079 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3080 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3081 CallArgs.push_back(LBParam); 3082 CallArgs.push_back(UBParam); 3083 CallArgs.push_back(StParam); 3084 CallArgs.push_back(LIParam); 3085 CallArgs.push_back(RParam); 3086 } 3087 CallArgs.push_back(SharedsParam); 3088 3089 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3090 CallArgs); 3091 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3092 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3093 CGF.FinishFunction(); 3094 return TaskEntry; 3095 } 3096 3097 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3098 SourceLocation Loc, 3099 QualType KmpInt32Ty, 3100 QualType KmpTaskTWithPrivatesPtrQTy, 3101 QualType KmpTaskTWithPrivatesQTy) { 3102 ASTContext &C = CGM.getContext(); 3103 FunctionArgList Args; 3104 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3105 ImplicitParamKind::Other); 3106 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3107 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3108 ImplicitParamKind::Other); 3109 Args.push_back(&GtidArg); 3110 Args.push_back(&TaskTypeArg); 3111 const auto &DestructorFnInfo = 3112 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3113 llvm::FunctionType *DestructorFnTy = 3114 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3115 std::string Name = 3116 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3117 auto *DestructorFn = 3118 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3119 Name, &CGM.getModule()); 3120 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3121 DestructorFnInfo); 3122 DestructorFn->setDoesNotRecurse(); 3123 CodeGenFunction CGF(CGM); 3124 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3125 Args, Loc, Loc); 3126 3127 LValue Base = CGF.EmitLoadOfPointerLValue( 3128 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3129 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3130 const auto *KmpTaskTWithPrivatesQTyRD = 3131 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3132 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3133 Base = CGF.EmitLValueForField(Base, 
*FI); 3134 for (const auto *Field : 3135 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3136 if (QualType::DestructionKind DtorKind = 3137 Field->getType().isDestructedType()) { 3138 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3139 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 3140 } 3141 } 3142 CGF.FinishFunction(); 3143 return DestructorFn; 3144 } 3145 3146 /// Emit a privates mapping function for correct handling of private and 3147 /// firstprivate variables. 3148 /// \code 3149 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 3150 /// **noalias priv1,..., <tyn> **noalias privn) { 3151 /// *priv1 = &.privates.priv1; 3152 /// ...; 3153 /// *privn = &.privates.privn; 3154 /// } 3155 /// \endcode 3156 static llvm::Value * 3157 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3158 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3159 ArrayRef<PrivateDataTy> Privates) { 3160 ASTContext &C = CGM.getContext(); 3161 FunctionArgList Args; 3162 ImplicitParamDecl TaskPrivatesArg( 3163 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3164 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3165 ImplicitParamKind::Other); 3166 Args.push_back(&TaskPrivatesArg); 3167 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3168 unsigned Counter = 1; 3169 for (const Expr *E : Data.PrivateVars) { 3170 Args.push_back(ImplicitParamDecl::Create( 3171 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3172 C.getPointerType(C.getPointerType(E->getType())) 3173 .withConst() 3174 .withRestrict(), 3175 ImplicitParamKind::Other)); 3176 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3177 PrivateVarsPos[VD] = Counter; 3178 ++Counter; 3179 } 3180 for (const Expr *E : Data.FirstprivateVars) { 3181 Args.push_back(ImplicitParamDecl::Create( 3182 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3183 C.getPointerType(C.getPointerType(E->getType())) 3184 .withConst() 3185 .withRestrict(), 3186 ImplicitParamKind::Other)); 3187 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3188 PrivateVarsPos[VD] = Counter; 3189 ++Counter; 3190 } 3191 for (const Expr *E : Data.LastprivateVars) { 3192 Args.push_back(ImplicitParamDecl::Create( 3193 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3194 C.getPointerType(C.getPointerType(E->getType())) 3195 .withConst() 3196 .withRestrict(), 3197 ImplicitParamKind::Other)); 3198 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3199 PrivateVarsPos[VD] = Counter; 3200 ++Counter; 3201 } 3202 for (const VarDecl *VD : Data.PrivateLocals) { 3203 QualType Ty = VD->getType().getNonReferenceType(); 3204 if (VD->getType()->isLValueReferenceType()) 3205 Ty = C.getPointerType(Ty); 3206 if (isAllocatableDecl(VD)) 3207 Ty = C.getPointerType(Ty); 3208 Args.push_back(ImplicitParamDecl::Create( 3209 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3210 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3211 ImplicitParamKind::Other)); 3212 PrivateVarsPos[VD] = Counter; 3213 ++Counter; 3214 } 3215 const auto &TaskPrivatesMapFnInfo = 3216 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3217 llvm::FunctionType *TaskPrivatesMapTy = 3218 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3219 std::string Name = 3220 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3221 auto *TaskPrivatesMap = llvm::Function::Create( 3222 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3223 &CGM.getModule()); 3224 
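  // When optimizing, the mapping function is force-inlined below: it merely
  // forwards the addresses of the .kmp_privates.t fields, so inlining removes
  // a call from every task entry that uses it.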
CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3225 TaskPrivatesMapFnInfo); 3226 if (CGM.getLangOpts().Optimize) { 3227 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3228 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3229 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3230 } 3231 CodeGenFunction CGF(CGM); 3232 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3233 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3234 3235 // *privi = &.privates.privi; 3236 LValue Base = CGF.EmitLoadOfPointerLValue( 3237 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3238 TaskPrivatesArg.getType()->castAs<PointerType>()); 3239 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3240 Counter = 0; 3241 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3242 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3243 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3244 LValue RefLVal = 3245 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3246 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3247 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 3248 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3249 ++Counter; 3250 } 3251 CGF.FinishFunction(); 3252 return TaskPrivatesMap; 3253 } 3254 3255 /// Emit initialization for private variables in task-based directives. 3256 static void emitPrivatesInit(CodeGenFunction &CGF, 3257 const OMPExecutableDirective &D, 3258 Address KmpTaskSharedsPtr, LValue TDBase, 3259 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3260 QualType SharedsTy, QualType SharedsPtrTy, 3261 const OMPTaskDataTy &Data, 3262 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3263 ASTContext &C = CGF.getContext(); 3264 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3265 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3266 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3267 ? OMPD_taskloop 3268 : OMPD_task; 3269 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3270 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3271 LValue SrcBase; 3272 bool IsTargetTask = 3273 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3274 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3275 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3276 // PointersArray, SizesArray, and MappersArray. The original variables for 3277 // these arrays are not captured and we get their addresses explicitly. 3278 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3279 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3280 SrcBase = CGF.MakeAddrLValue( 3281 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3282 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy), 3283 CGF.ConvertTypeForMem(SharedsTy)), 3284 SharedsTy); 3285 } 3286 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3287 for (const PrivateDataTy &Pair : Privates) { 3288 // Do not initialize private locals. 
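    // Locals have no captured original or initializer expression (see
    // PrivateHelpersTy::isLocalPrivate()); their field in the privates record
    // only reserves storage for the task body to bind to.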
3289 if (Pair.second.isLocalPrivate()) { 3290 ++FI; 3291 continue; 3292 } 3293 const VarDecl *VD = Pair.second.PrivateCopy; 3294 const Expr *Init = VD->getAnyInitializer(); 3295 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3296 !CGF.isTrivialInitializer(Init)))) { 3297 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3298 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3299 const VarDecl *OriginalVD = Pair.second.Original; 3300 // Check if the variable is the target-based BasePointersArray, 3301 // PointersArray, SizesArray, or MappersArray. 3302 LValue SharedRefLValue; 3303 QualType Type = PrivateLValue.getType(); 3304 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3305 if (IsTargetTask && !SharedField) { 3306 assert(isa<ImplicitParamDecl>(OriginalVD) && 3307 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3308 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3309 ->getNumParams() == 0 && 3310 isa<TranslationUnitDecl>( 3311 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3312 ->getDeclContext()) && 3313 "Expected artificial target data variable."); 3314 SharedRefLValue = 3315 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3316 } else if (ForDup) { 3317 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3318 SharedRefLValue = CGF.MakeAddrLValue( 3319 SharedRefLValue.getAddress().withAlignment( 3320 C.getDeclAlign(OriginalVD)), 3321 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3322 SharedRefLValue.getTBAAInfo()); 3323 } else if (CGF.LambdaCaptureFields.count( 3324 Pair.second.Original->getCanonicalDecl()) > 0 || 3325 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) { 3326 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3327 } else { 3328 // Processing for implicitly captured variables. 3329 InlinedOpenMPRegionRAII Region( 3330 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3331 /*HasCancel=*/false, /*NoInheritance=*/true); 3332 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3333 } 3334 if (Type->isArrayType()) { 3335 // Initialize firstprivate array. 3336 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3337 // Perform simple memcpy. 3338 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3339 } else { 3340 // Initialize firstprivate array using element-by-element 3341 // initialization. 3342 CGF.EmitOMPAggregateAssign( 3343 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, 3344 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3345 Address SrcElement) { 3346 // Clean up any temporaries needed by the initialization. 3347 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3348 InitScope.addPrivate(Elem, SrcElement); 3349 (void)InitScope.Privatize(); 3350 // Emit initialization for single element. 
3351 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3352 CGF, &CapturesInfo); 3353 CGF.EmitAnyExprToMem(Init, DestElement, 3354 Init->getType().getQualifiers(), 3355 /*IsInitializer=*/false); 3356 }); 3357 } 3358 } else { 3359 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3360 InitScope.addPrivate(Elem, SharedRefLValue.getAddress()); 3361 (void)InitScope.Privatize(); 3362 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3363 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3364 /*capturedByInit=*/false); 3365 } 3366 } else { 3367 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3368 } 3369 } 3370 ++FI; 3371 } 3372 } 3373 3374 /// Check if duplication function is required for taskloops. 3375 static bool checkInitIsRequired(CodeGenFunction &CGF, 3376 ArrayRef<PrivateDataTy> Privates) { 3377 bool InitRequired = false; 3378 for (const PrivateDataTy &Pair : Privates) { 3379 if (Pair.second.isLocalPrivate()) 3380 continue; 3381 const VarDecl *VD = Pair.second.PrivateCopy; 3382 const Expr *Init = VD->getAnyInitializer(); 3383 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) && 3384 !CGF.isTrivialInitializer(Init)); 3385 if (InitRequired) 3386 break; 3387 } 3388 return InitRequired; 3389 } 3390 3391 3392 /// Emit task_dup function (for initialization of 3393 /// private/firstprivate/lastprivate vars and last_iter flag) 3394 /// \code 3395 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3396 /// lastpriv) { 3397 /// // setup lastprivate flag 3398 /// task_dst->last = lastpriv; 3399 /// // could be constructor calls here... 3400 /// } 3401 /// \endcode 3402 static llvm::Value * 3403 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3404 const OMPExecutableDirective &D, 3405 QualType KmpTaskTWithPrivatesPtrQTy, 3406 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3407 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3408 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3409 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3410 ASTContext &C = CGM.getContext(); 3411 FunctionArgList Args; 3412 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3413 KmpTaskTWithPrivatesPtrQTy, 3414 ImplicitParamKind::Other); 3415 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3416 KmpTaskTWithPrivatesPtrQTy, 3417 ImplicitParamKind::Other); 3418 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3419 ImplicitParamKind::Other); 3420 Args.push_back(&DstArg); 3421 Args.push_back(&SrcArg); 3422 Args.push_back(&LastprivArg); 3423 const auto &TaskDupFnInfo = 3424 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3425 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3426 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3427 auto *TaskDup = llvm::Function::Create( 3428 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3429 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3430 TaskDup->setDoesNotRecurse(); 3431 CodeGenFunction CGF(CGM); 3432 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3433 Loc); 3434 3435 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3436 CGF.GetAddrOfLocalVar(&DstArg), 3437 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3438 // task_dst->liter = lastpriv; 3439 if (WithLastIter) { 3440 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3441 LValue Base = 
CGF.EmitLValueForField( 3442 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3443 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3444 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3445 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3446 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3447 } 3448 3449 // Emit initial values for private copies (if any). 3450 assert(!Privates.empty()); 3451 Address KmpTaskSharedsPtr = Address::invalid(); 3452 if (!Data.FirstprivateVars.empty()) { 3453 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3454 CGF.GetAddrOfLocalVar(&SrcArg), 3455 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3456 LValue Base = CGF.EmitLValueForField( 3457 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3458 KmpTaskSharedsPtr = Address( 3459 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3460 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3461 KmpTaskTShareds)), 3462 Loc), 3463 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy)); 3464 } 3465 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 3466 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 3467 CGF.FinishFunction(); 3468 return TaskDup; 3469 } 3470 3471 /// Checks if destructor function is required to be generated. 3472 /// \return true if cleanups are required, false otherwise. 3473 static bool 3474 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3475 ArrayRef<PrivateDataTy> Privates) { 3476 for (const PrivateDataTy &P : Privates) { 3477 if (P.second.isLocalPrivate()) 3478 continue; 3479 QualType Ty = P.second.Original->getType().getNonReferenceType(); 3480 if (Ty.isDestructedType()) 3481 return true; 3482 } 3483 return false; 3484 } 3485 3486 namespace { 3487 /// Loop generator for OpenMP iterator expression. 
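/// The constructor opens one counting loop per declared iterator and the
/// destructor closes them, so code emitted while the scope is active lands in
/// the innermost body. Roughly (a sketch, not the exact IR shape):
/// \code
/// counter = 0;
/// cont:
///   if (counter < upper) goto body; else goto exit;
/// body:
///   iter = begin + counter * step;
///   ...                    // code emitted while the scope is active
///   counter = counter + 1; // emitted by the destructor
///   goto cont;
/// exit:
/// \endcode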
3488 class OMPIteratorGeneratorScope final 3489 : public CodeGenFunction::OMPPrivateScope { 3490 CodeGenFunction &CGF; 3491 const OMPIteratorExpr *E = nullptr; 3492 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 3493 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 3494 OMPIteratorGeneratorScope() = delete; 3495 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 3496 3497 public: 3498 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 3499 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 3500 if (!E) 3501 return; 3502 SmallVector<llvm::Value *, 4> Uppers; 3503 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 3504 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 3505 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 3506 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName())); 3507 const OMPIteratorHelperData &HelperData = E->getHelper(I); 3508 addPrivate( 3509 HelperData.CounterVD, 3510 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr")); 3511 } 3512 Privatize(); 3513 3514 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 3515 const OMPIteratorHelperData &HelperData = E->getHelper(I); 3516 LValue CLVal = 3517 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 3518 HelperData.CounterVD->getType()); 3519 // Counter = 0; 3520 CGF.EmitStoreOfScalar( 3521 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0), 3522 CLVal); 3523 CodeGenFunction::JumpDest &ContDest = 3524 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 3525 CodeGenFunction::JumpDest &ExitDest = 3526 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 3527 // N = <number-of_iterations>; 3528 llvm::Value *N = Uppers[I]; 3529 // cont: 3530 // if (Counter < N) goto body; else goto exit; 3531 CGF.EmitBlock(ContDest.getBlock()); 3532 auto *CVal = 3533 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 3534 llvm::Value *Cmp = 3535 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 3536 ? 
CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress();
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
        /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags
/// type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by alignment, in descending order: the
  // generated .kmp_privates.t record then needs no internal padding (each
  // type's size is a multiple of its alignment), and stable_sort keeps the
  // declaration order among equally aligned privates.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Build the function that maps the task privates (if there are any);
  // initial values for the private copies are emitted further below.
3697 llvm::Value *TaskPrivatesMap = nullptr; 3698 llvm::Type *TaskPrivatesMapTy = 3699 std::next(TaskFunction->arg_begin(), 3)->getType(); 3700 if (!Privates.empty()) { 3701 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3702 TaskPrivatesMap = 3703 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); 3704 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3705 TaskPrivatesMap, TaskPrivatesMapTy); 3706 } else { 3707 TaskPrivatesMap = llvm::ConstantPointerNull::get( 3708 cast<llvm::PointerType>(TaskPrivatesMapTy)); 3709 } 3710 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 3711 // kmp_task_t *tt); 3712 llvm::Function *TaskEntry = emitProxyTaskFunction( 3713 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 3714 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 3715 TaskPrivatesMap); 3716 3717 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 3718 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 3719 // kmp_routine_entry_t *task_entry); 3720 // Task flags. Format is taken from 3721 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h, 3722 // description of kmp_tasking_flags struct. 3723 enum { 3724 TiedFlag = 0x1, 3725 FinalFlag = 0x2, 3726 DestructorsFlag = 0x8, 3727 PriorityFlag = 0x20, 3728 DetachableFlag = 0x40, 3729 }; 3730 unsigned Flags = Data.Tied ? TiedFlag : 0; 3731 bool NeedsCleanup = false; 3732 if (!Privates.empty()) { 3733 NeedsCleanup = 3734 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); 3735 if (NeedsCleanup) 3736 Flags = Flags | DestructorsFlag; 3737 } 3738 if (Data.Priority.getInt()) 3739 Flags = Flags | PriorityFlag; 3740 if (D.hasClausesOfKind<OMPDetachClause>()) 3741 Flags = Flags | DetachableFlag; 3742 llvm::Value *TaskFlags = 3743 Data.Final.getPointer() 3744 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 3745 CGF.Builder.getInt32(FinalFlag), 3746 CGF.Builder.getInt32(/*C=*/0)) 3747 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 3748 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 3749 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 3750 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 3751 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 3752 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3753 TaskEntry, KmpRoutineEntryPtrTy)}; 3754 llvm::Value *NewTask; 3755 if (D.hasClausesOfKind<OMPNowaitClause>()) { 3756 // Check if we have any device clause associated with the directive. 3757 const Expr *Device = nullptr; 3758 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 3759 Device = C->getDevice(); 3760 // Emit device ID if any otherwise use default value. 3761 llvm::Value *DeviceID; 3762 if (Device) 3763 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 3764 CGF.Int64Ty, /*isSigned=*/true); 3765 else 3766 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 3767 AllocArgs.push_back(DeviceID); 3768 NewTask = CGF.EmitRuntimeCall( 3769 OMPBuilder.getOrCreateRuntimeFunction( 3770 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 3771 AllocArgs); 3772 } else { 3773 NewTask = 3774 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 3775 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 3776 AllocArgs); 3777 } 3778 // Emit detach clause initialization. 
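  // Conceptually, 'detach(evt)' hands the program an omp_event_handle_t, and
  // the task is not considered complete until a matching
  // omp_fulfill_event(evt) call fulfills that event.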
3779 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 3780 // task_descriptor); 3781 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 3782 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 3783 LValue EvtLVal = CGF.EmitLValue(Evt); 3784 3785 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 3786 // int gtid, kmp_task_t *task); 3787 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 3788 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 3789 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 3790 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 3791 OMPBuilder.getOrCreateRuntimeFunction( 3792 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 3793 {Loc, Tid, NewTask}); 3794 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 3795 Evt->getExprLoc()); 3796 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 3797 } 3798 // Process affinity clauses. 3799 if (D.hasClausesOfKind<OMPAffinityClause>()) { 3800 // Process list of affinity data. 3801 ASTContext &C = CGM.getContext(); 3802 Address AffinitiesArray = Address::invalid(); 3803 // Calculate number of elements to form the array of affinity data. 3804 llvm::Value *NumOfElements = nullptr; 3805 unsigned NumAffinities = 0; 3806 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 3807 if (const Expr *Modifier = C->getModifier()) { 3808 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 3809 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 3810 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 3811 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 3812 NumOfElements = 3813 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 3814 } 3815 } else { 3816 NumAffinities += C->varlist_size(); 3817 } 3818 } 3819 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 3820 // Fields ids in kmp_task_affinity_info record. 3821 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 3822 3823 QualType KmpTaskAffinityInfoArrayTy; 3824 if (NumOfElements) { 3825 NumOfElements = CGF.Builder.CreateNUWAdd( 3826 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 3827 auto *OVE = new (C) OpaqueValueExpr( 3828 Loc, 3829 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 3830 VK_PRValue); 3831 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 3832 RValue::get(NumOfElements)); 3833 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType( 3834 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal, 3835 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 3836 // Properly emit variable-sized array. 
3837 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 3838 ImplicitParamKind::Other); 3839 CGF.EmitVarDecl(*PD); 3840 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 3841 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 3842 /*isSigned=*/false); 3843 } else { 3844 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 3845 KmpTaskAffinityInfoTy, 3846 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 3847 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); 3848 AffinitiesArray = 3849 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 3850 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 3851 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 3852 /*isSigned=*/false); 3853 } 3854 3855 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 3856 // Fill array by elements without iterators. 3857 unsigned Pos = 0; 3858 bool HasIterator = false; 3859 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 3860 if (C->getModifier()) { 3861 HasIterator = true; 3862 continue; 3863 } 3864 for (const Expr *E : C->varlists()) { 3865 llvm::Value *Addr; 3866 llvm::Value *Size; 3867 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 3868 LValue Base = 3869 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 3870 KmpTaskAffinityInfoTy); 3871 // affs[i].base_addr = &<Affinities[i].second>; 3872 LValue BaseAddrLVal = CGF.EmitLValueForField( 3873 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 3874 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 3875 BaseAddrLVal); 3876 // affs[i].len = sizeof(<Affinities[i].second>); 3877 LValue LenLVal = CGF.EmitLValueForField( 3878 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 3879 CGF.EmitStoreOfScalar(Size, LenLVal); 3880 ++Pos; 3881 } 3882 } 3883 LValue PosLVal; 3884 if (HasIterator) { 3885 PosLVal = CGF.MakeAddrLValue( 3886 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 3887 C.getSizeType()); 3888 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 3889 } 3890 // Process elements with iterators. 
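    // From here on the running index lives in memory (PosLVal) rather than in
    // the compile-time counter Pos, because the stores below execute inside
    // emitted iterator loops whose trip counts are only known at run time.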
3891 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 3892 const Expr *Modifier = C->getModifier(); 3893 if (!Modifier) 3894 continue; 3895 OMPIteratorGeneratorScope IteratorScope( 3896 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 3897 for (const Expr *E : C->varlists()) { 3898 llvm::Value *Addr; 3899 llvm::Value *Size; 3900 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 3901 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 3902 LValue Base = 3903 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx), 3904 KmpTaskAffinityInfoTy); 3905 // affs[i].base_addr = &<Affinities[i].second>; 3906 LValue BaseAddrLVal = CGF.EmitLValueForField( 3907 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 3908 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 3909 BaseAddrLVal); 3910 // affs[i].len = sizeof(<Affinities[i].second>); 3911 LValue LenLVal = CGF.EmitLValueForField( 3912 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 3913 CGF.EmitStoreOfScalar(Size, LenLVal); 3914 Idx = CGF.Builder.CreateNUWAdd( 3915 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 3916 CGF.EmitStoreOfScalar(Idx, PosLVal); 3917 } 3918 } 3919 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 3920 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 3921 // naffins, kmp_task_affinity_info_t *affin_list); 3922 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 3923 llvm::Value *GTid = getThreadID(CGF, Loc); 3924 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3925 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy); 3926 // FIXME: Emit the function and ignore its result for now unless the 3927 // runtime function is properly implemented. 3928 (void)CGF.EmitRuntimeCall( 3929 OMPBuilder.getOrCreateRuntimeFunction( 3930 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 3931 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 3932 } 3933 llvm::Value *NewTaskNewTaskTTy = 3934 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3935 NewTask, KmpTaskTWithPrivatesPtrTy); 3936 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy, 3937 KmpTaskTWithPrivatesQTy); 3938 LValue TDBase = 3939 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3940 // Fill the data in the resulting kmp_task_t record. 3941 // Copy shareds if there are any. 3942 Address KmpTaskSharedsPtr = Address::invalid(); 3943 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 3944 KmpTaskSharedsPtr = Address( 3945 CGF.EmitLoadOfScalar( 3946 CGF.EmitLValueForField( 3947 TDBase, 3948 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), 3949 Loc), 3950 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy)); 3951 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 3952 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 3953 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 3954 } 3955 // Emit initial values for private copies (if any). 
3956 TaskResultTy Result; 3957 if (!Privates.empty()) { 3958 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 3959 SharedsTy, SharedsPtrTy, Data, Privates, 3960 /*ForDup=*/false); 3961 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 3962 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 3963 Result.TaskDupFn = emitTaskDupFunction( 3964 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 3965 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 3966 /*WithLastIter=*/!Data.LastprivateVars.empty()); 3967 } 3968 } 3969 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 3970 enum { Priority = 0, Destructors = 1 }; 3971 // Provide pointer to function with destructors for privates. 3972 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 3973 const RecordDecl *KmpCmplrdataUD = 3974 (*FI)->getType()->getAsUnionType()->getDecl(); 3975 if (NeedsCleanup) { 3976 llvm::Value *DestructorFn = emitDestructorsFunction( 3977 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 3978 KmpTaskTWithPrivatesQTy); 3979 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 3980 LValue DestructorsLV = CGF.EmitLValueForField( 3981 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 3982 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3983 DestructorFn, KmpRoutineEntryPtrTy), 3984 DestructorsLV); 3985 } 3986 // Set priority. 3987 if (Data.Priority.getInt()) { 3988 LValue Data2LV = CGF.EmitLValueForField( 3989 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 3990 LValue PriorityLV = CGF.EmitLValueForField( 3991 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 3992 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 3993 } 3994 Result.NewTask = NewTask; 3995 Result.TaskEntry = TaskEntry; 3996 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 3997 Result.TDBase = TDBase; 3998 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 3999 return Result; 4000 } 4001 4002 /// Translates internal dependency kind into the runtime kind. 4003 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 4004 RTLDependenceKindTy DepKind; 4005 switch (K) { 4006 case OMPC_DEPEND_in: 4007 DepKind = RTLDependenceKindTy::DepIn; 4008 break; 4009 // Out and InOut dependencies must use the same code. 4010 case OMPC_DEPEND_out: 4011 case OMPC_DEPEND_inout: 4012 DepKind = RTLDependenceKindTy::DepInOut; 4013 break; 4014 case OMPC_DEPEND_mutexinoutset: 4015 DepKind = RTLDependenceKindTy::DepMutexInOutSet; 4016 break; 4017 case OMPC_DEPEND_inoutset: 4018 DepKind = RTLDependenceKindTy::DepInOutSet; 4019 break; 4020 case OMPC_DEPEND_outallmemory: 4021 DepKind = RTLDependenceKindTy::DepOmpAllMem; 4022 break; 4023 case OMPC_DEPEND_source: 4024 case OMPC_DEPEND_sink: 4025 case OMPC_DEPEND_depobj: 4026 case OMPC_DEPEND_inoutallmemory: 4027 case OMPC_DEPEND_unknown: 4028 llvm_unreachable("Unknown task dependence type"); 4029 } 4030 return DepKind; 4031 } 4032 4033 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 
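/// A rough C view of the record built here (the authoritative definition is
/// kmp_depend_info in the runtime's kmp.h; the field names are illustrative):
/// \code
/// struct kmp_depend_info {
///   intptr_t base_addr;
///   size_t len;
///   unsigned char flags; // a bool-sized unsigned integer type
/// };
/// \endcode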
4034 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4035 QualType &FlagsTy) { 4036 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4037 if (KmpDependInfoTy.isNull()) { 4038 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4039 KmpDependInfoRD->startDefinition(); 4040 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4041 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4042 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4043 KmpDependInfoRD->completeDefinition(); 4044 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4045 } 4046 } 4047 4048 std::pair<llvm::Value *, LValue> 4049 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4050 SourceLocation Loc) { 4051 ASTContext &C = CGM.getContext(); 4052 QualType FlagsTy; 4053 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4054 RecordDecl *KmpDependInfoRD = 4055 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4056 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4057 LValue Base = CGF.EmitLoadOfPointerLValue( 4058 DepobjLVal.getAddress().withElementType( 4059 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)), 4060 KmpDependInfoPtrTy->castAs<PointerType>()); 4061 Address DepObjAddr = CGF.Builder.CreateGEP( 4062 CGF, Base.getAddress(), 4063 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4064 LValue NumDepsBase = CGF.MakeAddrLValue( 4065 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); 4066 // NumDeps = deps[i].base_addr; 4067 LValue BaseAddrLVal = CGF.EmitLValueForField( 4068 NumDepsBase, 4069 *std::next(KmpDependInfoRD->field_begin(), 4070 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr))); 4071 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4072 return std::make_pair(NumDeps, Base); 4073 } 4074 4075 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4076 llvm::PointerUnion<unsigned *, LValue *> Pos, 4077 const OMPTaskDataTy::DependData &Data, 4078 Address DependenciesArray) { 4079 CodeGenModule &CGM = CGF.CGM; 4080 ASTContext &C = CGM.getContext(); 4081 QualType FlagsTy; 4082 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4083 RecordDecl *KmpDependInfoRD = 4084 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4085 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4086 4087 OMPIteratorGeneratorScope IteratorScope( 4088 CGF, cast_or_null<OMPIteratorExpr>( 4089 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4090 : nullptr)); 4091 for (const Expr *E : Data.DepExprs) { 4092 llvm::Value *Addr; 4093 llvm::Value *Size; 4094 4095 // The expression will be a nullptr in the 'omp_all_memory' case. 
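      // For 'omp_all_memory' the entry is therefore written with
      // base_addr = 0 and len = 0; only the flags value (DepOmpAllMem)
      // carries information for the runtime.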
4096 if (E) { 4097 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4098 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy); 4099 } else { 4100 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4101 Size = llvm::ConstantInt::get(CGF.SizeTy, 0); 4102 } 4103 LValue Base; 4104 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4105 Base = CGF.MakeAddrLValue( 4106 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); 4107 } else { 4108 assert(E && "Expected a non-null expression"); 4109 LValue &PosLVal = *Pos.get<LValue *>(); 4110 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4111 Base = CGF.MakeAddrLValue( 4112 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy); 4113 } 4114 // deps[i].base_addr = &<Dependencies[i].second>; 4115 LValue BaseAddrLVal = CGF.EmitLValueForField( 4116 Base, 4117 *std::next(KmpDependInfoRD->field_begin(), 4118 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr))); 4119 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal); 4120 // deps[i].len = sizeof(<Dependencies[i].second>); 4121 LValue LenLVal = CGF.EmitLValueForField( 4122 Base, *std::next(KmpDependInfoRD->field_begin(), 4123 static_cast<unsigned int>(RTLDependInfoFields::Len))); 4124 CGF.EmitStoreOfScalar(Size, LenLVal); 4125 // deps[i].flags = <Dependencies[i].first>; 4126 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); 4127 LValue FlagsLVal = CGF.EmitLValueForField( 4128 Base, 4129 *std::next(KmpDependInfoRD->field_begin(), 4130 static_cast<unsigned int>(RTLDependInfoFields::Flags))); 4131 CGF.EmitStoreOfScalar( 4132 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)), 4133 FlagsLVal); 4134 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4135 ++(*P); 4136 } else { 4137 LValue &PosLVal = *Pos.get<LValue *>(); 4138 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4139 Idx = CGF.Builder.CreateNUWAdd(Idx, 4140 llvm::ConstantInt::get(Idx->getType(), 1)); 4141 CGF.EmitStoreOfScalar(Idx, PosLVal); 4142 } 4143 } 4144 } 4145 4146 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes( 4147 CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4148 const OMPTaskDataTy::DependData &Data) { 4149 assert(Data.DepKind == OMPC_DEPEND_depobj && 4150 "Expected depobj dependency kind."); 4151 SmallVector<llvm::Value *, 4> Sizes; 4152 SmallVector<LValue, 4> SizeLVals; 4153 ASTContext &C = CGF.getContext(); 4154 { 4155 OMPIteratorGeneratorScope IteratorScope( 4156 CGF, cast_or_null<OMPIteratorExpr>( 4157 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() 4158 : nullptr)); 4159 for (const Expr *E : Data.DepExprs) { 4160 llvm::Value *NumDeps; 4161 LValue Base; 4162 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4163 std::tie(NumDeps, Base) = 4164 getDepobjElements(CGF, DepobjLVal, E->getExprLoc()); 4165 LValue NumLVal = CGF.MakeAddrLValue( 4166 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 4167 C.getUIntPtrType()); 4168 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0), 4169 NumLVal.getAddress()); 4170 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 4171 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 4172 CGF.EmitStoreOfScalar(Add, NumLVal); 4173 SizeLVals.push_back(NumLVal); 4174 } 4175 } 4176 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 4177 llvm::Value *Size = 4178 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 4179 Sizes.push_back(Size); 4180 } 4181 return Sizes; 4182 } 4183 4184 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF, 4185 QualType &KmpDependInfoTy, 4186 LValue PosLVal, 4187 const OMPTaskDataTy::DependData &Data, 4188 Address DependenciesArray) { 4189 assert(Data.DepKind == OMPC_DEPEND_depobj && 4190 "Expected depobj dependency kind."); 4191 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 4192 { 4193 OMPIteratorGeneratorScope IteratorScope( 4194 CGF, cast_or_null<OMPIteratorExpr>( 4195 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4196 : nullptr)); 4197 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4198 const Expr *E = Data.DepExprs[I]; 4199 llvm::Value *NumDeps; 4200 LValue Base; 4201 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4202 std::tie(NumDeps, Base) = 4203 getDepobjElements(CGF, DepobjLVal, E->getExprLoc()); 4204 4205 // memcopy dependency data. 4206 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4207 ElSize, 4208 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4209 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4210 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos); 4211 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size); 4212 4213 // Increase pos. 4214 // pos += size; 4215 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4216 CGF.EmitStoreOfScalar(Add, PosLVal); 4217 } 4218 } 4219 } 4220 4221 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4222 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4223 SourceLocation Loc) { 4224 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4225 return D.DepExprs.empty(); 4226 })) 4227 return std::make_pair(nullptr, Address::invalid()); 4228 // Process list of dependencies. 4229 ASTContext &C = CGM.getContext(); 4230 Address DependenciesArray = Address::invalid(); 4231 llvm::Value *NumOfElements = nullptr; 4232 unsigned NumDependencies = std::accumulate( 4233 Dependencies.begin(), Dependencies.end(), 0, 4234 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4235 return D.DepKind == OMPC_DEPEND_depobj 4236 ? V 4237 : (V + (D.IteratorExpr ? 
0 : D.DepExprs.size())); 4238 }); 4239 QualType FlagsTy; 4240 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4241 bool HasDepobjDeps = false; 4242 bool HasRegularWithIterators = false; 4243 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4244 llvm::Value *NumOfRegularWithIterators = 4245 llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4246 // Calculate number of depobj dependencies and regular deps with the 4247 // iterators. 4248 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4249 if (D.DepKind == OMPC_DEPEND_depobj) { 4250 SmallVector<llvm::Value *, 4> Sizes = 4251 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4252 for (llvm::Value *Size : Sizes) { 4253 NumOfDepobjElements = 4254 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4255 } 4256 HasDepobjDeps = true; 4257 continue; 4258 } 4259 // Include number of iterations, if any. 4260 4261 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4262 llvm::Value *ClauseIteratorSpace = 4263 llvm::ConstantInt::get(CGF.IntPtrTy, 1); 4264 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4265 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4266 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4267 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace); 4268 } 4269 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul( 4270 ClauseIteratorSpace, 4271 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size())); 4272 NumOfRegularWithIterators = 4273 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps); 4274 HasRegularWithIterators = true; 4275 continue; 4276 } 4277 } 4278 4279 QualType KmpDependInfoArrayTy; 4280 if (HasDepobjDeps || HasRegularWithIterators) { 4281 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4282 /*isSigned=*/false); 4283 if (HasDepobjDeps) { 4284 NumOfElements = 4285 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4286 } 4287 if (HasRegularWithIterators) { 4288 NumOfElements = 4289 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4290 } 4291 auto *OVE = new (C) OpaqueValueExpr( 4292 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4293 VK_PRValue); 4294 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4295 RValue::get(NumOfElements)); 4296 KmpDependInfoArrayTy = 4297 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal, 4298 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4299 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4300 // Properly emit variable-sized array. 
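// In effect this materializes (a sketch; 'n' is the element count computed
// at run time above):
//   kmp_depend_info deps[n]; // VLA lowered to a dynamically sized alloca,
//                            // unlike the fixed-size array in the 'else'
//                            // branch below.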
4301 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4302 ImplicitParamKind::Other); 4303 CGF.EmitVarDecl(*PD); 4304 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4305 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4306 /*isSigned=*/false); 4307 } else { 4308 KmpDependInfoArrayTy = C.getConstantArrayType( 4309 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4310 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); 4311 DependenciesArray = 4312 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4313 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4314 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4315 /*isSigned=*/false); 4316 } 4317 unsigned Pos = 0; 4318 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4319 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4320 Dependencies[I].IteratorExpr) 4321 continue; 4322 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4323 DependenciesArray); 4324 } 4325 // Copy regular dependencies with iterators. 4326 LValue PosLVal = CGF.MakeAddrLValue( 4327 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4328 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4329 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4330 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4331 !Dependencies[I].IteratorExpr) 4332 continue; 4333 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4334 DependenciesArray); 4335 } 4336 // Copy final depobj arrays without iterators. 4337 if (HasDepobjDeps) { 4338 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4339 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4340 continue; 4341 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4342 DependenciesArray); 4343 } 4344 } 4345 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4346 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty); 4347 return std::make_pair(NumOfElements, DependenciesArray); 4348 } 4349 4350 Address CGOpenMPRuntime::emitDepobjDependClause( 4351 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4352 SourceLocation Loc) { 4353 if (Dependencies.DepExprs.empty()) 4354 return Address::invalid(); 4355 // Process list of dependencies. 4356 ASTContext &C = CGM.getContext(); 4357 Address DependenciesArray = Address::invalid(); 4358 unsigned NumDependencies = Dependencies.DepExprs.size(); 4359 QualType FlagsTy; 4360 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4361 RecordDecl *KmpDependInfoRD = 4362 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4363 4364 llvm::Value *Size; 4365 // Define type kmp_depend_info[<Dependencies.size()>]; 4366 // For depobj reserve one extra element to store the number of elements. 4367 // It is required to handle depobj(x) update(in) construct. 
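// (E.g. '#pragma omp depobj(o) update(in)' carries no dependence list of its
// own, so the runtime must be able to recover the element count from the
// object itself.)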
4368 // kmp_depend_info[<Dependencies.size()>] deps; 4369 llvm::Value *NumDepsVal; 4370 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4371 if (const auto *IE = 4372 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4373 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4374 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4375 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4376 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4377 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4378 } 4379 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4380 NumDepsVal); 4381 CharUnits SizeInBytes = 4382 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4383 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4384 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4385 NumDepsVal = 4386 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4387 } else { 4388 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4389 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4390 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); 4391 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4392 Size = CGM.getSize(Sz.alignTo(Align)); 4393 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4394 } 4395 // Need to allocate on the dynamic memory. 4396 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4397 // Use default allocator. 4398 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4399 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 4400 4401 llvm::Value *Addr = 4402 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4403 CGM.getModule(), OMPRTL___kmpc_alloc), 4404 Args, ".dep.arr.addr"); 4405 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy); 4406 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4407 Addr, KmpDependInfoLlvmTy->getPointerTo()); 4408 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align); 4409 // Write number of elements in the first element of array for depobj. 
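// E.g. (a sketch; N is the number of dependencies):
//   deps[0].base_addr = N;              // bookkeeping header, not a real dep
//   deps[1..N] = <dependency records>;  // filled by emitDependData below
//   return (void *)&deps[1];            // the depobj handle skips the header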
4410 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 4411 // deps[i].base_addr = NumDependencies; 4412 LValue BaseAddrLVal = CGF.EmitLValueForField( 4413 Base, 4414 *std::next(KmpDependInfoRD->field_begin(), 4415 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr))); 4416 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 4417 llvm::PointerUnion<unsigned *, LValue *> Pos; 4418 unsigned Idx = 1; 4419 LValue PosLVal; 4420 if (Dependencies.IteratorExpr) { 4421 PosLVal = CGF.MakeAddrLValue( 4422 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 4423 C.getSizeType()); 4424 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 4425 /*IsInit=*/true); 4426 Pos = &PosLVal; 4427 } else { 4428 Pos = &Idx; 4429 } 4430 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 4431 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4432 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy, 4433 CGF.Int8Ty); 4434 return DependenciesArray; 4435 } 4436 4437 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 4438 SourceLocation Loc) { 4439 ASTContext &C = CGM.getContext(); 4440 QualType FlagsTy; 4441 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4442 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(), 4443 C.VoidPtrTy.castAs<PointerType>()); 4444 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4445 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4446 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy), 4447 CGF.ConvertTypeForMem(KmpDependInfoTy)); 4448 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4449 Addr.getElementType(), Addr.emitRawPointer(CGF), 4450 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4451 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 4452 CGF.VoidPtrTy); 4453 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4454 // Use default allocator. 4455 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4456 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 4457 4458 // _kmpc_free(gtid, addr, nullptr); 4459 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4460 CGM.getModule(), OMPRTL___kmpc_free), 4461 Args); 4462 } 4463 4464 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 4465 OpenMPDependClauseKind NewDepKind, 4466 SourceLocation Loc) { 4467 ASTContext &C = CGM.getContext(); 4468 QualType FlagsTy; 4469 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4470 RecordDecl *KmpDependInfoRD = 4471 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4472 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4473 llvm::Value *NumDeps; 4474 LValue Base; 4475 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 4476 4477 Address Begin = Base.getAddress(); 4478 // Cast from pointer to array type to pointer to single element. 4479 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(), 4480 Begin.emitRawPointer(CGF), NumDeps); 4481 // The basic structure here is a while-do loop. 
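// Sketch of the loop emitted below, rewriting every element's flags in place
// (the emptiness check is skipped; a depobj always has at least one element):
//   kmp_depend_info *el = begin;
//   do {
//     el->flags = <new dependence kind>;
//   } while (++el != end);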
4482 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 4483 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 4484 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 4485 CGF.EmitBlock(BodyBB); 4486 llvm::PHINode *ElementPHI = 4487 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 4488 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB); 4489 Begin = Begin.withPointer(ElementPHI, KnownNonNull); 4490 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 4491 Base.getTBAAInfo()); 4492 // deps[i].flags = NewDepKind; 4493 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 4494 LValue FlagsLVal = CGF.EmitLValueForField( 4495 Base, *std::next(KmpDependInfoRD->field_begin(), 4496 static_cast<unsigned int>(RTLDependInfoFields::Flags))); 4497 CGF.EmitStoreOfScalar( 4498 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)), 4499 FlagsLVal); 4500 4501 // Shift the address forward by one element. 4502 llvm::Value *ElementNext = 4503 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext") 4504 .emitRawPointer(CGF); 4505 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock()); 4506 llvm::Value *IsEmpty = 4507 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty"); 4508 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 4509 // Done. 4510 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 4511 } 4512 4513 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 4514 const OMPExecutableDirective &D, 4515 llvm::Function *TaskFunction, 4516 QualType SharedsTy, Address Shareds, 4517 const Expr *IfCond, 4518 const OMPTaskDataTy &Data) { 4519 if (!CGF.HaveInsertPoint()) 4520 return; 4521 4522 TaskResultTy Result = 4523 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 4524 llvm::Value *NewTask = Result.NewTask; 4525 llvm::Function *TaskEntry = Result.TaskEntry; 4526 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 4527 LValue TDBase = Result.TDBase; 4528 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 4529 // Process list of dependences. 4530 Address DependenciesArray = Address::invalid(); 4531 llvm::Value *NumOfElements; 4532 std::tie(NumOfElements, DependenciesArray) = 4533 emitDependClause(CGF, Data.Dependences, Loc); 4534 4535 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 4536 // libcall. 
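// For example, for
//   #pragma omp task depend(in : a)
// the 'then' path below boils down to (a sketch):
//   kmp_depend_info deps[1] = {{(intptr_t)&a, sizeof(a), /*in flags*/}};
//   __kmpc_omp_task_with_deps(&loc, gtid, new_task, /*ndeps=*/1, deps,
//                             /*ndeps_noalias=*/0, /*noalias_dep_list=*/0);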
4537 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 4538 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 4539 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 4540 // list is not empty 4541 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4542 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 4543 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 4544 llvm::Value *DepTaskArgs[7]; 4545 if (!Data.Dependences.empty()) { 4546 DepTaskArgs[0] = UpLoc; 4547 DepTaskArgs[1] = ThreadID; 4548 DepTaskArgs[2] = NewTask; 4549 DepTaskArgs[3] = NumOfElements; 4550 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF); 4551 DepTaskArgs[5] = CGF.Builder.getInt32(0); 4552 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4553 } 4554 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 4555 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 4556 if (!Data.Tied) { 4557 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4558 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 4559 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 4560 } 4561 if (!Data.Dependences.empty()) { 4562 CGF.EmitRuntimeCall( 4563 OMPBuilder.getOrCreateRuntimeFunction( 4564 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 4565 DepTaskArgs); 4566 } else { 4567 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4568 CGM.getModule(), OMPRTL___kmpc_omp_task), 4569 TaskArgs); 4570 } 4571 // Check if parent region is untied and build return for untied task; 4572 if (auto *Region = 4573 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 4574 Region->emitUntiedSwitch(CGF); 4575 }; 4576 4577 llvm::Value *DepWaitTaskArgs[7]; 4578 if (!Data.Dependences.empty()) { 4579 DepWaitTaskArgs[0] = UpLoc; 4580 DepWaitTaskArgs[1] = ThreadID; 4581 DepWaitTaskArgs[2] = NumOfElements; 4582 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF); 4583 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 4584 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4585 DepWaitTaskArgs[6] = 4586 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause); 4587 } 4588 auto &M = CGM.getModule(); 4589 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 4590 TaskEntry, &Data, &DepWaitTaskArgs, 4591 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 4592 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 4593 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 4594 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 4595 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 4596 // is specified. 
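// I.e. the 'if(false)' path still honors the task's dependences, then runs
// the body undeferred on the encountering thread (a sketch):
//   __kmpc_omp_taskwait_deps_51(...);   // wait for the dependences
//   __kmpc_omp_task_begin_if0(...);
//   proxy_task_entry(gtid, new_task);   // immediate execution
//   __kmpc_omp_task_complete_if0(...);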
4597 if (!Data.Dependences.empty()) 4598 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4599 M, OMPRTL___kmpc_omp_taskwait_deps_51), 4600 DepWaitTaskArgs); 4601 // Call proxy_task_entry(gtid, new_task); 4602 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 4603 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 4604 Action.Enter(CGF); 4605 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 4606 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 4607 OutlinedFnArgs); 4608 }; 4609 4610 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 4611 // kmp_task_t *new_task); 4612 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 4613 // kmp_task_t *new_task); 4614 RegionCodeGenTy RCG(CodeGen); 4615 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 4616 M, OMPRTL___kmpc_omp_task_begin_if0), 4617 TaskArgs, 4618 OMPBuilder.getOrCreateRuntimeFunction( 4619 M, OMPRTL___kmpc_omp_task_complete_if0), 4620 TaskArgs); 4621 RCG.setAction(Action); 4622 RCG(CGF); 4623 }; 4624 4625 if (IfCond) { 4626 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 4627 } else { 4628 RegionCodeGenTy ThenRCG(ThenCodeGen); 4629 ThenRCG(CGF); 4630 } 4631 } 4632 4633 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 4634 const OMPLoopDirective &D, 4635 llvm::Function *TaskFunction, 4636 QualType SharedsTy, Address Shareds, 4637 const Expr *IfCond, 4638 const OMPTaskDataTy &Data) { 4639 if (!CGF.HaveInsertPoint()) 4640 return; 4641 TaskResultTy Result = 4642 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 4643 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 4644 // libcall. 4645 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 4646 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 4647 // sched, kmp_uint64 grainsize, void *task_dup); 4648 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4649 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 4650 llvm::Value *IfVal; 4651 if (IfCond) { 4652 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 4653 /*isSigned=*/true); 4654 } else { 4655 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 4656 } 4657 4658 LValue LBLVal = CGF.EmitLValueForField( 4659 Result.TDBase, 4660 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 4661 const auto *LBVar = 4662 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 4663 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), 4664 /*IsInitializer=*/true); 4665 LValue UBLVal = CGF.EmitLValueForField( 4666 Result.TDBase, 4667 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 4668 const auto *UBVar = 4669 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 4670 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), 4671 /*IsInitializer=*/true); 4672 LValue StLVal = CGF.EmitLValueForField( 4673 Result.TDBase, 4674 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 4675 const auto *StVar = 4676 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 4677 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), 4678 /*IsInitializer=*/true); 4679 // Store reductions address. 
4680 LValue RedLVal = CGF.EmitLValueForField( 4681 Result.TDBase, 4682 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 4683 if (Data.Reductions) { 4684 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 4685 } else { 4686 CGF.EmitNullInitialization(RedLVal.getAddress(), 4687 CGF.getContext().VoidPtrTy); 4688 } 4689 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 4690 llvm::Value *TaskArgs[] = { 4691 UpLoc, 4692 ThreadID, 4693 Result.NewTask, 4694 IfVal, 4695 LBLVal.getPointer(CGF), 4696 UBLVal.getPointer(CGF), 4697 CGF.EmitLoadOfScalar(StLVal, Loc), 4698 llvm::ConstantInt::getSigned( 4699 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 4700 llvm::ConstantInt::getSigned( 4701 CGF.IntTy, Data.Schedule.getPointer() 4702 ? Data.Schedule.getInt() ? NumTasks : Grainsize 4703 : NoSchedule), 4704 Data.Schedule.getPointer() 4705 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 4706 /*isSigned=*/false) 4707 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 4708 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4709 Result.TaskDupFn, CGF.VoidPtrTy) 4710 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 4711 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4712 CGM.getModule(), OMPRTL___kmpc_taskloop), 4713 TaskArgs); 4714 } 4715 4716 /// Emit reduction operation for each element of array (required for 4717 /// array sections) LHS op = RHS. 4718 /// \param Type Type of array. 4719 /// \param LHSVar Variable on the left side of the reduction operation 4720 /// (references element of array in original variable). 4721 /// \param RHSVar Variable on the right side of the reduction operation 4722 /// (references element of array in original variable). 4723 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 4724 /// RHSVar. 4725 static void EmitOMPAggregateReduction( 4726 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 4727 const VarDecl *RHSVar, 4728 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 4729 const Expr *, const Expr *)> &RedOpGen, 4730 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 4731 const Expr *UpExpr = nullptr) { 4732 // Perform element-by-element initialization. 4733 QualType ElementTy; 4734 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 4735 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 4736 4737 // Drill down to the base element type on both arrays. 4738 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 4739 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 4740 4741 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF); 4742 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF); 4743 // Cast from pointer to array type to pointer to single element. 4744 llvm::Value *LHSEnd = 4745 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements); 4746 // The basic structure here is a while-do loop. 4747 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 4748 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 4749 llvm::Value *IsEmpty = 4750 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 4751 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 4752 4753 // Enter the loop body, making that address the current address. 
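// Roughly, the generated per-element loop is:
//   if (LHSBegin != LHSEnd) {             // emptiness check above
//     T *lhs = LHSBegin, *rhs = RHSBegin;
//     do {
//       *lhs = RedOp(*lhs, *rhs);         // or an atomic/critical variant
//       ++lhs; ++rhs;
//     } while (lhs != LHSEnd);
//   }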
4754 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4755 CGF.EmitBlock(BodyBB);
4756
4757 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4758
4759 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4760 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4761 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4762 Address RHSElementCurrent(
4763 RHSElementPHI, RHSAddr.getElementType(),
4764 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4765
4766 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4767 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4768 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4769 Address LHSElementCurrent(
4770 LHSElementPHI, LHSAddr.getElementType(),
4771 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4772
4773 // Emit the per-element reduction operation.
4774 CodeGenFunction::OMPPrivateScope Scope(CGF);
4775 Scope.addPrivate(LHSVar, LHSElementCurrent);
4776 Scope.addPrivate(RHSVar, RHSElementCurrent);
4777 Scope.Privatize();
4778 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4779 Scope.ForceCleanup();
4780
4781 // Shift the address forward by one element.
4782 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4783 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4784 "omp.arraycpy.dest.element");
4785 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4786 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4787 "omp.arraycpy.src.element");
4788 // Check whether we've reached the end.
4789 llvm::Value *Done =
4790 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4791 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4792 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4793 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4794
4795 // Done.
4796 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4797 }
4798
4799 /// Emit the reduction combiner. If the combiner is a simple expression, emit
4800 /// it as is; otherwise treat it as the combiner of a user-defined reduction
4801 /// (UDR) declaration and emit it as a call to the UDR combiner function.
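/// E.g., for 'reduction(+ : x)' the combiner is conceptually just
/// 'lhs = lhs + rhs' and is emitted directly, while for a reduction declared
/// via '#pragma omp declare reduction' it is lowered to a call to the UDR
/// combiner function (a sketch; names are illustrative).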
4802 static void emitReductionCombiner(CodeGenFunction &CGF, 4803 const Expr *ReductionOp) { 4804 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 4805 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 4806 if (const auto *DRE = 4807 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 4808 if (const auto *DRD = 4809 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 4810 std::pair<llvm::Function *, llvm::Function *> Reduction = 4811 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 4812 RValue Func = RValue::get(Reduction.first); 4813 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 4814 CGF.EmitIgnoredExpr(ReductionOp); 4815 return; 4816 } 4817 CGF.EmitIgnoredExpr(ReductionOp); 4818 } 4819 4820 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 4821 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, 4822 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, 4823 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) { 4824 ASTContext &C = CGM.getContext(); 4825 4826 // void reduction_func(void *LHSArg, void *RHSArg); 4827 FunctionArgList Args; 4828 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 4829 ImplicitParamKind::Other); 4830 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 4831 ImplicitParamKind::Other); 4832 Args.push_back(&LHSArg); 4833 Args.push_back(&RHSArg); 4834 const auto &CGFI = 4835 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4836 std::string Name = getReductionFuncName(ReducerName); 4837 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 4838 llvm::GlobalValue::InternalLinkage, Name, 4839 &CGM.getModule()); 4840 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 4841 Fn->setDoesNotRecurse(); 4842 CodeGenFunction CGF(CGM); 4843 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 4844 4845 // Dst = (void*[n])(LHSArg); 4846 // Src = (void*[n])(RHSArg); 4847 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4848 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 4849 ArgsElemType->getPointerTo()), 4850 ArgsElemType, CGF.getPointerAlign()); 4851 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4852 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 4853 ArgsElemType->getPointerTo()), 4854 ArgsElemType, CGF.getPointerAlign()); 4855 4856 // ... 4857 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 4858 // ... 4859 CodeGenFunction::OMPPrivateScope Scope(CGF); 4860 const auto *IPriv = Privates.begin(); 4861 unsigned Idx = 0; 4862 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 4863 const auto *RHSVar = 4864 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 4865 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar)); 4866 const auto *LHSVar = 4867 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 4868 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar)); 4869 QualType PrivTy = (*IPriv)->getType(); 4870 if (PrivTy->isVariablyModifiedType()) { 4871 // Get array size and emit VLA type. 
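// The dynamic size travels in the next void* slot of the argument array
// (the caller stores it with inttoptr; see the RedList setup in
// emitReduction below), i.e., roughly:
//   lhs[i]     = <pointer to the VLA data>
//   lhs[i + 1] = (void *)<number of elements>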
4872 ++Idx; 4873 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 4874 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 4875 const VariableArrayType *VLA = 4876 CGF.getContext().getAsVariableArrayType(PrivTy); 4877 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 4878 CodeGenFunction::OpaqueValueMapping OpaqueMap( 4879 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 4880 CGF.EmitVariablyModifiedType(PrivTy); 4881 } 4882 } 4883 Scope.Privatize(); 4884 IPriv = Privates.begin(); 4885 const auto *ILHS = LHSExprs.begin(); 4886 const auto *IRHS = RHSExprs.begin(); 4887 for (const Expr *E : ReductionOps) { 4888 if ((*IPriv)->getType()->isArrayType()) { 4889 // Emit reduction for array section. 4890 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 4891 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 4892 EmitOMPAggregateReduction( 4893 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 4894 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4895 emitReductionCombiner(CGF, E); 4896 }); 4897 } else { 4898 // Emit reduction for array subscript or single variable. 4899 emitReductionCombiner(CGF, E); 4900 } 4901 ++IPriv; 4902 ++ILHS; 4903 ++IRHS; 4904 } 4905 Scope.ForceCleanup(); 4906 CGF.FinishFunction(); 4907 return Fn; 4908 } 4909 4910 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 4911 const Expr *ReductionOp, 4912 const Expr *PrivateRef, 4913 const DeclRefExpr *LHS, 4914 const DeclRefExpr *RHS) { 4915 if (PrivateRef->getType()->isArrayType()) { 4916 // Emit reduction for array section. 4917 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 4918 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 4919 EmitOMPAggregateReduction( 4920 CGF, PrivateRef->getType(), LHSVar, RHSVar, 4921 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4922 emitReductionCombiner(CGF, ReductionOp); 4923 }); 4924 } else { 4925 // Emit reduction for array subscript or single variable. 4926 emitReductionCombiner(CGF, ReductionOp); 4927 } 4928 } 4929 4930 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 4931 ArrayRef<const Expr *> Privates, 4932 ArrayRef<const Expr *> LHSExprs, 4933 ArrayRef<const Expr *> RHSExprs, 4934 ArrayRef<const Expr *> ReductionOps, 4935 ReductionOptionsTy Options) { 4936 if (!CGF.HaveInsertPoint()) 4937 return; 4938 4939 bool WithNowait = Options.WithNowait; 4940 bool SimpleReduction = Options.SimpleReduction; 4941 4942 // Next code should be emitted for reduction: 4943 // 4944 // static kmp_critical_name lock = { 0 }; 4945 // 4946 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 4947 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 4948 // ... 4949 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 4950 // *(Type<n>-1*)rhs[<n>-1]); 4951 // } 4952 // 4953 // ... 4954 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 4955 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 4956 // RedList, reduce_func, &<lock>)) { 4957 // case 1: 4958 // ... 4959 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4960 // ... 4961 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 4962 // break; 4963 // case 2: 4964 // ... 4965 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 4966 // ... 
4967 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 4968 // break; 4969 // default:; 4970 // } 4971 // 4972 // if SimpleReduction is true, only the next code is generated: 4973 // ... 4974 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4975 // ... 4976 4977 ASTContext &C = CGM.getContext(); 4978 4979 if (SimpleReduction) { 4980 CodeGenFunction::RunCleanupsScope Scope(CGF); 4981 const auto *IPriv = Privates.begin(); 4982 const auto *ILHS = LHSExprs.begin(); 4983 const auto *IRHS = RHSExprs.begin(); 4984 for (const Expr *E : ReductionOps) { 4985 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 4986 cast<DeclRefExpr>(*IRHS)); 4987 ++IPriv; 4988 ++ILHS; 4989 ++IRHS; 4990 } 4991 return; 4992 } 4993 4994 // 1. Build a list of reduction variables. 4995 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 4996 auto Size = RHSExprs.size(); 4997 for (const Expr *E : Privates) { 4998 if (E->getType()->isVariablyModifiedType()) 4999 // Reserve place for array size. 5000 ++Size; 5001 } 5002 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5003 QualType ReductionArrayTy = C.getConstantArrayType( 5004 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal, 5005 /*IndexTypeQuals=*/0); 5006 RawAddress ReductionList = 5007 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5008 const auto *IPriv = Privates.begin(); 5009 unsigned Idx = 0; 5010 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5011 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5012 CGF.Builder.CreateStore( 5013 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5014 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5015 Elem); 5016 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5017 // Store array size. 5018 ++Idx; 5019 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5020 llvm::Value *Size = CGF.Builder.CreateIntCast( 5021 CGF.getVLASize( 5022 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5023 .NumElts, 5024 CGF.SizeTy, /*isSigned=*/false); 5025 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5026 Elem); 5027 } 5028 } 5029 5030 // 2. Emit reduce_func(). 5031 llvm::Function *ReductionFn = emitReductionFunction( 5032 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy), 5033 Privates, LHSExprs, RHSExprs, ReductionOps); 5034 5035 // 3. Create static kmp_critical_name lock = { 0 }; 5036 std::string Name = getName({"reduction"}); 5037 llvm::Value *Lock = getCriticalRegionLock(Name); 5038 5039 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5040 // RedList, reduce_func, &<lock>); 5041 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5042 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5043 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5044 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5045 ReductionList.getPointer(), CGF.VoidPtrTy); 5046 llvm::Value *Args[] = { 5047 IdentTLoc, // ident_t *<loc> 5048 ThreadId, // i32 <gtid> 5049 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5050 ReductionArrayTySize, // size_type sizeof(RedList) 5051 RL, // void *RedList 5052 ReductionFn, // void (*) (void *, void *) <reduce_func> 5053 Lock // kmp_critical_name *&<lock> 5054 }; 5055 llvm::Value *Res = CGF.EmitRuntimeCall( 5056 OMPBuilder.getOrCreateRuntimeFunction( 5057 CGM.getModule(), 5058 WithNowait ? 
OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5059 Args); 5060 5061 // 5. Build switch(res) 5062 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5063 llvm::SwitchInst *SwInst = 5064 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5065 5066 // 6. Build case 1: 5067 // ... 5068 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5069 // ... 5070 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5071 // break; 5072 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5073 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5074 CGF.EmitBlock(Case1BB); 5075 5076 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5077 llvm::Value *EndArgs[] = { 5078 IdentTLoc, // ident_t *<loc> 5079 ThreadId, // i32 <gtid> 5080 Lock // kmp_critical_name *&<lock> 5081 }; 5082 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5083 CodeGenFunction &CGF, PrePostActionTy &Action) { 5084 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5085 const auto *IPriv = Privates.begin(); 5086 const auto *ILHS = LHSExprs.begin(); 5087 const auto *IRHS = RHSExprs.begin(); 5088 for (const Expr *E : ReductionOps) { 5089 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5090 cast<DeclRefExpr>(*IRHS)); 5091 ++IPriv; 5092 ++ILHS; 5093 ++IRHS; 5094 } 5095 }; 5096 RegionCodeGenTy RCG(CodeGen); 5097 CommonActionTy Action( 5098 nullptr, std::nullopt, 5099 OMPBuilder.getOrCreateRuntimeFunction( 5100 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5101 : OMPRTL___kmpc_end_reduce), 5102 EndArgs); 5103 RCG.setAction(Action); 5104 RCG(CGF); 5105 5106 CGF.EmitBranch(DefaultBB); 5107 5108 // 7. Build case 2: 5109 // ... 5110 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5111 // ... 5112 // break; 5113 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5114 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5115 CGF.EmitBlock(Case2BB); 5116 5117 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5118 CodeGenFunction &CGF, PrePostActionTy &Action) { 5119 const auto *ILHS = LHSExprs.begin(); 5120 const auto *IRHS = RHSExprs.begin(); 5121 const auto *IPriv = Privates.begin(); 5122 for (const Expr *E : ReductionOps) { 5123 const Expr *XExpr = nullptr; 5124 const Expr *EExpr = nullptr; 5125 const Expr *UpExpr = nullptr; 5126 BinaryOperatorKind BO = BO_Comma; 5127 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5128 if (BO->getOpcode() == BO_Assign) { 5129 XExpr = BO->getLHS(); 5130 UpExpr = BO->getRHS(); 5131 } 5132 } 5133 // Try to emit update expression as a simple atomic. 5134 const Expr *RHSExpr = UpExpr; 5135 if (RHSExpr) { 5136 // Analyze RHS part of the whole expression. 5137 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5138 RHSExpr->IgnoreParenImpCasts())) { 5139 // If this is a conditional operator, analyze its condition for 5140 // min/max reduction operator. 
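// E.g. a max reduction arrives as 'x = x < e ? e : x'; inspecting the
// condition 'x < e' yields BO = BO_LT and EExpr = 'e' below.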
5141 RHSExpr = ACO->getCond(); 5142 } 5143 if (const auto *BORHS = 5144 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5145 EExpr = BORHS->getRHS(); 5146 BO = BORHS->getOpcode(); 5147 } 5148 } 5149 if (XExpr) { 5150 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5151 auto &&AtomicRedGen = [BO, VD, 5152 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5153 const Expr *EExpr, const Expr *UpExpr) { 5154 LValue X = CGF.EmitLValue(XExpr); 5155 RValue E; 5156 if (EExpr) 5157 E = CGF.EmitAnyExpr(EExpr); 5158 CGF.EmitOMPAtomicSimpleUpdateExpr( 5159 X, E, BO, /*IsXLHSInRHSPart=*/true, 5160 llvm::AtomicOrdering::Monotonic, Loc, 5161 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5162 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5163 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5164 CGF.emitOMPSimpleStore( 5165 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5166 VD->getType().getNonReferenceType(), Loc); 5167 PrivateScope.addPrivate(VD, LHSTemp); 5168 (void)PrivateScope.Privatize(); 5169 return CGF.EmitAnyExpr(UpExpr); 5170 }); 5171 }; 5172 if ((*IPriv)->getType()->isArrayType()) { 5173 // Emit atomic reduction for array section. 5174 const auto *RHSVar = 5175 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5176 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5177 AtomicRedGen, XExpr, EExpr, UpExpr); 5178 } else { 5179 // Emit atomic reduction for array subscript or single variable. 5180 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5181 } 5182 } else { 5183 // Emit as a critical region. 5184 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5185 const Expr *, const Expr *) { 5186 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5187 std::string Name = RT.getName({"atomic_reduction"}); 5188 RT.emitCriticalRegion( 5189 CGF, Name, 5190 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5191 Action.Enter(CGF); 5192 emitReductionCombiner(CGF, E); 5193 }, 5194 Loc); 5195 }; 5196 if ((*IPriv)->getType()->isArrayType()) { 5197 const auto *LHSVar = 5198 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5199 const auto *RHSVar = 5200 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5201 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5202 CritRedGen); 5203 } else { 5204 CritRedGen(CGF, nullptr, nullptr, nullptr); 5205 } 5206 } 5207 ++ILHS; 5208 ++IRHS; 5209 ++IPriv; 5210 } 5211 }; 5212 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5213 if (!WithNowait) { 5214 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5215 llvm::Value *EndArgs[] = { 5216 IdentTLoc, // ident_t *<loc> 5217 ThreadId, // i32 <gtid> 5218 Lock // kmp_critical_name *&<lock> 5219 }; 5220 CommonActionTy Action(nullptr, std::nullopt, 5221 OMPBuilder.getOrCreateRuntimeFunction( 5222 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5223 EndArgs); 5224 AtomicRCG.setAction(Action); 5225 AtomicRCG(CGF); 5226 } else { 5227 AtomicRCG(CGF); 5228 } 5229 5230 CGF.EmitBranch(DefaultBB); 5231 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5232 } 5233 5234 /// Generates unique name for artificial threadprivate variables. 5235 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5236 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5237 const Expr *Ref) { 5238 SmallString<256> Buffer; 5239 llvm::raw_svector_ostream Out(Buffer); 5240 const clang::DeclRefExpr *DE; 5241 const VarDecl *D = ::getBaseDecl(Ref, DE); 5242 if (!D) 5243 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5244 D = D->getCanonicalDecl(); 5245 std::string Name = CGM.getOpenMPRuntime().getName( 5246 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5247 Out << Prefix << Name << "_" 5248 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5249 return std::string(Out.str()); 5250 } 5251 5252 /// Emits reduction initializer function: 5253 /// \code 5254 /// void @.red_init(void* %arg, void* %orig) { 5255 /// %0 = bitcast void* %arg to <type>* 5256 /// store <type> <init>, <type>* %0 5257 /// ret void 5258 /// } 5259 /// \endcode 5260 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5261 SourceLocation Loc, 5262 ReductionCodeGen &RCG, unsigned N) { 5263 ASTContext &C = CGM.getContext(); 5264 QualType VoidPtrTy = C.VoidPtrTy; 5265 VoidPtrTy.addRestrict(); 5266 FunctionArgList Args; 5267 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5268 ImplicitParamKind::Other); 5269 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5270 ImplicitParamKind::Other); 5271 Args.emplace_back(&Param); 5272 Args.emplace_back(&ParamOrig); 5273 const auto &FnInfo = 5274 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5275 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5276 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5277 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5278 Name, &CGM.getModule()); 5279 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5280 Fn->setDoesNotRecurse(); 5281 CodeGenFunction CGF(CGM); 5282 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5283 QualType PrivateType = RCG.getPrivateType(N); 5284 Address PrivateAddr = CGF.EmitLoadOfPointer( 5285 CGF.GetAddrOfLocalVar(&Param).withElementType( 5286 CGF.ConvertTypeForMem(PrivateType)->getPointerTo()), 5287 C.getPointerType(PrivateType)->castAs<PointerType>()); 5288 llvm::Value *Size = nullptr; 5289 // If the size of the reduction item is non-constant, load it from global 5290 // threadprivate variable. 
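// (That variable is published under the "reduction_size" unique name by
// emitTaskReductionFixups() below; initializer, combiner, and finalizer all
// read the dynamic size back through it.)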
5291 if (RCG.getSizes(N).second) {
5292 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5293 CGF, CGM.getContext().getSizeType(),
5294 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5295 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5296 CGM.getContext().getSizeType(), Loc);
5297 }
5298 RCG.emitAggregateType(CGF, N, Size);
5299 Address OrigAddr = Address::invalid();
5300 // If the initializer uses the initializer from a 'declare reduction'
5301 // construct, emit a pointer to the address of the original reduction item
5302 // (required by the reduction initializer).
5303 if (RCG.usesReductionInitializer(N)) {
5304 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5305 OrigAddr = CGF.EmitLoadOfPointer(
5306 SharedAddr,
5307 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5308 }
5309 // Emit the initializer:
5310 // %0 = bitcast void* %arg to <type>*
5311 // store <type> <init>, <type>* %0
5312 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5313 [](CodeGenFunction &) { return false; });
5314 CGF.FinishFunction();
5315 return Fn;
5316 }
5317
5318 /// Emits reduction combiner function:
5319 /// \code
5320 /// void @.red_comb(void* %arg0, void* %arg1) {
5321 /// %lhs = bitcast void* %arg0 to <type>*
5322 /// %rhs = bitcast void* %arg1 to <type>*
5323 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5324 /// store <type> %2, <type>* %lhs
5325 /// ret void
5326 /// }
5327 /// \endcode
5328 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5329 SourceLocation Loc,
5330 ReductionCodeGen &RCG, unsigned N,
5331 const Expr *ReductionOp,
5332 const Expr *LHS, const Expr *RHS,
5333 const Expr *PrivateRef) {
5334 ASTContext &C = CGM.getContext();
5335 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5336 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5337 FunctionArgList Args;
5338 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5339 C.VoidPtrTy, ImplicitParamKind::Other);
5340 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5341 ImplicitParamKind::Other);
5342 Args.emplace_back(&ParamInOut);
5343 Args.emplace_back(&ParamIn);
5344 const auto &FnInfo =
5345 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5346 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5347 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5348 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5349 Name, &CGM.getModule());
5350 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5351 Fn->setDoesNotRecurse();
5352 CodeGenFunction CGF(CGM);
5353 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5354 llvm::Value *Size = nullptr;
5355 // If the size of the reduction item is non-constant, load it from the global
5356 // threadprivate variable.
5357 if (RCG.getSizes(N).second) {
5358 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5359 CGF, CGM.getContext().getSizeType(),
5360 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5361 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5362 CGM.getContext().getSizeType(), Loc);
5363 }
5364 RCG.emitAggregateType(CGF, N, Size);
5365 // Remap lhs and rhs variables to the addresses of the function arguments.
5366 // %lhs = bitcast void* %arg0 to <type>* 5367 // %rhs = bitcast void* %arg1 to <type>* 5368 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5369 PrivateScope.addPrivate( 5370 LHSVD, 5371 // Pull out the pointer to the variable. 5372 CGF.EmitLoadOfPointer( 5373 CGF.GetAddrOfLocalVar(&ParamInOut) 5374 .withElementType( 5375 CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()), 5376 C.getPointerType(LHSVD->getType())->castAs<PointerType>())); 5377 PrivateScope.addPrivate( 5378 RHSVD, 5379 // Pull out the pointer to the variable. 5380 CGF.EmitLoadOfPointer( 5381 CGF.GetAddrOfLocalVar(&ParamIn).withElementType( 5382 CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()), 5383 C.getPointerType(RHSVD->getType())->castAs<PointerType>())); 5384 PrivateScope.Privatize(); 5385 // Emit the combiner body: 5386 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5387 // store <type> %2, <type>* %lhs 5388 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5389 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5390 cast<DeclRefExpr>(RHS)); 5391 CGF.FinishFunction(); 5392 return Fn; 5393 } 5394 5395 /// Emits reduction finalizer function: 5396 /// \code 5397 /// void @.red_fini(void* %arg) { 5398 /// %0 = bitcast void* %arg to <type>* 5399 /// <destroy>(<type>* %0) 5400 /// ret void 5401 /// } 5402 /// \endcode 5403 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5404 SourceLocation Loc, 5405 ReductionCodeGen &RCG, unsigned N) { 5406 if (!RCG.needCleanups(N)) 5407 return nullptr; 5408 ASTContext &C = CGM.getContext(); 5409 FunctionArgList Args; 5410 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5411 ImplicitParamKind::Other); 5412 Args.emplace_back(&Param); 5413 const auto &FnInfo = 5414 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5415 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5416 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 5417 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5418 Name, &CGM.getModule()); 5419 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5420 Fn->setDoesNotRecurse(); 5421 CodeGenFunction CGF(CGM); 5422 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5423 Address PrivateAddr = CGF.EmitLoadOfPointer( 5424 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>()); 5425 llvm::Value *Size = nullptr; 5426 // If the size of the reduction item is non-constant, load it from global 5427 // threadprivate variable. 
5428 if (RCG.getSizes(N).second) { 5429 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5430 CGF, CGM.getContext().getSizeType(), 5431 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5432 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5433 CGM.getContext().getSizeType(), Loc); 5434 } 5435 RCG.emitAggregateType(CGF, N, Size); 5436 // Emit the finalizer body: 5437 // <destroy>(<type>* %0) 5438 RCG.emitCleanups(CGF, N, PrivateAddr); 5439 CGF.FinishFunction(Loc); 5440 return Fn; 5441 } 5442 5443 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 5444 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 5445 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 5446 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 5447 return nullptr; 5448 5449 // Build typedef struct: 5450 // kmp_taskred_input { 5451 // void *reduce_shar; // shared reduction item 5452 // void *reduce_orig; // original reduction item used for initialization 5453 // size_t reduce_size; // size of data item 5454 // void *reduce_init; // data initialization routine 5455 // void *reduce_fini; // data finalization routine 5456 // void *reduce_comb; // data combiner routine 5457 // kmp_task_red_flags_t flags; // flags for additional info from compiler 5458 // } kmp_taskred_input_t; 5459 ASTContext &C = CGM.getContext(); 5460 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 5461 RD->startDefinition(); 5462 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5463 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5464 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 5465 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5466 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5467 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5468 const FieldDecl *FlagsFD = addFieldToRecordDecl( 5469 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 5470 RD->completeDefinition(); 5471 QualType RDType = C.getRecordType(RD); 5472 unsigned Size = Data.ReductionVars.size(); 5473 llvm::APInt ArraySize(/*numBits=*/64, Size); 5474 QualType ArrayRDType = 5475 C.getConstantArrayType(RDType, ArraySize, nullptr, 5476 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); 5477 // kmp_task_red_input_t .rd_input.[Size]; 5478 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 5479 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 5480 Data.ReductionCopies, Data.ReductionOps); 5481 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 5482 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 5483 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 5484 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 5485 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 5486 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs, 5487 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 5488 ".rd_input.gep."); 5489 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType); 5490 // ElemLVal.reduce_shar = &Shareds[Cnt]; 5491 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 5492 RCG.emitSharedOrigLValue(CGF, Cnt); 5493 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF); 5494 CGF.EmitStoreOfScalar(Shared, SharedLVal); 5495 // ElemLVal.reduce_orig = &Origs[Cnt]; 5496 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 5497 llvm::Value *Orig = 
RCG.getOrigLValue(Cnt).getPointer(CGF);
5498 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5499 RCG.emitAggregateType(CGF, Cnt);
5500 llvm::Value *SizeValInChars;
5501 llvm::Value *SizeVal;
5502 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5503 // We use delayed creation/initialization for VLAs and array sections. It is
5504 // required because the runtime does not provide a way to pass the sizes of
5505 // VLAs/array sections to the initializer/combiner/finalizer functions.
5506 // Instead, threadprivate global variables are used to store these values,
5507 // and the functions read them from there.
5508 bool DelayedCreation = !!SizeVal;
5509 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5510 /*isSigned=*/false);
5511 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5512 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5513 // ElemLVal.reduce_init = init;
5514 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5515 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5516 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5517 // ElemLVal.reduce_fini = fini;
5518 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5519 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5520 llvm::Value *FiniAddr =
5521 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5522 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5523 // ElemLVal.reduce_comb = comb;
5524 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5525 llvm::Value *CombAddr = emitReduceCombFunction(
5526 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5527 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5528 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5529 // ElemLVal.flags = DelayedCreation ? 1 : 0;
5530 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5531 if (DelayedCreation) {
5532 CGF.EmitStoreOfScalar(
5533 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5534 FlagsLVal);
5535 } else
5536 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5537 }
5538 if (Data.IsReductionWithTaskMod) {
5539 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5540 // is_ws, int num, void *data);
5541 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5542 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5543 CGM.IntTy, /*isSigned=*/true);
5544 llvm::Value *Args[] = {
5545 IdentTLoc, GTid,
5546 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ?
1 : 0,
5547 /*isSigned=*/true),
5548 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5549 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5550 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5551 return CGF.EmitRuntimeCall(
5552 OMPBuilder.getOrCreateRuntimeFunction(
5553 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5554 Args);
5555 }
5556 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5557 llvm::Value *Args[] = {
5558 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5559 /*isSigned=*/true),
5560 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5561 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5562 CGM.VoidPtrTy)};
5563 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5564 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5565 Args);
5566 }
5567
5568 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5569 SourceLocation Loc,
5570 bool IsWorksharingReduction) {
5571 // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
5572 // int gtid, int is_ws);
5573 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5574 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5575 CGM.IntTy, /*isSigned=*/true);
5576 llvm::Value *Args[] = {IdentTLoc, GTid,
5577 llvm::ConstantInt::get(CGM.IntTy,
5578 IsWorksharingReduction ? 1 : 0,
5579 /*isSigned=*/true)};
5580 (void)CGF.EmitRuntimeCall(
5581 OMPBuilder.getOrCreateRuntimeFunction(
5582 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5583 Args);
5584 }
5585
5586 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5587 SourceLocation Loc,
5588 ReductionCodeGen &RCG,
5589 unsigned N) {
5590 auto Sizes = RCG.getSizes(N);
5591 // Emit a threadprivate global variable if the size of the reduction item is
5592 // non-constant (Sizes.second != nullptr).
5593 if (Sizes.second) {
5594 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5595 /*isSigned=*/false);
5596 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5597 CGF, CGM.getContext().getSizeType(),
5598 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5599 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5600 }
5601 }
5602
5603 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5604 SourceLocation Loc,
5605 llvm::Value *ReductionsPtr,
5606 LValue SharedLVal) {
5607 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5608 // *d);
5609 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5610 CGM.IntTy,
5611 /*isSigned=*/true),
5612 ReductionsPtr,
5613 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5614 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5615 return Address(
5616 CGF.EmitRuntimeCall(
5617 OMPBuilder.getOrCreateRuntimeFunction(
5618 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5619 Args),
5620 CGF.Int8Ty, SharedLVal.getAlignment());
5621 }
5622
5623 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5624 const OMPTaskDataTy &Data) {
5625 if (!CGF.HaveInsertPoint())
5626 return;
5627
5628 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5629 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
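// (The OpenMPIRBuilder path is expected to lower to the same
// __kmpc_omp_taskwait runtime call as the fallback below, just built
// through the shared builder.)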
5630 OMPBuilder.createTaskwait(CGF.Builder);
5631 } else {
5632 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5633 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5634 auto &M = CGM.getModule();
5635 Address DependenciesArray = Address::invalid();
5636 llvm::Value *NumOfElements;
5637 std::tie(NumOfElements, DependenciesArray) =
5638 emitDependClause(CGF, Data.Dependences, Loc);
5639 if (!Data.Dependences.empty()) {
5640 llvm::Value *DepWaitTaskArgs[7];
5641 DepWaitTaskArgs[0] = UpLoc;
5642 DepWaitTaskArgs[1] = ThreadID;
5643 DepWaitTaskArgs[2] = NumOfElements;
5644 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
5645 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5646 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5647 DepWaitTaskArgs[6] =
5648 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5649
5650 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5651
5652 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5653 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5654 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5655 // kmp_int32 has_no_wait); if dependence info is specified.
5656 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5657 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5658 DepWaitTaskArgs);
5659
5660 } else {
5661
5662 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5663 // global_tid);
5664 llvm::Value *Args[] = {UpLoc, ThreadID};
5665 // Ignore return result until untied tasks are supported.
5666 CGF.EmitRuntimeCall(
5667 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5668 Args);
5669 }
5670 }
5671
5672 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5673 Region->emitUntiedSwitch(CGF);
5674 }
5675
5676 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5677 OpenMPDirectiveKind InnerKind,
5678 const RegionCodeGenTy &CodeGen,
5679 bool HasCancel) {
5680 if (!CGF.HaveInsertPoint())
5681 return;
5682 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5683 InnerKind != OMPD_critical &&
5684 InnerKind != OMPD_master &&
5685 InnerKind != OMPD_masked);
5686 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5687 }
5688
5689 namespace {
5690 enum RTCancelKind {
5691 CancelNoreq = 0,
5692 CancelParallel = 1,
5693 CancelLoop = 2,
5694 CancelSections = 3,
5695 CancelTaskgroup = 4
5696 };
5697 } // anonymous namespace
5698
5699 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5700 RTCancelKind CancelKind = CancelNoreq;
5701 if (CancelRegion == OMPD_parallel)
5702 CancelKind = CancelParallel;
5703 else if (CancelRegion == OMPD_for)
5704 CancelKind = CancelLoop;
5705 else if (CancelRegion == OMPD_sections)
5706 CancelKind = CancelSections;
5707 else {
5708 assert(CancelRegion == OMPD_taskgroup);
5709 CancelKind = CancelTaskgroup;
5710 }
5711 return CancelKind;
5712 }
5713
5714 void CGOpenMPRuntime::emitCancellationPointCall(
5715 CodeGenFunction &CGF, SourceLocation Loc,
5716 OpenMPDirectiveKind CancelRegion) {
5717 if (!CGF.HaveInsertPoint())
5718 return;
5719 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5720 // global_tid, kmp_int32 cncl_kind);
5721 if (auto *OMPRegionInfo =
5722 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5723 // For 'cancellation point taskgroup', the task region info may not have a
5724 // cancel. This may instead happen in another adjacent task.
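// E.g. (illustrative): one task may contain only
//   #pragma omp cancellation point taskgroup
// while a sibling task in the same taskgroup executes
//   #pragma omp cancel taskgroup
// so checking hasCancel() on the current region alone is not sufficient for
// the taskgroup case.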
5725 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5726 llvm::Value *Args[] = {
5727 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5728 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5729 // Ignore return result until untied tasks are supported.
5730 llvm::Value *Result = CGF.EmitRuntimeCall(
5731 OMPBuilder.getOrCreateRuntimeFunction(
5732 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5733 Args);
5734 // if (__kmpc_cancellationpoint()) {
5735 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5736 // exit from construct;
5737 // }
5738 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5739 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5740 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5741 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5742 CGF.EmitBlock(ExitBB);
5743 if (CancelRegion == OMPD_parallel)
5744 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5745 // exit from construct;
5746 CodeGenFunction::JumpDest CancelDest =
5747 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5748 CGF.EmitBranchThroughCleanup(CancelDest);
5749 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5750 }
5751 }
5752 }
5753
5754 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5755 const Expr *IfCond,
5756 OpenMPDirectiveKind CancelRegion) {
5757 if (!CGF.HaveInsertPoint())
5758 return;
5759 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5760 // kmp_int32 cncl_kind);
5761 auto &M = CGM.getModule();
5762 if (auto *OMPRegionInfo =
5763 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5764 auto &&ThenGen = [this, &M, Loc, CancelRegion,
5765 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5766 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5767 llvm::Value *Args[] = {
5768 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5769 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5770 // Ignore return result until untied tasks are supported.
5771 llvm::Value *Result = CGF.EmitRuntimeCall(
5772 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5773 // if (__kmpc_cancel()) {
5774 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5775 // exit from construct;
5776 // }
5777 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5778 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5779 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5780 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5781 CGF.EmitBlock(ExitBB);
5782 if (CancelRegion == OMPD_parallel)
5783 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5784 // exit from construct;
5785 CodeGenFunction::JumpDest CancelDest =
5786 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5787 CGF.EmitBranchThroughCleanup(CancelDest);
5788 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5789 };
5790 if (IfCond) {
5791 emitIfClause(CGF, IfCond, ThenGen,
5792 [](CodeGenFunction &, PrePostActionTy &) {});
5793 } else {
5794 RegionCodeGenTy ThenRCG(ThenGen);
5795 ThenRCG(CGF);
5796 }
5797 }
5798 }
5799
5800 namespace {
5801 /// Cleanup action for uses_allocators support.
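/// For a directive such as (illustrative; names are made up)
///   #pragma omp target uses_allocators(my_alloc(my_traits))
/// every allocator that carries a traits expression gets a
/// __kmpc_init_allocator call on entry to the target region and a matching
/// __kmpc_destroy_allocator call on exit.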
5802 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5803 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
5804
5805 public:
5806 OMPUsesAllocatorsActionTy(
5807 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
5808 : Allocators(Allocators) {}
5809 void Enter(CodeGenFunction &CGF) override {
5810 if (!CGF.HaveInsertPoint())
5811 return;
5812 for (const auto &AllocatorData : Allocators) {
5813 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
5814 CGF, AllocatorData.first, AllocatorData.second);
5815 }
5816 }
5817 void Exit(CodeGenFunction &CGF) override {
5818 if (!CGF.HaveInsertPoint())
5819 return;
5820 for (const auto &AllocatorData : Allocators) {
5821 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
5822 AllocatorData.first);
5823 }
5824 }
5825 };
5826 } // namespace
5827
5828 void CGOpenMPRuntime::emitTargetOutlinedFunction(
5829 const OMPExecutableDirective &D, StringRef ParentName,
5830 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5831 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5832 assert(!ParentName.empty() && "Invalid target entry parent name!");
5833 HasEmittedTargetRegion = true;
5834 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
5835 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
5836 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
5837 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
5838 if (!D.AllocatorTraits)
5839 continue;
5840 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
5841 }
5842 }
5843 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
5844 CodeGen.setAction(UsesAllocatorAction);
5845 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5846 IsOffloadEntry, CodeGen);
5847 }
5848
5849 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
5850 const Expr *Allocator,
5851 const Expr *AllocatorTraits) {
5852 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5853 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5854 // Use default memspace handle.
5855 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5856 llvm::Value *NumTraits = llvm::ConstantInt::get(
5857 CGF.IntTy, cast<ConstantArrayType>(
5858 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
5859 ->getSize()
5860 .getLimitedValue());
5861 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
5862 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5863 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
5864 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
5865 AllocatorTraitsLVal.getBaseInfo(),
5866 AllocatorTraitsLVal.getTBAAInfo());
5867 llvm::Value *Traits = Addr.emitRawPointer(CGF);
5868
5869 llvm::Value *AllocatorVal =
5870 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5871 CGM.getModule(), OMPRTL___kmpc_init_allocator),
5872 {ThreadId, MemSpaceHandle, NumTraits, Traits});
5873 // Store to allocator.
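// That is, the handle returned by __kmpc_init_allocator is stored into the
// allocator variable itself, so later uses of the allocator within the
// region simply load it; emitUsesAllocatorsFini below performs the matching
// teardown.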
5874 CGF.EmitAutoVarAlloca(*cast<VarDecl>(
5875 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
5876 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5877 AllocatorVal =
5878 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
5879 Allocator->getType(), Allocator->getExprLoc());
5880 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
5881 }
5882
5883 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
5884 const Expr *Allocator) {
5885 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5886 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5887 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5888 llvm::Value *AllocatorVal =
5889 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
5890 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
5891 CGF.getContext().VoidPtrTy,
5892 Allocator->getExprLoc());
5893 (void)CGF.EmitRuntimeCall(
5894 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
5895 OMPRTL___kmpc_destroy_allocator),
5896 {ThreadId, AllocatorVal});
5897 }
5898
5899 void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
5900 const OMPExecutableDirective &D, CodeGenFunction &CGF,
5901 int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
5902 int32_t &MaxTeamsVal) {
5903
5904 getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
5905 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
5906 /*UpperBoundOnly=*/true);
5907
5908 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5909 for (auto *A : C->getAttrs()) {
5910 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
5911 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
5912 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
5913 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
5914 &AttrMinBlocksVal, &AttrMaxBlocksVal);
5915 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
5916 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
5917 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
5918 &AttrMaxThreadsVal);
5919 else
5920 continue;
5921
5922 MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
5923 if (AttrMaxThreadsVal > 0)
5924 MaxThreadsVal = MaxThreadsVal > 0
5925 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
5926 : AttrMaxThreadsVal;
5927 MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
5928 if (AttrMaxBlocksVal > 0)
5929 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
5930 : AttrMaxBlocksVal;
5931 }
5932 }
5933 }
5934
5935 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
5936 const OMPExecutableDirective &D, StringRef ParentName,
5937 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5938 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5939
5940 llvm::TargetRegionEntryInfo EntryInfo =
5941 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
5942
5943 CodeGenFunction CGF(CGM, true);
5944 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
5945 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
5946 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
5947
5948 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
5949 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5950 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
5951 };
5952
5953 OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
5954 IsOffloadEntry, OutlinedFn, OutlinedFnID);
5955
5956 if (!OutlinedFn)
5957 return;
5958
5959 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
5960
5961 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5962 for (auto *A : C->getAttrs()) {
5963 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
5964 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
5965 }
5966 }
5967 }
5968
5969 /// Checks if the expression is constant or does not have non-trivial function
5970 /// calls.
5971 static bool isTrivial(ASTContext &Ctx, const Expr *E) {
5972 // We can skip constant expressions.
5973 // We can skip expressions with trivial calls or simple expressions.
5974 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
5975 !E->hasNonTrivialCall(Ctx)) &&
5976 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
5977 }
5978
5979 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
5980 const Stmt *Body) {
5981 const Stmt *Child = Body->IgnoreContainers();
5982 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
5983 Child = nullptr;
5984 for (const Stmt *S : C->body()) {
5985 if (const auto *E = dyn_cast<Expr>(S)) {
5986 if (isTrivial(Ctx, E))
5987 continue;
5988 }
5989 // Some of the statements can be ignored.
5990 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
5991 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
5992 continue;
5993 // Analyze declarations.
5994 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
5995 if (llvm::all_of(DS->decls(), [](const Decl *D) {
5996 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
5997 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
5998 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
5999 isa<UsingDirectiveDecl>(D) ||
6000 isa<OMPDeclareReductionDecl>(D) ||
6001 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6002 return true;
6003 const auto *VD = dyn_cast<VarDecl>(D);
6004 if (!VD)
6005 return false;
6006 return VD->hasGlobalStorage() || !VD->isUsed();
6007 }))
6008 continue;
6009 }
6010 // Found multiple children - cannot return a single child.
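// E.g. (illustrative): for the captured body '{ foo(); bar(); }' both
// calls are significant, so there is no single child to return.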
6011 if (Child)
6012 return nullptr;
6013 Child = S;
6014 }
6015 if (Child)
6016 Child = Child->IgnoreContainers();
6017 }
6018 return Child;
6019 }
6020
6021 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6022 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6023 int32_t &MaxTeamsVal) {
6024
6025 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6026 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6027 "Expected target-based executable directive.");
6028 switch (DirectiveKind) {
6029 case OMPD_target: {
6030 const auto *CS = D.getInnermostCapturedStmt();
6031 const auto *Body =
6032 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6033 const Stmt *ChildStmt =
6034 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6035 if (const auto *NestedDir =
6036 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6037 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6038 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6039 const Expr *NumTeams =
6040 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6041 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6042 if (auto Constant =
6043 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6044 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6045 return NumTeams;
6046 }
6047 MinTeamsVal = MaxTeamsVal = 0;
6048 return nullptr;
6049 }
6050 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6051 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6052 MinTeamsVal = MaxTeamsVal = 1;
6053 return nullptr;
6054 }
6055 MinTeamsVal = MaxTeamsVal = 1;
6056 return nullptr;
6057 }
6058 // A value of -1 is used to signal that no teams region should be emitted.
6059 MinTeamsVal = MaxTeamsVal = -1;
6060 return nullptr;
6061 }
6062 case OMPD_target_teams_loop:
6063 case OMPD_target_teams:
6064 case OMPD_target_teams_distribute:
6065 case OMPD_target_teams_distribute_simd:
6066 case OMPD_target_teams_distribute_parallel_for:
6067 case OMPD_target_teams_distribute_parallel_for_simd: {
6068 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6069 const Expr *NumTeams =
6070 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6071 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6072 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6073 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6074 return NumTeams;
6075 }
6076 MinTeamsVal = MaxTeamsVal = 0;
6077 return nullptr;
6078 }
6079 case OMPD_target_parallel:
6080 case OMPD_target_parallel_for:
6081 case OMPD_target_parallel_for_simd:
6082 case OMPD_target_parallel_loop:
6083 case OMPD_target_simd:
6084 MinTeamsVal = MaxTeamsVal = 1;
6085 return nullptr;
6086 case OMPD_parallel:
6087 case OMPD_for:
6088 case OMPD_parallel_for:
6089 case OMPD_parallel_loop:
6090 case OMPD_parallel_master:
6091 case OMPD_parallel_sections:
6092 case OMPD_for_simd:
6093 case OMPD_parallel_for_simd:
6094 case OMPD_cancel:
6095 case OMPD_cancellation_point:
6096 case OMPD_ordered:
6097 case OMPD_threadprivate:
6098 case OMPD_allocate:
6099 case OMPD_task:
6100 case OMPD_simd:
6101 case OMPD_tile:
6102 case OMPD_unroll:
6103 case OMPD_sections:
6104 case OMPD_section:
6105 case OMPD_single:
6106 case OMPD_master:
6107 case OMPD_critical:
6108 case OMPD_taskyield:
6109 case OMPD_barrier:
6110 case OMPD_taskwait:
6111 case OMPD_taskgroup:
6112 case OMPD_atomic:
6113 case OMPD_flush:
6114 case OMPD_depobj:
6115 case OMPD_scan:
6116 case OMPD_teams:
6117 case OMPD_target_data:
6118 case OMPD_target_exit_data:
6119 case OMPD_target_enter_data:
6120 case OMPD_distribute:
6121 case OMPD_distribute_simd:
6122 case OMPD_distribute_parallel_for:
6123 case OMPD_distribute_parallel_for_simd:
6124 case OMPD_teams_distribute:
6125 case OMPD_teams_distribute_simd:
6126 case OMPD_teams_distribute_parallel_for:
6127 case OMPD_teams_distribute_parallel_for_simd:
6128 case OMPD_target_update:
6129 case OMPD_declare_simd:
6130 case OMPD_declare_variant:
6131 case OMPD_begin_declare_variant:
6132 case OMPD_end_declare_variant:
6133 case OMPD_declare_target:
6134 case OMPD_end_declare_target:
6135 case OMPD_declare_reduction:
6136 case OMPD_declare_mapper:
6137 case OMPD_taskloop:
6138 case OMPD_taskloop_simd:
6139 case OMPD_master_taskloop:
6140 case OMPD_master_taskloop_simd:
6141 case OMPD_parallel_master_taskloop:
6142 case OMPD_parallel_master_taskloop_simd:
6143 case OMPD_requires:
6144 case OMPD_metadirective:
6145 case OMPD_unknown:
6146 break;
6147 default:
6148 break;
6149 }
6150 llvm_unreachable("Unexpected directive kind.");
6151 }
6152
6153 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6154 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6155 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6156 "Clauses associated with the teams directive expected to be emitted "
6157 "only for the host!");
6158 CGBuilderTy &Bld = CGF.Builder;
6159 int32_t MinNT = -1, MaxNT = -1;
6160 const Expr *NumTeams =
6161 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6162 if (NumTeams != nullptr) {
6163 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6164
6165 switch (DirectiveKind) {
6166 case OMPD_target: {
6167 const auto *CS = D.getInnermostCapturedStmt();
6168 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6169 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6170 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6171 /*IgnoreResultAssign*/ true);
6172 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6173 /*isSigned=*/true);
6174 }
6175 case OMPD_target_teams:
6176 case OMPD_target_teams_distribute:
6177 case OMPD_target_teams_distribute_simd:
6178 case OMPD_target_teams_distribute_parallel_for:
6179 case OMPD_target_teams_distribute_parallel_for_simd: {
6180 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6181 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6182 /*IgnoreResultAssign*/ true);
6183 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6184 /*isSigned=*/true);
6185 }
6186 default:
6187 break;
6188 }
6189 }
6190
6191 assert(MinNT == MaxNT && "Num teams ranges require handling here.");
6192 return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6193 }
6194
6195 /// Check for a num threads constant value (stored in \p UpperBound), or an
6196 /// expression (stored in \p E). If the value is conditional (via an if-clause),
6197 /// store the condition in \p CondVal. If \p E and \p CondVal, respectively, are
6198 /// nullptr, no expression evaluation is performed.
6199 static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6200 const Expr **E, int32_t &UpperBound,
6201 bool UpperBoundOnly, llvm::Value **CondVal) {
6202 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6203 CGF.getContext(), CS->getCapturedStmt());
6204 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6205 if (!Dir)
6206 return;
6207
6208 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6209 // Handle the if clause. If the if clause is present, the number of threads is
6210 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6211 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6212 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6213 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6214 const OMPIfClause *IfClause = nullptr;
6215 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6216 if (C->getNameModifier() == OMPD_unknown ||
6217 C->getNameModifier() == OMPD_parallel) {
6218 IfClause = C;
6219 break;
6220 }
6221 }
6222 if (IfClause) {
6223 const Expr *CondExpr = IfClause->getCondition();
6224 bool Result;
6225 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6226 if (!Result) {
6227 UpperBound = 1;
6228 return;
6229 }
6230 } else {
6231 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6232 if (const auto *PreInit =
6233 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6234 for (const auto *I : PreInit->decls()) {
6235 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6236 CGF.EmitVarDecl(cast<VarDecl>(*I));
6237 } else {
6238 CodeGenFunction::AutoVarEmission Emission =
6239 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6240 CGF.EmitAutoVarCleanups(Emission);
6241 }
6242 }
6243 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6244 }
6245 }
6246 }
6247 }
6248 // Check the value of the num_threads clause if the if clause was not
6249 // specified or does not evaluate to false.
6250 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6251 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6252 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6253 const auto *NumThreadsClause =
6254 Dir->getSingleClause<OMPNumThreadsClause>();
6255 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6256 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6257 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6258 UpperBound =
6259 UpperBound
6260 ? Constant->getZExtValue()
6261 : std::min(UpperBound,
6262 static_cast<int32_t>(Constant->getZExtValue()));
6263 // If we haven't found an upper bound, remember we saw a thread limiting
6264 // clause.
6265 if (UpperBound == -1)
6266 UpperBound = 0;
6267 if (!E)
6268 return;
6269 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6270 if (const auto *PreInit =
6271 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6272 for (const auto *I : PreInit->decls()) {
6273 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6274 CGF.EmitVarDecl(cast<VarDecl>(*I));
6275 } else {
6276 CodeGenFunction::AutoVarEmission Emission =
6277 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6278 CGF.EmitAutoVarCleanups(Emission);
6279 }
6280 }
6281 }
6282 *E = NTExpr;
6283 }
6284 return;
6285 }
6286 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6287 UpperBound = 1;
6288 }
6289
6290 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6291 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6292 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6293 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6294 "Clauses associated with the teams directive expected to be emitted "
6295 "only for the host!");
6296 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6297 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6298 "Expected target-based executable directive.");
6299
6300 const Expr *NT = nullptr;
6301 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6302
6303 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6304 if (E->isIntegerConstantExpr(CGF.getContext())) {
6305 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6306 UpperBound = UpperBound ? Constant->getZExtValue()
6307 : std::min(UpperBound,
6308 int32_t(Constant->getZExtValue()));
6309 }
6310 // If we haven't found an upper bound, remember we saw a thread limiting
6311 // clause.
6312 if (UpperBound == -1)
6313 UpperBound = 0;
6314 if (EPtr)
6315 *EPtr = E;
6316 };
6317
6318 auto ReturnSequential = [&]() {
6319 UpperBound = 1;
6320 return NT;
6321 };
6322
6323 switch (DirectiveKind) {
6324 case OMPD_target: {
6325 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6326 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6327 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6328 CGF.getContext(), CS->getCapturedStmt());
6329 // TODO: The standard is not clear on how to resolve two thread limit
6330 // clauses; pick the teams one if it's present, otherwise the target one.
6331 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6332 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6333 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6334 ThreadLimitClause = TLC;
6335 if (ThreadLimitExpr) {
6336 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6337 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6338 CodeGenFunction::LexicalScope Scope(
6339 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6340 if (const auto *PreInit =
6341 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6342 for (const auto *I : PreInit->decls()) {
6343 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6344 CGF.EmitVarDecl(cast<VarDecl>(*I));
6345 } else {
6346 CodeGenFunction::AutoVarEmission Emission =
6347 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6348 CGF.EmitAutoVarCleanups(Emission);
6349 }
6350 }
6351 }
6352 }
6353 }
6354 }
6355 if (ThreadLimitClause)
6356 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6357 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6358 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6359 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6360 CS = Dir->getInnermostCapturedStmt();
6361 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6362 CGF.getContext(), CS->getCapturedStmt());
6363 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6364 }
6365 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6366 CS = Dir->getInnermostCapturedStmt();
6367 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6368 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6369 return ReturnSequential();
6370 }
6371 return NT;
6372 }
6373 case OMPD_target_teams: {
6374 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6375 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6376 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6377 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6378 }
6379 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6380 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6381 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6382 CGF.getContext(), CS->getCapturedStmt());
6383 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6384 if (Dir->getDirectiveKind() == OMPD_distribute) {
6385 CS = Dir->getInnermostCapturedStmt();
6386 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6387 }
6388 }
6389 return NT;
6390 }
6391 case OMPD_target_teams_distribute:
6392 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6393 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6394 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6395 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6396 }
6397 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6398 UpperBoundOnly, CondVal);
6399 return NT;
6400 case OMPD_target_teams_loop:
6401 case OMPD_target_parallel_loop:
6402 case OMPD_target_parallel:
6403 case OMPD_target_parallel_for:
6404 case OMPD_target_parallel_for_simd:
6405 case OMPD_target_teams_distribute_parallel_for:
6406 case OMPD_target_teams_distribute_parallel_for_simd: {
6407 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6408 const OMPIfClause *IfClause = nullptr;
6409 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6410 if (C->getNameModifier() == OMPD_unknown ||
6411 C->getNameModifier() == OMPD_parallel) {
6412 IfClause = C;
6413 break;
6414 }
6415 }
6416 if (IfClause) {
6417 const Expr *Cond = IfClause->getCondition();
6418 bool Result;
6419 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6420 if (!Result)
6421 return ReturnSequential();
6422 } else {
6423 CodeGenFunction::RunCleanupsScope Scope(CGF);
6424 *CondVal = CGF.EvaluateExprAsBool(Cond);
6425 }
6426 }
6427 }
6428 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6429 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6430 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6431 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6432 }
6433 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6434 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6435 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6436 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6437 return NumThreadsClause->getNumThreads();
6438 }
6439 return NT;
6440 }
6441 case OMPD_target_teams_distribute_simd:
6442 case OMPD_target_simd:
6443 return ReturnSequential();
6444 default:
6445 break;
6446 }
6447 llvm_unreachable("Unsupported directive kind.");
6448 }
6449
6450 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6451 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6452 llvm::Value *NumThreadsVal = nullptr;
6453 llvm::Value *CondVal = nullptr;
6454 llvm::Value *ThreadLimitVal = nullptr;
6455 const Expr *ThreadLimitExpr = nullptr;
6456 int32_t UpperBound = -1;
6457
6458 const Expr *NT = getNumThreadsExprForTargetDirective(
6459 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6460 &ThreadLimitExpr);
6461
6462 // Thread limit expressions are used below, emit them.
6463 if (ThreadLimitExpr) {
6464 ThreadLimitVal =
6465 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6466 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6467 /*isSigned=*/false);
6468 }
6469
6470 // Generate the num threads expression.
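// E.g. (illustrative) for 'target parallel if(c) num_threads(n)
// thread_limit(t)' the value built below is roughly min(t, c ? n : 1).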
6471 if (UpperBound == 1) {
6472 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6473 } else if (NT) {
6474 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6475 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6476 /*isSigned=*/false);
6477 } else if (ThreadLimitVal) {
6478 // If we do not have a num threads value but a thread limit, replace the
6479 // former with the latter. We already handled the thread limit expression.
6480 NumThreadsVal = ThreadLimitVal;
6481 ThreadLimitVal = nullptr;
6482 } else {
6483 // Default to "0" which means runtime choice.
6484 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6485 NumThreadsVal = CGF.Builder.getInt32(0);
6486 }
6487
6488 // Handle the if clause. If the if clause is present, the number of threads is
6489 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6490 if (CondVal) {
6491 CodeGenFunction::RunCleanupsScope Scope(CGF);
6492 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6493 CGF.Builder.getInt32(1));
6494 }
6495
6496 // If the thread limit and num threads expressions were present, take the
6497 // minimum.
6498 if (ThreadLimitVal) {
6499 NumThreadsVal = CGF.Builder.CreateSelect(
6500 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6501 ThreadLimitVal, NumThreadsVal);
6502 }
6503
6504 return NumThreadsVal;
6505 }
6506
6507 namespace {
6508 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6509
6510 // Utility to handle information from clauses associated with a given
6511 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6512 // It provides a convenient interface to obtain the information and generate
6513 // code for that information.
6514 class MappableExprsHandler {
6515 public:
6516 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6517 static unsigned getFlagMemberOffset() {
6518 unsigned Offset = 0;
6519 for (uint64_t Remain =
6520 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6521 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6522 !(Remain & 1); Remain = Remain >> 1)
6523 Offset++;
6524 return Offset;
6525 }
6526
6527 /// Class that holds debugging information for a data mapping to be passed to
6528 /// the runtime library.
6529 class MappingExprInfo {
6530 /// The variable declaration used for the data mapping.
6531 const ValueDecl *MapDecl = nullptr;
6532 /// The original expression used in the map clause, or null if there is
6533 /// none.
6534 const Expr *MapExpr = nullptr;
6535
6536 public:
6537 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6538 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6539
6540 const ValueDecl *getMapDecl() const { return MapDecl; }
6541 const Expr *getMapExpr() const { return MapExpr; }
6542 };
6543
6544 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6545 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6546 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6547 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6548 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6549 using MapNonContiguousArrayTy =
6550 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6551 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6552 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6553
6554 /// This structure contains combined information generated for mappable
6555 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6556 /// mappers, and non-contiguous information.
6557 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6558 MapExprsArrayTy Exprs;
6559 MapValueDeclsArrayTy Mappers;
6560 MapValueDeclsArrayTy DevicePtrDecls;
6561
6562 /// Append arrays in \a CurInfo.
6563 void append(MapCombinedInfoTy &CurInfo) {
6564 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6565 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6566 CurInfo.DevicePtrDecls.end());
6567 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6568 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6569 }
6570 };
6571
6572 /// Map between a struct and its lowest & highest elements which have been
6573 /// mapped.
6574 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6575 /// HE(FieldIndex, Pointer)}
6576 struct StructRangeInfoTy {
6577 MapCombinedInfoTy PreliminaryMapData;
6578 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6579 0, Address::invalid()};
6580 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6581 0, Address::invalid()};
6582 Address Base = Address::invalid();
6583 Address LB = Address::invalid();
6584 bool IsArraySection = false;
6585 bool HasCompleteRecord = false;
6586 };
6587
6588 private:
6589 /// Information gathered for a single mappable-expression component list.
6590 struct MapInfo {
6591 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6592 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6593 ArrayRef<OpenMPMapModifierKind> MapModifiers;
6594 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6595 bool ReturnDevicePointer = false;
6596 bool IsImplicit = false;
6597 const ValueDecl *Mapper = nullptr;
6598 const Expr *VarRef = nullptr;
6599 bool ForDeviceAddr = false;
6600
6601 MapInfo() = default;
6602 MapInfo(
6603 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6604 OpenMPMapClauseKind MapType,
6605 ArrayRef<OpenMPMapModifierKind> MapModifiers,
6606 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6607 bool ReturnDevicePointer, bool IsImplicit,
6608 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6609 bool ForDeviceAddr = false)
6610 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6611 MotionModifiers(MotionModifiers),
6612 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6613 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6614 };
6615
6616 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6617 /// member and there is no map information about it, then emission of that
6618 /// entry is deferred until the whole struct has been processed.
6619 struct DeferredDevicePtrEntryTy {
6620 const Expr *IE = nullptr;
6621 const ValueDecl *VD = nullptr;
6622 bool ForDeviceAddr = false;
6623
6624 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6625 bool ForDeviceAddr)
6626 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6627 };
6628
6629 /// The target directive from which the mappable clauses were extracted. It
6630 /// is either an executable directive or a user-defined mapper directive.
6631 llvm::PointerUnion<const OMPExecutableDirective *,
6632 const OMPDeclareMapperDecl *>
6633 CurDir;
6634
6635 /// Function the directive is being generated for.
6636 CodeGenFunction &CGF;
6637
6638 /// Set of all firstprivate variables in the current directive.
6639 /// bool data is set to true if the variable is implicitly marked as
6640 /// firstprivate, false otherwise.
6641 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6642
6643 /// Map between device pointer declarations and their expression components.
6644 /// The key value for declarations in 'this' is null.
6645 llvm::DenseMap<
6646 const ValueDecl *,
6647 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6648 DevPointersMap;
6649
6650 /// Map between device addr declarations and their expression components.
6651 /// The key value for declarations in 'this' is null.
6652 llvm::DenseMap<
6653 const ValueDecl *,
6654 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6655 HasDevAddrsMap;
6656
6657 /// Map between lambda declarations and their map type.
6658 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6659
6660 llvm::Value *getExprTypeSize(const Expr *E) const {
6661 QualType ExprTy = E->getType().getCanonicalType();
6662
6663 // Calculate the size for array shaping expression.
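// E.g. (illustrative) for '([3][n])p' with 'int *p' the size is computed
// as sizeof(int) * 3 * n, one multiply per dimension.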
6664 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6665 llvm::Value *Size =
6666 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6667 for (const Expr *SE : OAE->getDimensions()) {
6668 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
6669 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6670 CGF.getContext().getSizeType(),
6671 SE->getExprLoc());
6672 Size = CGF.Builder.CreateNUWMul(Size, Sz);
6673 }
6674 return Size;
6675 }
6676
6677 // Reference types are ignored for mapping purposes.
6678 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6679 ExprTy = RefTy->getPointeeType().getCanonicalType();
6680
6681 // Given that an array section is considered a built-in type, we need to
6682 // do the calculation based on the length of the section instead of relying
6683 // on CGF.getTypeSize(E->getType()).
6684 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
6685 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
6686 OAE->getBase()->IgnoreParenImpCasts())
6687 .getCanonicalType();
6688
6689 // If there is no length associated with the expression and the lower
6690 // bound is not specified either, we are mapping the whole length of the
6691 // base.
6692 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6693 !OAE->getLowerBound())
6694 return CGF.getTypeSize(BaseTy);
6695
6696 llvm::Value *ElemSize;
6697 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6698 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6699 } else {
6700 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6701 assert(ATy && "Expecting array type if not a pointer type.");
6702 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6703 }
6704
6705 // If we don't have a length at this point, that is because we have an
6706 // array section with a single element.
6707 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
6708 return ElemSize;
6709
6710 if (const Expr *LenExpr = OAE->getLength()) {
6711 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
6712 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
6713 CGF.getContext().getSizeType(),
6714 LenExpr->getExprLoc());
6715 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6716 }
6717 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6718 OAE->getLowerBound() && "expected array_section[lb:].");
6719 // Size = sizetype - lb * elemtype;
6720 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
6721 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
6722 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
6723 CGF.getContext().getSizeType(),
6724 OAE->getLowerBound()->getExprLoc());
6725 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
6726 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
6727 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
6728 LengthVal = CGF.Builder.CreateSelect(
6729 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
6730 return LengthVal;
6731 }
6732 return CGF.getTypeSize(ExprTy);
6733 }
6734
6735 /// Return the corresponding bits for a given map clause modifier. Add
6736 /// a flag marking the map as a pointer if requested. Add a flag marking the
6737 /// map as the first one of a series of maps that relate to the same map
6738 /// expression.
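/// E.g. (illustrative) 'map(always, close, tofrom: x)' yields
/// OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE; implicit
/// maps additionally set OMP_MAP_IMPLICIT.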
6739 OpenMPOffloadMappingFlags getMapTypeBits(
6740 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6741 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6742 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6743 OpenMPOffloadMappingFlags Bits =
6744 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
6745 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
6746 switch (MapType) {
6747 case OMPC_MAP_alloc:
6748 case OMPC_MAP_release:
6749 // alloc and release are the default behavior in the runtime library, i.e.
6750 // if we don't pass any bits, alloc/release is what the runtime is going
6751 // to do. Therefore, we don't need to signal anything for these two
6752 // type modifiers.
6753 break;
6754 case OMPC_MAP_to:
6755 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
6756 break;
6757 case OMPC_MAP_from:
6758 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6759 break;
6760 case OMPC_MAP_tofrom:
6761 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
6762 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6763 break;
6764 case OMPC_MAP_delete:
6765 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
6766 break;
6767 case OMPC_MAP_unknown:
6768 llvm_unreachable("Unexpected map type!");
6769 }
6770 if (AddPtrFlag)
6771 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
6772 if (AddIsTargetParamFlag)
6773 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
6774 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
6775 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
6776 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
6777 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
6778 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
6779 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
6780 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
6781 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
6782 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
6783 if (IsNonContiguous)
6784 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
6785 return Bits;
6786 }
6787
6788 /// Return true if the provided expression is a final array section. A
6789 /// final array section is one whose length can't be proved to be one.
6790 bool isFinalArraySectionExpression(const Expr *E) const {
6791 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
6792
6793 // It is not an array section and therefore not a unity-size one.
6794 if (!OASE)
6795 return false;
6796
6797 // An array section with no colon always refers to a single element.
6798 if (OASE->getColonLocFirst().isInvalid())
6799 return false;
6800
6801 const Expr *Length = OASE->getLength();
6802
6803 // If we don't have a length we have to check if the array has size 1
6804 // for this dimension. Also, we should always expect a length if the
6805 // base type is pointer.
6806 if (!Length) {
6807 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
6808 OASE->getBase()->IgnoreParenImpCasts())
6809 .getCanonicalType();
6810 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6811 return ATy->getSExtSize() != 1;
6812 // If we don't have a constant dimension length, we have to consider
6813 // the current section as having any size, so it is not necessarily
6814 // unitary. If it happens to be unity size, that's the user's fault.
6815 return true;
6816 }
6817
6818 // Check if the length evaluates to 1.
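// E.g. (illustrative): 'a[i:1]' has constant length 1 and is not final,
// while 'a[i:n]' must conservatively be treated as final.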
6819 Expr::EvalResult Result;
6820 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
6821 return true; // Can have more than size 1.
6822
6823 llvm::APSInt ConstLength = Result.Val.getInt();
6824 return ConstLength.getSExtValue() != 1;
6825 }
6826
6827 /// Generate the base pointers, section pointers, sizes, map type bits, and
6828 /// user-defined mappers (all included in \a CombinedInfo) for the provided
6829 /// map type, map or motion modifiers, and expression components.
6830 /// \a IsFirstComponent should be set to true if the provided set of
6831 /// components is the first associated with a capture.
6832 void generateInfoForComponentList(
6833 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6834 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6835 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6836 MapCombinedInfoTy &CombinedInfo,
6837 MapCombinedInfoTy &StructBaseCombinedInfo,
6838 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
6839 bool IsImplicit, bool GenerateAllInfoForClauses,
6840 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
6841 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
6842 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
6843 OverlappedElements = std::nullopt,
6844 bool AreBothBasePtrAndPteeMapped = false) const {
6845 // The following summarizes what has to be generated for each map and the
6846 // types below. The generated information is expressed in this order:
6847 // base pointer, section pointer, size, flags
6848 // (to add to the ones that come from the map type and modifier).
6849 //
6850 // double d;
6851 // int i[100];
6852 // float *p;
6853 // int **a = &i;
6854 //
6855 // struct S1 {
6856 // int i;
6857 // float f[50];
6858 // }
6859 // struct S2 {
6860 // int i;
6861 // float f[50];
6862 // S1 s;
6863 // double *p;
6864 // struct S2 *ps;
6865 // int &ref;
6866 // }
6867 // S2 s;
6868 // S2 *ps;
6869 //
6870 // map(d)
6871 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
6872 //
6873 // map(i)
6874 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
6875 //
6876 // map(i[1:23])
6877 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
6878 //
6879 // map(p)
6880 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
6881 //
6882 // map(p[1:24])
6883 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
6884 // in unified shared memory mode or for local pointers
6885 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
6886 //
6887 // map((*a)[0:3])
6888 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6889 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
6890 //
6891 // map(**a)
6892 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6893 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
6894 //
6895 // map(s)
6896 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
6897 //
6898 // map(s.i)
6899 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
6900 //
6901 // map(s.s.f)
6902 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6903 //
6904 // map(s.p)
6905 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
6906 //
6907 // map(to: s.p[:22])
6908 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
6909 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
6910 // &(s.p), &(s.p[0]), 22*sizeof(double),
6911 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6912 // (*) alloc space for struct members, only this is a target parameter
6913 // (**) map the pointer (nothing to be mapped in this example) (the compiler
6914 // optimizes this entry out, same in the examples below)
6915 // (***) map the pointee (map: to)
6916 //
6917 // map(to: s.ref)
6918 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
6919 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6920 // (*) alloc space for struct members, only this is a target parameter
6921 // (**) map the pointer (nothing to be mapped in this example) (the compiler
6922 // optimizes this entry out, same in the examples below)
6923 // (***) map the pointee (map: to)
6924 //
6925 // map(s.ps)
6926 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6927 //
6928 // map(from: s.ps->s.i)
6929 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6930 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6931 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6932 //
6933 // map(to: s.ps->ps)
6934 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6935 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6936 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
6937 //
6938 // map(s.ps->ps->ps)
6939 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6940 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6941 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6942 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6943 //
6944 // map(to: s.ps->ps->s.f[:22])
6945 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6946 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6947 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6948 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6949 //
6950 // map(ps)
6951 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
6952 //
6953 // map(ps->i)
6954 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
6955 //
6956 // map(ps->s.f)
6957 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6958 //
6959 // map(from: ps->p)
6960 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
6961 //
6962 // map(to: ps->p[:22])
6963 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
6964 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
6965 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
6966 //
6967 // map(ps->ps)
6968 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6969 //
6970 // map(from: ps->ps->s.i)
6971 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6972 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6973 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6974 //
6975 // map(from: ps->ps->ps)
6976 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6977 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6978 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6979 //
6980 // map(ps->ps->ps->ps)
6981 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6982 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6983 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6984 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6985 //
6986 // map(to: ps->ps->ps->s.f[:22])
6987 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6988 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6989 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6990 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6991 //
6992 // map(to: s.f[:22]) map(from: s.p[:33])
6993 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
6994 // sizeof(double*) (*), TARGET_PARAM
6995 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
6996 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
6997 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6998 // (*) allocate contiguous space needed to fit all mapped members even if
6999 // we allocate space for members not mapped (in this example,
7000 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7001 // them as well because they fall between &s.f[0] and &s.p)
7002 //
7003 // map(from: s.f[:22]) map(to: ps->p[:33])
7004 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7005 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7006 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7007 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7008 // (*) the struct this entry pertains to is the 2nd element in the list of
7009 // arguments, hence MEMBER_OF(2)
7010 //
7011 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7012 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7013 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7014 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7015 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7016 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7017 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7018 // (*) the struct this entry pertains to is the 4th element in the list
7019 // of arguments, hence MEMBER_OF(4)
7020 //
7021 // map(p, p[:100])
7022 // ===> map(p[:100])
7023 // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7024
7025 // Track if the map information being generated is the first for a capture.
7026 bool IsCaptureFirstInfo = IsFirstComponentList;
7027 // When the variable is on a declare target link or in a to clause with
7028 // unified memory, a reference is needed to hold the host/device address
7029 // of the variable.
7030 bool RequiresReference = false;
7031
7032 // Scan the components from the base to the complete expression.
7033 auto CI = Components.rbegin();
7034 auto CE = Components.rend();
7035 auto I = CI;
7036
7037 // Track if the map information being generated is the first for a list of
7038 // components.
7039 bool IsExpressionFirstInfo = true;
7040 bool FirstPointerInComplexData = false;
7041 Address BP = Address::invalid();
7042 const Expr *AssocExpr = I->getAssociatedExpression();
7043 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7044 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7045 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7046
7047 if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
7048 return;
7049 if (isa<MemberExpr>(AssocExpr)) {
7050 // The base is the 'this' pointer. The content of the pointer is going
7051 // to be the base of the field being mapped.
7052 BP = CGF.LoadCXXThisAddress();
7053 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7054 (OASE &&
7055 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7056 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7057 } else if (OAShE &&
7058 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7059 BP = Address(
7060 CGF.EmitScalarExpr(OAShE->getBase()),
7061 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7062 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7063 } else {
7064 // The base is the reference to the variable.
7065 // BP = &Var.
7066         BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7067         if (const auto *VD =
7068                 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7069           if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7070                   OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7071             if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7072                 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7073                   *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7074                  CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7075               RequiresReference = true;
7076               BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7077             }
7078           }
7079         }
7080
7081         // If the variable is a pointer and is being dereferenced (i.e. is not
7082         // the last component), the base has to be the pointer itself, not its
7083         // reference. References are ignored for mapping purposes.
7084         QualType Ty =
7085             I->getAssociatedDeclaration()->getType().getNonReferenceType();
7086         if (Ty->isAnyPointerType() && std::next(I) != CE) {
7087           // No need to generate individual map information for the pointer; it
7088           // can be associated with the combined storage if shared memory mode is
7089           // active or the base declaration is not a global variable.
7090           const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7091           if (!AreBothBasePtrAndPteeMapped &&
7092               (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7093                !VD || VD->hasLocalStorage()))
7094             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7095           else
7096             FirstPointerInComplexData = true;
7097           ++I;
7098         }
7099       }
7100
7101       // Track whether a component of the list should be marked as MEMBER_OF some
7102       // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7103       // in a component list should be marked as MEMBER_OF; all subsequent entries
7104       // do not belong to the base struct. E.g.
7105       // struct S2 s;
7106       // s.ps->ps->ps->f[:]
7107       //   (1)  (2)  (3)  (4)
7108       // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7109       // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7110       // is the pointee of ps(2), which is not a member of struct s, so it should
7111       // not be marked as such (it is still PTR_AND_OBJ).
7112       // The variable is initialized to false so that PTR_AND_OBJ entries which
7113       // are not struct members are not considered (e.g. an array of pointers to
7114       // data).
7115       bool ShouldBeMemberOf = false;
7116
7117       // Variable keeping track of whether or not we have encountered a component
7118       // in the component list which is a member expression. Useful when we have a
7119       // pointer or a final array section, in which case it is the previous
7120       // component in the list which tells us whether we have a member expression.
7121       // E.g. X.f[:]
7122       // While processing the final array section "[:]" it is "f" which tells us
7123       // whether we are dealing with a member of a declared struct.
7124       const MemberExpr *EncounteredME = nullptr;
7125
7126       // Track the total number of dimensions. Start from one for the dummy
7127       // dimension.
7128       uint64_t DimSize = 1;
7129
7130       bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7131       bool IsPrevMemberReference = false;
7132
7133       // We need to check if we will be encountering any MEs. If we do not
7134       // encounter any ME expression, it means we will be mapping the whole struct.
7135       // In that case we need to skip adding an entry for the struct to the
7136       // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7137       // list only when generating all info for clauses.
7138       bool IsMappingWholeStruct = true;
7139       if (!GenerateAllInfoForClauses) {
7140         IsMappingWholeStruct = false;
7141       } else {
7142         for (auto TempI = I; TempI != CE; ++TempI) {
7143           const MemberExpr *PossibleME =
7144               dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7145           if (PossibleME) {
7146             IsMappingWholeStruct = false;
7147             break;
7148           }
7149         }
7150       }
7151
7152       for (; I != CE; ++I) {
7153         // If the current component is a member of a struct (the parent struct),
7154         // mark it.
7155         if (!EncounteredME) {
7155           EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7156           // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7157           // as MEMBER_OF the parent struct.
7158           if (EncounteredME) {
7159             ShouldBeMemberOf = true;
7160             // Do not emit as a complex pointer if this is actually not an
7161             // array-like expression.
7162             if (FirstPointerInComplexData) {
7163               QualType Ty = std::prev(I)
7164                                 ->getAssociatedDeclaration()
7165                                 ->getType()
7166                                 .getNonReferenceType();
7167               BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7168               FirstPointerInComplexData = false;
7169             }
7170           }
7171         }
7172
7173         auto Next = std::next(I);
7174
7175         // We need to generate the addresses and sizes if this is the last
7176         // component, if the component is a pointer, or if it is an array section
7177         // whose length can't be proved to be one. If this is a pointer, it
7178         // becomes the base address for the following components.
7179
7180         // A final array section is one whose length can't be proved to be one.
7181         // If the map item is non-contiguous then we don't treat any array section
7182         // as a final array section.
7183         bool IsFinalArraySection =
7184             !IsNonContiguous &&
7185             isFinalArraySectionExpression(I->getAssociatedExpression());
7186
7187         // If we have a declaration for the mapping, use that; otherwise use
7188         // the base declaration of the map clause.
7189         const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7190                                        ? I->getAssociatedDeclaration()
7191                                        : BaseDecl;
7192         MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7193                                                  : MapExpr;
7194
7195         // Get information on whether the element is a pointer. We have to treat
7196         // array sections specially, given that they are built-in
7197         // types.
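        // E.g. (illustrative) for 'double *p', the section p[0:10] is an
        // ArraySectionExpr; whether it is pointer-based is determined from the
        // base's original type ('double *') via getBaseOriginalType below,
        // since the section expression itself is of a built-in type.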
7198 const auto *OASE = 7199 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression()); 7200 const auto *OAShE = 7201 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7202 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7203 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7204 bool IsPointer = 7205 OAShE || 7206 (OASE && ArraySectionExpr::getBaseOriginalType(OASE) 7207 .getCanonicalType() 7208 ->isAnyPointerType()) || 7209 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7210 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 7211 MapDecl && 7212 MapDecl->getType()->isLValueReferenceType(); 7213 bool IsNonDerefPointer = IsPointer && 7214 !(UO && UO->getOpcode() != UO_Deref) && !BO && 7215 !IsNonContiguous; 7216 7217 if (OASE) 7218 ++DimSize; 7219 7220 if (Next == CE || IsMemberReference || IsNonDerefPointer || 7221 IsFinalArraySection) { 7222 // If this is not the last component, we expect the pointer to be 7223 // associated with an array expression or member expression. 7224 assert((Next == CE || 7225 isa<MemberExpr>(Next->getAssociatedExpression()) || 7226 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7227 isa<ArraySectionExpr>(Next->getAssociatedExpression()) || 7228 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 7229 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7230 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7231 "Unexpected expression"); 7232 7233 Address LB = Address::invalid(); 7234 Address LowestElem = Address::invalid(); 7235 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 7236 const MemberExpr *E) { 7237 const Expr *BaseExpr = E->getBase(); 7238 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 7239 // scalar. 7240 LValue BaseLV; 7241 if (E->isArrow()) { 7242 LValueBaseInfo BaseInfo; 7243 TBAAAccessInfo TBAAInfo; 7244 Address Addr = 7245 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 7246 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 7247 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 7248 } else { 7249 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 7250 } 7251 return BaseLV; 7252 }; 7253 if (OAShE) { 7254 LowestElem = LB = 7255 Address(CGF.EmitScalarExpr(OAShE->getBase()), 7256 CGF.ConvertTypeForMem( 7257 OAShE->getBase()->getType()->getPointeeType()), 7258 CGF.getContext().getTypeAlignInChars( 7259 OAShE->getBase()->getType())); 7260 } else if (IsMemberReference) { 7261 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 7262 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7263 LowestElem = CGF.EmitLValueForFieldInitialization( 7264 BaseLVal, cast<FieldDecl>(MapDecl)) 7265 .getAddress(); 7266 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 7267 .getAddress(); 7268 } else { 7269 LowestElem = LB = 7270 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7271 .getAddress(); 7272 } 7273 7274 // If this component is a pointer inside the base struct then we don't 7275 // need to create any entry for it - it will be combined with the object 7276 // it is pointing to into a single PTR_AND_OBJ entry. 
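        // E.g. in the map(from: s.ps->s.i) example above, the transfer is
        // expressed by the single &(s.ps), &(s.ps->s.i) PTR_AND_OBJ entry;
        // the member pointer s.ps gets no separate TO/FROM entry of its own.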
        bool IsMemberPointerOrAddr =
7278            EncounteredME &&
7279            (((IsPointer || ForDeviceAddr) &&
7280              I->getAssociatedExpression() == EncounteredME) ||
7281             (IsPrevMemberReference && !IsPointer) ||
7282             (IsMemberReference && Next != CE &&
7283              !Next->getAssociatedExpression()->getType()->isPointerType()));
7284         if (!OverlappedElements.empty() && Next == CE) {
7285           // Handle the base element using the info for overlapped elements.
7286           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7287           assert(!IsPointer &&
7288                  "Unexpected base element with the pointer type.");
7289           // Mark the whole struct as the struct that requires allocation on the
7290           // device.
7291           PartialStruct.LowestElem = {0, LowestElem};
7292           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7293               I->getAssociatedExpression()->getType());
7294           Address HB = CGF.Builder.CreateConstGEP(
7295               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7296                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7297               TypeSize.getQuantity() - 1);
7298           PartialStruct.HighestElem = {
7299               std::numeric_limits<decltype(
7300                   PartialStruct.HighestElem.first)>::max(),
7301               HB};
7302           PartialStruct.Base = BP;
7303           PartialStruct.LB = LB;
7304           assert(
7305               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7306               "Overlapped elements must be used only once for the variable.");
7307           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7308           // Emit entries for the non-overlapped data.
7309           OpenMPOffloadMappingFlags Flags =
7310               OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7311               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7312                              /*AddPtrFlag=*/false,
7313                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7314           llvm::Value *Size = nullptr;
7315           // Do a bitcopy of all non-overlapped structure elements.
7316           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7317                    Component : OverlappedElements) {
7318             Address ComponentLB = Address::invalid();
7319             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7320                  Component) {
7321               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7322                 const auto *FD = dyn_cast<FieldDecl>(VD);
7323                 if (FD && FD->getType()->isLValueReferenceType()) {
7324                   const auto *ME =
7325                       cast<MemberExpr>(MC.getAssociatedExpression());
7326                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7327                   ComponentLB =
7328                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7329                           .getAddress();
7330                 } else {
7331                   ComponentLB =
7332                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7333                           .getAddress();
7334                 }
7335                 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7336                 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7337                 Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
7338                                                  LBPtr);
7339                 break;
7340               }
7341             }
7342             assert(Size && "Failed to determine structure size");
7343             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7344             CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7345             CombinedInfo.DevicePtrDecls.push_back(nullptr);
7346             CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7347             CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7348             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7349                 Size, CGF.Int64Ty, /*isSigned=*/true));
7350             CombinedInfo.Types.push_back(Flags);
7351             CombinedInfo.Mappers.push_back(nullptr);
7352             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ?
DimSize 7353 : 1); 7354 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7355 } 7356 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7357 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF)); 7358 CombinedInfo.DevicePtrDecls.push_back(nullptr); 7359 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 7360 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF)); 7361 llvm::Value *LBPtr = LB.emitRawPointer(CGF); 7362 Size = CGF.Builder.CreatePtrDiff( 7363 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF), 7364 LBPtr); 7365 CombinedInfo.Sizes.push_back( 7366 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7367 CombinedInfo.Types.push_back(Flags); 7368 CombinedInfo.Mappers.push_back(nullptr); 7369 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7370 : 1); 7371 break; 7372 } 7373 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7374 // Skip adding an entry in the CurInfo of this combined entry if the 7375 // whole struct is currently being mapped. The struct needs to be added 7376 // in the first position before any data internal to the struct is being 7377 // mapped. 7378 if (!IsMemberPointerOrAddr || 7379 (Next == CE && MapType != OMPC_MAP_unknown)) { 7380 if (!IsMappingWholeStruct) { 7381 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7382 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF)); 7383 CombinedInfo.DevicePtrDecls.push_back(nullptr); 7384 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 7385 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF)); 7386 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7387 Size, CGF.Int64Ty, /*isSigned=*/true)); 7388 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7389 : 1); 7390 } else { 7391 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7392 StructBaseCombinedInfo.BasePointers.push_back( 7393 BP.emitRawPointer(CGF)); 7394 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr); 7395 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 7396 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF)); 7397 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7398 Size, CGF.Int64Ty, /*isSigned=*/true)); 7399 StructBaseCombinedInfo.NonContigInfo.Dims.push_back( 7400 IsNonContiguous ? DimSize : 1); 7401 } 7402 7403 // If Mapper is valid, the last component inherits the mapper. 7404 bool HasMapper = Mapper && Next == CE; 7405 if (!IsMappingWholeStruct) 7406 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 7407 else 7408 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper 7409 : nullptr); 7410 7411 // We need to add a pointer flag for each map that comes from the 7412 // same expression except for the first one. We also need to signal 7413 // this map is the first one that relates with the current capture 7414 // (there is a set of entries for each capture). 7415 OpenMPOffloadMappingFlags Flags = 7416 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7417 !IsExpressionFirstInfo || RequiresReference || 7418 FirstPointerInComplexData || IsMemberReference, 7419 AreBothBasePtrAndPteeMapped || 7420 (IsCaptureFirstInfo && !RequiresReference), 7421 IsNonContiguous); 7422 7423 if (!IsExpressionFirstInfo || IsMemberReference) { 7424 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7425 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 
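            // E.g. in the map(s.ps->ps->ps) example above, the intermediate
            // &(s.ps), &(s.ps->ps) entry is MEMBER_OF(1) | PTR_AND_OBJ with no
            // TO/FROM bits; only the final &(s.ps->ps), &(s.ps->ps->ps) entry
            // carries TO | FROM.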
7426             if (IsPointer || (IsMemberReference && Next != CE))
7427               Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7428                          OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7429                          OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7430                          OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7431                          OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7432
7433             if (ShouldBeMemberOf) {
7434               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7435               // should be later updated with the correct value of MEMBER_OF.
7436               Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7437               // From now on, all subsequent PTR_AND_OBJ entries should not be
7438               // marked as MEMBER_OF.
7439               ShouldBeMemberOf = false;
7440             }
7441           }
7442
7443           if (!IsMappingWholeStruct)
7444             CombinedInfo.Types.push_back(Flags);
7445           else
7446             StructBaseCombinedInfo.Types.push_back(Flags);
7447         }
7448
7449         // If we have encountered a member expression so far, keep track of the
7450         // mapped member. If the parent is "*this", then the value declaration
7451         // is nullptr.
7452         if (EncounteredME) {
7453           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7454           unsigned FieldIndex = FD->getFieldIndex();
7455
7456           // Update info about the lowest and highest elements for this struct.
7457           if (!PartialStruct.Base.isValid()) {
7458             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7459             if (IsFinalArraySection) {
7460               Address HB =
7461                   CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7462                       .getAddress();
7463               PartialStruct.HighestElem = {FieldIndex, HB};
7464             } else {
7465               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7466             }
7467             PartialStruct.Base = BP;
7468             PartialStruct.LB = BP;
7469           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7470             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7471           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7472             if (IsFinalArraySection) {
7473               Address HB =
7474                   CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7475                       .getAddress();
7476               PartialStruct.HighestElem = {FieldIndex, HB};
7477             } else {
7478               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7479             }
7480           }
7481         }
7482
7483         // We need to emit a combined struct entry for array sections.
7484         if (IsFinalArraySection || IsNonContiguous)
7485           PartialStruct.IsArraySection = true;
7486
7487         // If we have a final array section, we are done with this expression.
7488         if (IsFinalArraySection)
7489           break;
7490
7491         // The pointer becomes the base for the next element.
7492         if (Next != CE)
7493           BP = IsMemberReference ? LowestElem : LB;
7494
7495         IsExpressionFirstInfo = false;
7496         IsCaptureFirstInfo = false;
7497         FirstPointerInComplexData = false;
7498         IsPrevMemberReference = IsMemberReference;
7499       } else if (FirstPointerInComplexData) {
7500         QualType Ty = Components.rbegin()
7501                           ->getAssociatedDeclaration()
7502                           ->getType()
7503                           .getNonReferenceType();
7504         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7505         FirstPointerInComplexData = false;
7506       }
7507     }
7508     // If we ran over the whole component list, allocate space for the whole
7509     // record.
7510     if (!EncounteredME)
7511       PartialStruct.HasCompleteRecord = true;
7512
7513     if (!IsNonContiguous)
7514       return;
7515
7516     const ASTContext &Context = CGF.getContext();
7517
7518     // To support strides in array sections, we need to initialize the first
7519     // dimension size as 1, the first offset as 0, and the first count as 1.
7520     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7521     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7522     MapValuesArrayTy CurStrides;
7523     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7524     uint64_t ElementTypeSize;
7525
7526     // Collect size information for each dimension and get the element size as
7527     // the first stride. For example, for `int arr[10][10]`, the DimSizes
7528     // should be [10, 10] and the first stride is 4 bytes.
7529     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7530          Components) {
7531       const Expr *AssocExpr = Component.getAssociatedExpression();
7532       const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7533
7534       if (!OASE)
7535         continue;
7536
7537       QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
7538       auto *CAT = Context.getAsConstantArrayType(Ty);
7539       auto *VAT = Context.getAsVariableArrayType(Ty);
7540
7541       // We need all the dimension sizes except for the last dimension.
7542       assert((VAT || CAT || &Component == &*Components.begin()) &&
7543              "Should be either ConstantArray or VariableArray if not the "
7544              "first Component");
7545
7546       // Get the element size if CurStrides is empty.
7547       if (CurStrides.empty()) {
7548         const Type *ElementType = nullptr;
7549         if (CAT)
7550           ElementType = CAT->getElementType().getTypePtr();
7551         else if (VAT)
7552           ElementType = VAT->getElementType().getTypePtr();
7553         else
7554           assert(&Component == &*Components.begin() &&
7555                  "Only expect pointer (non CAT or VAT) when this is the "
7556                  "first Component");
7557         // If ElementType is null, then it means the base is a pointer
7558         // (neither CAT nor VAT) and we'll attempt to get ElementType again
7559         // for the next iteration.
7560         if (ElementType) {
7561           // When the base is a pointer, we need to remove one level of
7562           // indirection.
7563           if (&Component != &*Components.begin())
7564             ElementType = ElementType->getPointeeOrArrayElementType();
7565           ElementTypeSize =
7566               Context.getTypeSizeInChars(ElementType).getQuantity();
7567           CurStrides.push_back(
7568               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7569         }
7570       }
7571       // Get the dimension value, except for the last dimension, since we don't
7572       // need it.
7573       if (DimSizes.size() < Components.size() - 1) {
7574         if (CAT)
7575           DimSizes.push_back(
7576               llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
7577         else if (VAT)
7578           DimSizes.push_back(CGF.Builder.CreateIntCast(
7579               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7580               /*IsSigned=*/false));
7581       }
7582     }
7583
7584     // Skip the dummy dimension since we already have its information.
7585     auto *DI = DimSizes.begin() + 1;
7586     // Product of the dimensions.
7587     llvm::Value *DimProd =
7588         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7589
7590     // Collect info for non-contiguous. Notice that offset, count, and stride
7591     // are only meaningful for an array section, so we insert a null for
7592     // anything other than an array section.
7593     // Also, the offset, count, and stride lists are not the same size as
7594     // pointers, base_pointers, sizes, or dims.
Instead, their size
7595     // equals the number of non-contiguous declarations in the target update
7596     // to/from clauses.
7597     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7598          Components) {
7599       const Expr *AssocExpr = Component.getAssociatedExpression();
7600
7601       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7602         llvm::Value *Offset = CGF.Builder.CreateIntCast(
7603             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7604             /*isSigned=*/false);
7605         CurOffsets.push_back(Offset);
7606         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7607         CurStrides.push_back(CurStrides.back());
7608         continue;
7609       }
7610
7611       const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7612
7613       if (!OASE)
7614         continue;
7615
7616       // Offset
7617       const Expr *OffsetExpr = OASE->getLowerBound();
7618       llvm::Value *Offset = nullptr;
7619       if (!OffsetExpr) {
7620         // If the offset is absent, then we just set it to zero.
7621         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7622       } else {
7623         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7624                                            CGF.Int64Ty,
7625                                            /*isSigned=*/false);
7626       }
7627       CurOffsets.push_back(Offset);
7628
7629       // Count
7630       const Expr *CountExpr = OASE->getLength();
7631       llvm::Value *Count = nullptr;
7632       if (!CountExpr) {
7633         // In Clang, once a higher dimension is an array section, all the lower
7634         // dimensions are constructed as array sections as well. However, for a
7635         // case like arr[0:2][2], Clang constructs the inner dimension as an
7636         // array section even though it is not one according to the spec.
7637         if (!OASE->getColonLocFirst().isValid() &&
7638             !OASE->getColonLocSecond().isValid()) {
7639           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7640         } else {
7641           // OpenMP 5.0, 2.1.5 Array Sections, Description.
7642           // When the length is absent, it defaults to ⌈(size −
7643           // lower-bound)/stride⌉, where size is the size of the array
7644           // dimension.
7645           const Expr *StrideExpr = OASE->getStride();
7646           llvm::Value *Stride =
7647               StrideExpr
7648                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7649                                               CGF.Int64Ty, /*isSigned=*/false)
7650                   : nullptr;
7651           if (Stride)
7652             Count = CGF.Builder.CreateUDiv(
7653                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7654           else
7655             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7656         }
7657       } else {
7658         Count = CGF.EmitScalarExpr(CountExpr);
7659       }
7660       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7661       CurCounts.push_back(Count);
7662
7663       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7664       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7665       //              Offset Count     Stride
7666       //    D0          0     1         4    (int)    <- dummy dimension
7667       //    D1          0     2         8    (2 * (1) * 4)
7668       //    D2          1     2        20    (1 * (1 * 5) * 4)
7669       //    D3          0     2       200    (2 * (1 * 5 * 5) * 4)
7670       const Expr *StrideExpr = OASE->getStride();
7671       llvm::Value *Stride =
7672           StrideExpr
7673               ?
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7674                                           CGF.Int64Ty, /*isSigned=*/false)
7675               : nullptr;
7676       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7677       if (Stride)
7678         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7679       else
7680         CurStrides.push_back(DimProd);
7681       if (DI != DimSizes.end())
7682         ++DI;
7683     }
7684
7685     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7686     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7687     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7688   }
7689
7690   /// Return the adjusted map modifiers if the declaration a capture refers to
7691   /// appears in a firstprivate clause. This is expected to be used only with
7692   /// directives that start with 'target'.
7693   OpenMPOffloadMappingFlags
7694   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7695     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7696
7697     // A firstprivate variable captured by reference will use only the
7698     // 'private ptr' and 'map to' flags. Return the right flags if the captured
7699     // declaration is known as firstprivate in this handler.
7700     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7701       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7702         return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7703                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7704       return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7705              OpenMPOffloadMappingFlags::OMP_MAP_TO;
7706     }
7707     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7708     if (I != LambdasMap.end())
7709       // For map(to: lambda), use the user-specified map type.
7710       return getMapTypeBits(
7711           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7712           /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
7713           /*AddPtrFlag=*/false,
7714           /*AddIsTargetParamFlag=*/false,
7715           /*isNonContiguous=*/false);
7716     return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7717            OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7718   }
7719
7720   void getPlainLayout(const CXXRecordDecl *RD,
7721                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7722                       bool AsBase) const {
7723     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7724
7725     llvm::StructType *St =
7726         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7727
7728     unsigned NumElements = St->getNumElements();
7729     llvm::SmallVector<
7730         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7731         RecordLayout(NumElements);
7732
7733     // Fill bases.
7734     for (const auto &I : RD->bases()) {
7735       if (I.isVirtual())
7736         continue;
7737
7738       QualType BaseTy = I.getType();
7739       const auto *Base = BaseTy->getAsCXXRecordDecl();
7740       // Ignore empty bases.
7741       if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
7742           CGF.getContext()
7743               .getASTRecordLayout(Base)
7744               .getNonVirtualSize()
7745               .isZero())
7746         continue;
7747
7748       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7749       RecordLayout[FieldIndex] = Base;
7750     }
7751     // Fill in virtual bases.
7752     for (const auto &I : RD->vbases()) {
7753       QualType BaseTy = I.getType();
7754       // Ignore empty bases.
7755       if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
7756         continue;
7757
7758       const auto *Base = BaseTy->getAsCXXRecordDecl();
7759       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7760       if (RecordLayout[FieldIndex])
7761         continue;
7762       RecordLayout[FieldIndex] = Base;
7763     }
7764     // Fill in all the fields.
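    // Net effect (sketch): for 'struct B { int X; }; struct D : B { double Y;
    // };', getPlainLayout(D, Layout, /*AsBase=*/false) appends B::X (through
    // the base recursion below) and then D::Y, i.e. the fields in plain
    // memory-layout order.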
7765 assert(!RD->isUnion() && "Unexpected union."); 7766 for (const auto *Field : RD->fields()) { 7767 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7768 // will fill in later.) 7769 if (!Field->isBitField() && 7770 !isEmptyFieldForLayout(CGF.getContext(), Field)) { 7771 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7772 RecordLayout[FieldIndex] = Field; 7773 } 7774 } 7775 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7776 &Data : RecordLayout) { 7777 if (Data.isNull()) 7778 continue; 7779 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7780 getPlainLayout(Base, Layout, /*AsBase=*/true); 7781 else 7782 Layout.push_back(Data.get<const FieldDecl *>()); 7783 } 7784 } 7785 7786 /// Generate all the base pointers, section pointers, sizes, map types, and 7787 /// mappers for the extracted mappable expressions (all included in \a 7788 /// CombinedInfo). Also, for each item that relates with a device pointer, a 7789 /// pair of the relevant declaration and index where it occurs is appended to 7790 /// the device pointers info array. 7791 void generateAllInfoForClauses( 7792 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 7793 llvm::OpenMPIRBuilder &OMPBuilder, 7794 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 7795 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 7796 // We have to process the component lists that relate with the same 7797 // declaration in a single chunk so that we can generate the map flags 7798 // correctly. Therefore, we organize all lists in a map. 7799 enum MapKind { Present, Allocs, Other, Total }; 7800 llvm::MapVector<CanonicalDeclPtr<const Decl>, 7801 SmallVector<SmallVector<MapInfo, 8>, 4>> 7802 Info; 7803 7804 // Helper function to fill the information map for the different supported 7805 // clauses. 7806 auto &&InfoGen = 7807 [&Info, &SkipVarSet]( 7808 const ValueDecl *D, MapKind Kind, 7809 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 7810 OpenMPMapClauseKind MapType, 7811 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7812 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7813 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 7814 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { 7815 if (SkipVarSet.contains(D)) 7816 return; 7817 auto It = Info.find(D); 7818 if (It == Info.end()) 7819 It = Info 7820 .insert(std::make_pair( 7821 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total))) 7822 .first; 7823 It->second[Kind].emplace_back( 7824 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, 7825 IsImplicit, Mapper, VarRef, ForDeviceAddr); 7826 }; 7827 7828 for (const auto *Cl : Clauses) { 7829 const auto *C = dyn_cast<OMPMapClause>(Cl); 7830 if (!C) 7831 continue; 7832 MapKind Kind = Other; 7833 if (llvm::is_contained(C->getMapTypeModifiers(), 7834 OMPC_MAP_MODIFIER_present)) 7835 Kind = Present; 7836 else if (C->getMapType() == OMPC_MAP_alloc) 7837 Kind = Allocs; 7838 const auto *EI = C->getVarRefs().begin(); 7839 for (const auto L : C->component_lists()) { 7840 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr;
7841         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
7842                 C->getMapTypeModifiers(), std::nullopt,
7843                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7844                 E);
7845         ++EI;
7846       }
7847     }
7848     for (const auto *Cl : Clauses) {
7849       const auto *C = dyn_cast<OMPToClause>(Cl);
7850       if (!C)
7851         continue;
7852       MapKind Kind = Other;
7853       if (llvm::is_contained(C->getMotionModifiers(),
7854                              OMPC_MOTION_MODIFIER_present))
7855         Kind = Present;
7856       const auto *EI = C->getVarRefs().begin();
7857       for (const auto L : C->component_lists()) {
7858         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
7859                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
7860                 C->isImplicit(), std::get<2>(L), *EI);
7861         ++EI;
7862       }
7863     }
7864     for (const auto *Cl : Clauses) {
7865       const auto *C = dyn_cast<OMPFromClause>(Cl);
7866       if (!C)
7867         continue;
7868       MapKind Kind = Other;
7869       if (llvm::is_contained(C->getMotionModifiers(),
7870                              OMPC_MOTION_MODIFIER_present))
7871         Kind = Present;
7872       const auto *EI = C->getVarRefs().begin();
7873       for (const auto L : C->component_lists()) {
7874         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
7875                 std::nullopt, C->getMotionModifiers(),
7876                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7877                 *EI);
7878         ++EI;
7879       }
7880     }
7881
7882     // Look at the use_device_ptr and use_device_addr clause information and
7883     // mark the existing map entries as such. If there is no map information for
7884     // an entry in the use_device_ptr and use_device_addr list, we create one
7885     // with map type 'alloc' and a zero-size section. It is the user's fault if
7886     // that was not mapped before. If there is no map information and the
7887     // pointer is a struct member, then we defer the emission of that entry
7888     // until the whole struct has been processed.
7889     llvm::MapVector<CanonicalDeclPtr<const Decl>,
7890                     SmallVector<DeferredDevicePtrEntryTy, 4>>
7891         DeferredInfo;
7892     MapCombinedInfoTy UseDeviceDataCombinedInfo;
7893
7894     auto &&UseDeviceDataCombinedInfoGen =
7895         [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
7896                                      CodeGenFunction &CGF, bool IsDevAddr) {
7897           UseDeviceDataCombinedInfo.Exprs.push_back(VD);
7898           UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
7899           UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
7900           UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
7901               IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
7902           UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
7903           UseDeviceDataCombinedInfo.Sizes.push_back(
7904               llvm::Constant::getNullValue(CGF.Int64Ty));
7905           UseDeviceDataCombinedInfo.Types.push_back(
7906               OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
7907           UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
7908         };
7909
7910     auto &&MapInfoGen =
7911         [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
7912          &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
7913                    OMPClauseMappableExprCommon::MappableExprComponentListRef
7914                        Components,
7915                    bool IsImplicit, bool IsDevAddr) {
7916           // We didn't find any match in our map information; generate a
7917           // zero-size array section. If the pointer is a struct member, we
7918           // defer this action until the whole struct has been processed.
7919           if (isa<MemberExpr>(IE)) {
7920             // Insert the pointer into Info to be processed by
7921             // generateInfoForComponentList.
Because it is a member pointer
7922             // without a pointee, no entry will be generated for it; therefore,
7923             // we need to generate one after the whole struct has been
7924             // processed. Nonetheless, generateInfoForComponentList must be
7925             // called to take the pointer into account for the calculation of
7926             // the range of the partial struct.
7927             InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
7928                     std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
7929                     nullptr, nullptr, IsDevAddr);
7930             DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
7931           } else {
7932             llvm::Value *Ptr;
7933             if (IsDevAddr) {
7934               if (IE->isGLValue())
7935                 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
7936               else
7937                 Ptr = CGF.EmitScalarExpr(IE);
7938             } else {
7939               Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7940             }
7941             UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
7942           }
7943         };
7944
7945     auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
7946                                     const Expr *IE, bool IsDevAddr) -> bool {
7947       // We potentially have map information for this declaration already.
7948       // Look for the first set of components that refer to it. If found,
7949       // return true.
7950       // If the first component is a member expression, we have to look into
7951       // 'this', which maps to null in the map of map information. Otherwise
7952       // look directly for the information.
7953       auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7954       if (It != Info.end()) {
7955         bool Found = false;
7956         for (auto &Data : It->second) {
7957           auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
7958             return MI.Components.back().getAssociatedDeclaration() == VD;
7959           });
7960           // If we found a map entry, signal that the pointer has to be
7961           // returned and move on to the next declaration. Exclude cases where
7962           // the base pointer is mapped as an array subscript, an array
7963           // section, or array shaping. The base address is passed as a pointer
7964           // to base in this case and cannot be used as a base for a
7965           // use_device_ptr list item.
7966           if (CI != Data.end()) {
7967             if (IsDevAddr) {
7968               CI->ForDeviceAddr = IsDevAddr;
7969               CI->ReturnDevicePointer = true;
7970               Found = true;
7971               break;
7972             } else {
7973               auto PrevCI = std::next(CI->Components.rbegin());
7974               const auto *VarD = dyn_cast<VarDecl>(VD);
7975               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7976                   isa<MemberExpr>(IE) ||
7977                   !VD->getType().getNonReferenceType()->isPointerType() ||
7978                   PrevCI == CI->Components.rend() ||
7979                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
7980                   VarD->hasLocalStorage()) {
7981                 CI->ForDeviceAddr = IsDevAddr;
7982                 CI->ReturnDevicePointer = true;
7983                 Found = true;
7984                 break;
7985               }
7986             }
7987           }
7988         }
7989         return Found;
7990       }
7991       return false;
7992     };
7993
7994     // Look at the use_device_ptr clause information and mark the existing map
7995     // entries as such. If there is no map information for an entry in the
7996     // use_device_ptr list, we create one with map type 'alloc' and a zero-size
7997     // section. It is the user's fault if that was not mapped before. If there
7998     // is no map information and the pointer is a struct member, then we defer
7999     // the emission of that entry until the whole struct has been processed.
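    // For instance (illustrative):
    //   #pragma omp target data map(tofrom: p[0:10]) use_device_ptr(p)
    // marks the existing map entry for 'p' with RETURN_PARAM, while a
    // use_device_ptr(p) with no prior map of 'p' gets the zero-size entry
    // described above created for it.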
8000 for (const auto *Cl : Clauses) { 8001 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 8002 if (!C) 8003 continue; 8004 for (const auto L : C->component_lists()) { 8005 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8006 std::get<1>(L); 8007 assert(!Components.empty() && 8008 "Not expecting empty list of components!"); 8009 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8010 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8011 const Expr *IE = Components.back().getAssociatedExpression(); 8012 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false)) 8013 continue; 8014 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(), 8015 /*IsDevAddr=*/false); 8016 } 8017 } 8018 8019 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 8020 for (const auto *Cl : Clauses) { 8021 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl); 8022 if (!C) 8023 continue; 8024 for (const auto L : C->component_lists()) { 8025 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8026 std::get<1>(L); 8027 assert(!std::get<1>(L).empty() && 8028 "Not expecting empty list of components!"); 8029 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 8030 if (!Processed.insert(VD).second) 8031 continue; 8032 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8033 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 8034 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true)) 8035 continue; 8036 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(), 8037 /*IsDevAddr=*/true); 8038 } 8039 } 8040 8041 for (const auto &Data : Info) { 8042 StructRangeInfoTy PartialStruct; 8043 // Current struct information: 8044 MapCombinedInfoTy CurInfo; 8045 // Current struct base information: 8046 MapCombinedInfoTy StructBaseCurInfo; 8047 const Decl *D = Data.first; 8048 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8049 bool HasMapBasePtr = false; 8050 bool HasMapArraySec = false; 8051 if (VD && VD->getType()->isAnyPointerType()) { 8052 for (const auto &M : Data.second) { 8053 HasMapBasePtr = any_of(M, [](const MapInfo &L) { 8054 return isa_and_present<DeclRefExpr>(L.VarRef); 8055 }); 8056 HasMapArraySec = any_of(M, [](const MapInfo &L) { 8057 return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>( 8058 L.VarRef); 8059 }); 8060 if (HasMapBasePtr && HasMapArraySec) 8061 break; 8062 } 8063 } 8064 for (const auto &M : Data.second) { 8065 for (const MapInfo &L : M) { 8066 assert(!L.Components.empty() && 8067 "Not expecting declaration with no component lists."); 8068 8069 // Remember the current base pointer index. 8070 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8071 unsigned StructBasePointersIdx = 8072 StructBaseCurInfo.BasePointers.size(); 8073 CurInfo.NonContigInfo.IsNonContiguous = 8074 L.Components.back().isNonContiguous(); 8075 generateInfoForComponentList( 8076 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8077 CurInfo, StructBaseCurInfo, PartialStruct, 8078 /*IsFirstComponentList=*/false, L.IsImplicit, 8079 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD, 8080 L.VarRef, /*OverlappedElements*/ std::nullopt, 8081 HasMapBasePtr && HasMapArraySec); 8082 8083 // If this entry relates to a device pointer, set the relevant 8084 // declaration and add the 'return pointer' flag. 
8085 if (L.ReturnDevicePointer) { 8086 // Check whether a value was added to either CurInfo or 8087 // StructBaseCurInfo and error if no value was added to either of 8088 // them: 8089 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() || 8090 StructBasePointersIdx < 8091 StructBaseCurInfo.BasePointers.size()) && 8092 "Unexpected number of mapped base pointers."); 8093 8094 // Choose a base pointer index which is always valid: 8095 const ValueDecl *RelevantVD = 8096 L.Components.back().getAssociatedDeclaration(); 8097 assert(RelevantVD && 8098 "No relevant declaration related with device pointer??"); 8099 8100 // If StructBaseCurInfo has been updated this iteration then work on 8101 // the first new entry added to it i.e. make sure that when multiple 8102 // values are added to any of the lists, the first value added is 8103 // being modified by the assignments below (not the last value 8104 // added). 8105 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) { 8106 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] = 8107 RelevantVD; 8108 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] = 8109 L.ForDeviceAddr ? DeviceInfoTy::Address 8110 : DeviceInfoTy::Pointer; 8111 StructBaseCurInfo.Types[StructBasePointersIdx] |= 8112 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; 8113 } else { 8114 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD; 8115 CurInfo.DevicePointers[CurrentBasePointersIdx] = 8116 L.ForDeviceAddr ? DeviceInfoTy::Address 8117 : DeviceInfoTy::Pointer; 8118 CurInfo.Types[CurrentBasePointersIdx] |= 8119 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; 8120 } 8121 } 8122 } 8123 } 8124 8125 // Append any pending zero-length pointers which are struct members and 8126 // used with use_device_ptr or use_device_addr. 8127 auto CI = DeferredInfo.find(Data.first); 8128 if (CI != DeferredInfo.end()) { 8129 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8130 llvm::Value *BasePtr; 8131 llvm::Value *Ptr; 8132 if (L.ForDeviceAddr) { 8133 if (L.IE->isGLValue()) 8134 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8135 else 8136 Ptr = this->CGF.EmitScalarExpr(L.IE); 8137 BasePtr = Ptr; 8138 // Entry is RETURN_PARAM. Also, set the placeholder value 8139 // MEMBER_OF=FFFF so that the entry is later updated with the 8140 // correct value of MEMBER_OF. 8141 CurInfo.Types.push_back( 8142 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM | 8143 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); 8144 } else { 8145 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8146 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8147 L.IE->getExprLoc()); 8148 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8149 // placeholder value MEMBER_OF=FFFF so that the entry is later 8150 // updated with the correct value of MEMBER_OF. 8151 CurInfo.Types.push_back( 8152 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | 8153 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM | 8154 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); 8155 } 8156 CurInfo.Exprs.push_back(L.VD); 8157 CurInfo.BasePointers.emplace_back(BasePtr); 8158 CurInfo.DevicePtrDecls.emplace_back(L.VD); 8159 CurInfo.DevicePointers.emplace_back( 8160 L.ForDeviceAddr ? 
DeviceInfoTy::Address : DeviceInfoTy::Pointer); 8161 CurInfo.Pointers.push_back(Ptr); 8162 CurInfo.Sizes.push_back( 8163 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8164 CurInfo.Mappers.push_back(nullptr); 8165 } 8166 } 8167 8168 // Unify entries in one list making sure the struct mapping precedes the 8169 // individual fields: 8170 MapCombinedInfoTy UnionCurInfo; 8171 UnionCurInfo.append(StructBaseCurInfo); 8172 UnionCurInfo.append(CurInfo); 8173 8174 // If there is an entry in PartialStruct it means we have a struct with 8175 // individual members mapped. Emit an extra combined entry. 8176 if (PartialStruct.Base.isValid()) { 8177 UnionCurInfo.NonContigInfo.Dims.push_back(0); 8178 // Emit a combined entry: 8179 emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct, 8180 /*IsMapThis*/ !VD, OMPBuilder, VD); 8181 } 8182 8183 // We need to append the results of this capture to what we already have. 8184 CombinedInfo.append(UnionCurInfo); 8185 } 8186 // Append data for use_device_ptr clauses. 8187 CombinedInfo.append(UseDeviceDataCombinedInfo); 8188 } 8189 8190 public: 8191 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8192 : CurDir(&Dir), CGF(CGF) { 8193 // Extract firstprivate clause information. 8194 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8195 for (const auto *D : C->varlists()) 8196 FirstPrivateDecls.try_emplace( 8197 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8198 // Extract implicit firstprivates from uses_allocators clauses. 8199 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8200 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8201 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8202 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8203 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8204 /*Implicit=*/true); 8205 else if (const auto *VD = dyn_cast<VarDecl>( 8206 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8207 ->getDecl())) 8208 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8209 } 8210 } 8211 // Extract device pointer clause information. 8212 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8213 for (auto L : C->component_lists()) 8214 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8215 // Extract device addr clause information. 8216 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>()) 8217 for (auto L : C->component_lists()) 8218 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L)); 8219 // Extract map information. 8220 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { 8221 if (C->getMapType() != OMPC_MAP_to) 8222 continue; 8223 for (auto L : C->component_lists()) { 8224 const ValueDecl *VD = std::get<0>(L); 8225 const auto *RD = VD ? VD->getType() 8226 .getCanonicalType() 8227 .getNonReferenceType() 8228 ->getAsCXXRecordDecl() 8229 : nullptr; 8230 if (RD && RD->isLambda()) 8231 LambdasMap.try_emplace(std::get<0>(L), C); 8232 } 8233 } 8234 } 8235 8236 /// Constructor for the declare mapper directive. 8237 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8238 : CurDir(&Dir), CGF(CGF) {} 8239 8240 /// Generate code for the combined entry if we have a partially mapped struct 8241 /// and take care of the mapping flags of the arguments corresponding to 8242 /// individual struct members. 
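  /// E.g. for the map(to: s.f[:22]) map(from: s.p[:33]) example above, this
  /// is what emits the single TARGET_PARAM entry spanning from &s.f[0] to the
  /// end of s.p, which the member entries then reference via MEMBER_OF(1).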
8243   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8244                          MapFlagsArrayTy &CurTypes,
8245                          const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8246                          llvm::OpenMPIRBuilder &OMPBuilder,
8247                          const ValueDecl *VD = nullptr,
8248                          bool NotTargetParams = true) const {
8249     if (CurTypes.size() == 1 &&
8250         ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8251          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8252         !PartialStruct.IsArraySection)
8253       return;
8254     Address LBAddr = PartialStruct.LowestElem.second;
8255     Address HBAddr = PartialStruct.HighestElem.second;
8256     if (PartialStruct.HasCompleteRecord) {
8257       LBAddr = PartialStruct.LB;
8258       HBAddr = PartialStruct.LB;
8259     }
8260     CombinedInfo.Exprs.push_back(VD);
8261     // Base is the base of the struct.
8262     CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8263     CombinedInfo.DevicePtrDecls.push_back(nullptr);
8264     CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8265     // Pointer is the address of the lowest element.
8266     llvm::Value *LB = LBAddr.emitRawPointer(CGF);
8267     const CXXMethodDecl *MD =
8268         CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8269     const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8270     bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8271     // There should not be a mapper for a combined entry.
8272     if (HasBaseClass) {
8273       // OpenMP 5.2 148:21:
8274       // If the target construct is within a class non-static member function,
8275       // and a variable is an accessible data member of the object for which the
8276       // non-static data member function is invoked, the variable is treated as
8277       // if the this[:1] expression had appeared in a map clause with a map-type
8278       // of tofrom.
8279       // Emit this[:1]
8280       CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8281       QualType Ty = MD->getFunctionObjectParameterType();
8282       llvm::Value *Size =
8283           CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8284                                     /*isSigned=*/true);
8285       CombinedInfo.Sizes.push_back(Size);
8286     } else {
8287       CombinedInfo.Pointers.push_back(LB);
8288       // Size is (addr of {highest+1} element) - (addr of lowest element)
8289       llvm::Value *HB = HBAddr.emitRawPointer(CGF);
8290       llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8291           HBAddr.getElementType(), HB, /*Idx0=*/1);
8292       llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8293       llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8294       llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8295       llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8296                                                     /*isSigned=*/false);
8297       CombinedInfo.Sizes.push_back(Size);
8298     }
8299     CombinedInfo.Mappers.push_back(nullptr);
8300     // The map type is always TARGET_PARAM when we generate info for captures.
8301     CombinedInfo.Types.push_back(
8302         NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8303                         : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8304     // If any element has the present modifier, then make sure the runtime
8305     // doesn't attempt to allocate the struct.
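    // E.g. (illustrative) for map(present, to: s.x) the PRESENT bit is
    // propagated to the combined struct entry, so the runtime reports an
    // error if 's' is not already on the device instead of allocating it.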
8306     if (CurTypes.end() !=
8307         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8308           return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8309               Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8310         }))
8311       CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8312     // Remove the TARGET_PARAM flag from the first element.
8313     (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8314     // If any element has the ompx_hold modifier, then make sure the runtime
8315     // uses the hold reference count for the struct as a whole so that it won't
8316     // be unmapped by an extra dynamic reference count decrement. Add it to all
8317     // elements as well so the runtime knows which reference count to check
8318     // when determining whether it's time for device-to-host transfers of
8319     // individual elements.
8320     if (CurTypes.end() !=
8321         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8322           return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8323               Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8324         })) {
8325       CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8326       for (auto &M : CurTypes)
8327         M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8328     }
8329
8330     // All other current entries will be MEMBER_OF the combined entry
8331     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8332     // 0xFFFF in the MEMBER_OF field).
8333     OpenMPOffloadMappingFlags MemberOfFlag =
8334         OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8335     for (auto &M : CurTypes)
8336       OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
8337   }
8338
8339   /// Generate all the base pointers, section pointers, sizes, map types, and
8340   /// mappers for the extracted mappable expressions (all included in \a
8341   /// CombinedInfo). Also, for each item that relates to a device pointer, a
8342   /// pair of the relevant declaration and index where it occurs is appended to
8343   /// the device pointers info array.
8344   void generateAllInfo(
8345       MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8346       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8347           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8348     assert(CurDir.is<const OMPExecutableDirective *>() &&
8349            "Expect an executable directive");
8350     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8351     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8352                               SkipVarSet);
8353   }
8354
8355   /// Generate all the base pointers, section pointers, sizes, map types, and
8356   /// mappers for the extracted map clauses of a user-defined mapper (all
8357   /// included in \a CombinedInfo).
8358   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8359                                 llvm::OpenMPIRBuilder &OMPBuilder) const {
8360     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8361            "Expect a declare mapper directive");
8362     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8363     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8364                               OMPBuilder);
8365   }
8366
8367   /// Emit capture info for lambdas for variables captured by reference.
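  /// E.g. (sketch) for a lambda 'auto L = [&X]() { ... };' used in a target
  /// region, the capture field for X inside the lambda object is emitted as
  /// PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT, so the captured reference
  /// is translated to the device copy of X.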
8368 void generateInfoForLambdaCaptures( 8369 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 8370 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8371 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType(); 8372 const auto *RD = VDType->getAsCXXRecordDecl(); 8373 if (!RD || !RD->isLambda()) 8374 return; 8375 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType), 8376 CGF.getContext().getDeclAlign(VD)); 8377 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType); 8378 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures; 8379 FieldDecl *ThisCapture = nullptr; 8380 RD->getCaptureFields(Captures, ThisCapture); 8381 if (ThisCapture) { 8382 LValue ThisLVal = 8383 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8384 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8385 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 8386 VDLVal.getPointer(CGF)); 8387 CombinedInfo.Exprs.push_back(VD); 8388 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF)); 8389 CombinedInfo.DevicePtrDecls.push_back(nullptr); 8390 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 8391 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF)); 8392 CombinedInfo.Sizes.push_back( 8393 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8394 CGF.Int64Ty, /*isSigned=*/true)); 8395 CombinedInfo.Types.push_back( 8396 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | 8397 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | 8398 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | 8399 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); 8400 CombinedInfo.Mappers.push_back(nullptr); 8401 } 8402 for (const LambdaCapture &LC : RD->captures()) { 8403 if (!LC.capturesVariable()) 8404 continue; 8405 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar()); 8406 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8407 continue; 8408 auto It = Captures.find(VD); 8409 assert(It != Captures.end() && "Found lambda capture without field."); 8410 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8411 if (LC.getCaptureKind() == LCK_ByRef) { 8412 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8413 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8414 VDLVal.getPointer(CGF)); 8415 CombinedInfo.Exprs.push_back(VD); 8416 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8417 CombinedInfo.DevicePtrDecls.push_back(nullptr); 8418 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 8419 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF)); 8420 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8421 CGF.getTypeSize( 8422 VD->getType().getCanonicalType().getNonReferenceType()), 8423 CGF.Int64Ty, /*isSigned=*/true)); 8424 } else { 8425 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8426 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8427 VDLVal.getPointer(CGF)); 8428 CombinedInfo.Exprs.push_back(VD); 8429 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8430 CombinedInfo.DevicePtrDecls.push_back(nullptr); 8431 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 8432 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal()); 8433 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8434 } 8435 CombinedInfo.Types.push_back( 8436 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | 8437 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | 8438 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | 8439 
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambda captures.
  void adjustMemberOfForLambdaCaptures(
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                       OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                       OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
                       OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag =
          OMPBuilder.getMemberOfFlag(TgtIdx);
      OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // For map(to: lambda): skip here; it is processed in
    // generateDefaultMapInfo.
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg);
      CombinedInfo.DevicePtrDecls.emplace_back(VD);
      CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    // For member fields listed in is_device_ptr, store them in
    // DeclComponentLists for generating components info.
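    // A sketch of the case handled here (assuming the OpenMP 5.x syntax that
    // allows members in is_device_ptr / has_device_addr):
    //   struct S { int *p; } s;
    //   #pragma omp target is_device_ptr(s.p)
    // The component list for 's.p' is recorded below with an implicit 'to'
    // map; has_device_addr members get an implicit 'tofrom' map instead.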
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    bool HasMapBasePtr = false;
    bool HasMapArraySec = false;
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The expression is not correct if the mapping is implicit.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
          HasMapBasePtr = true;
        if (VD && E && VD->getType()->isAnyPointerType() &&
            (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
          HasMapArraySec = true;
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
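    // For example (a sketch):
    //   struct S { int a; int b; } s;
    //   #pragma omp target map(tofrom: s) map(to: s.a)
    // The component lists for 's' and 's.a' share the base 's', so 's' is
    // treated as the base entry and 's.a' is recorded as one of its
    // overlapped sub-components.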
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // We found an overlap if, for at least one of the two lists, we
        // reached the head of the components list (i.e. one list fully
        // matches a prefix of the other).
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section,
          // dereference, etc.), it is not an overlap.
          // The same holds if one component is a base and another component
          // is a dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
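    // For example (a sketch), given 'struct S { int a; int b; int c; }' and
    //   #pragma omp target map(to: s.c) map(to: s.a)
    // the overlapped list for 's' is reordered to {s.a, s.c}, following the
    // field order of the record layout.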
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements is ordered before a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // The mapping flags depend on the capture this information is associated
    // with. First, go through all of the elements that have overlapped
    // elements.
    bool IsFirstComponentList = true;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
          StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
          IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Next, go through the other elements, which have no overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
            StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
            IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
            /*ForDeviceAddr=*/false, VD, VarRef,
            /*OverlappedElements*/ std::nullopt,
            HasMapBasePtr && HasMapArraySec);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime that captures passed by value are
        // not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than the first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
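      // For example (a sketch): for 'int &r' captured in a target region the
      // default is map(to: r), while for 'struct S &s' it is map(tofrom: s).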
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target
    // parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

// Try to extract the base declaration from a `this->x` expression if possible.
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();
  return nullptr;
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
    OS.flush();
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                         PLoc.getLine(), PLoc.getColumn(),
                                         SrcLocStrSize);
}

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());

  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
  };
  if (CGM.getCodeGenOpts().getDebugInfo() !=
      llvm::codegenoptions::NoDebugInfo) {
    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                    FillInfoMap);
  }

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };
  OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
                                  /*IsNonContiguous=*/true, DeviceAddrCB,
                                  CustomMapperCB);
}

/// Check for an inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, treat 'target' with nested 'teams loop' as if it's
      // distributed (target teams distribute).
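      // For example (a sketch):
      //   #pragma omp target
      //   #pragma omp teams loop
      //   for (...) {}
      // is handled as if it were 'target teams distribute'.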
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
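  // A sketch of what is computed here, in C terms (Ty is the mapped type):
  //   int64_t num = size / sizeof(Ty);  // the CreateExactUDiv below
  //   Ty *ptr_begin = (Ty *)begin;
  //   Ty *ptr_end = ptr_begin + num;    // the CreateGEP below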
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg elements and map each of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the mapper's declared variable to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info, OMPBuilder);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
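  // The returned count is shifted left by getFlagMemberOffset() so it can be
  // added into the MEMBER_OF bits of each component's map type below; new
  // components are thereby numbered after the pre-existing ones.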
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() ==
         llvm::codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            Info.Types[I]));
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from the user-defined mapper with the
    // one specified in the program. According to the OMP_MAP_TO and
    // OMP_MAP_FROM bits of \a MapType, which is the input argument of the
    // mapper function, the following code will set the OMP_MAP_TO and
    // OMP_MAP_FROM bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
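    // (For example, per the decay table above: a member the mapper maps with
    // 'to' decays to 'alloc' when the mapper itself is invoked with
    // map(alloc: ...), because neither OMP_MAP_TO nor OMP_MAP_FROM is set.)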
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying the element size by the number of
  // elements (i.e., \p Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that the call
  // performs memory allocation/deletion only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get the nested teams distribute kind directive, if any. For now, treat
  // 'target_teams_loop' as if it's really a target_teams_distribute.
  if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
      Kind != OMPD_target_teams_loop)
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return llvm::ConstantInt::get(CGF.Int64Ty, 0);

  const auto *LD = cast<OMPLoopDirective>(TD);
  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
    return NumIterations;
  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
}

static void
emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                       const OMPExecutableDirective &D,
                       llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                       bool RequiresOuterTask, const CapturedStmt &CS,
                       bool OffloadingMandatory, CodeGenFunction &CGF) {
  if (OffloadingMandatory) {
    CGF.Builder.CreateUnreachable();
  } else {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
                                         CapturedVars);
  }
}

static llvm::Value *emitDeviceID(
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    CodeGenFunction &CGF) {
  // Emit the device ID, if any.
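  // For example (a sketch): '#pragma omp target device(2)' yields the
  // evaluated device expression cast to i64, while a directive without a
  // device clause falls back to OMP_DEVICEID_UNDEF.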
  llvm::Value *DeviceID;
  if (Device.getPointer()) {
    assert((Device.getInt() == OMPC_DEVICE_unknown ||
            Device.getInt() == OMPC_DEVICE_device_num) &&
           "Expected device_num modifier.");
    llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
    DeviceID =
        CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }
  return DeviceID;
}

llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
                               CodeGenFunction &CGF) {
  llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);

  if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
    CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
    llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
        DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
    DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
                                             /*isSigned=*/false);
  }
  return DynCGroupMem;
}

static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

  // Get mappable expression information.
  MappableExprsHandler MEHandler(D, CGF);
  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;
    MappableExprsHandler::StructRangeInfoTy PartialStruct;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(nullptr);
      CurInfo.BasePointers.push_back(*CV);
      CurInfo.DevicePtrDecls.push_back(nullptr);
      CurInfo.DevicePointers.push_back(
          MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(*CV);
      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(nullptr);
    } else {
      // If we have any information in the map clause, we use it; otherwise we
      // just do a default mapping.
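      // For example (a sketch): with '#pragma omp target map(tofrom: a)' the
      // capture of 'a' uses the map clause, while a plain reference to 'a'
      // inside the region with no map clause takes the default-mapping path
      // below.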
      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
      if (!CI->capturesThis())
        MappedVarSet.insert(CI->getCapturedVar());
      else
        MappedVarSet.insert(nullptr);
      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                CurInfo, LambdaPointers);
    }
    // We expect to have at least one element of information for this capture.
    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      CombinedInfo.append(PartialStruct.PreliminaryMapData);
      MEHandler.emitCombinedEntry(
          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
          OMPBuilder, nullptr,
          !PartialStruct.PreliminaryMapData.BasePointers.empty());
    }

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambda captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
      CombinedInfo.Pointers, CombinedInfo.Types);
  // Map any list items in a map clause that were not captured because they
  // weren't referenced within the construct.
  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);

  CGOpenMPRuntime::TargetDataInfo Info;
  // Fill up the arrays and create the arguments.
  emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
  bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                   llvm::codegenoptions::NoDebugInfo;
  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                          EmitDebug,
                                          /*ForEndCall=*/false);

  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
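      // (This path is taken for a 'device(ancestor: ...)' clause, e.g. a
      // sketch: '#pragma omp target device(ancestor: 1)', which requests
      // execution back on the encountering host.)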
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray =
        InputInfo.BasePointersArray.emitRawPointer(CGF);
    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);

    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads =
        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGGroupMem, HasNoWait);

    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
        DeviceID, RTLoc, AllocaIP));
  };

  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}

static void
emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                   const OMPExecutableDirective &D,
                   llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                   bool RequiresOuterTask, const CapturedStmt &CS,
                   bool OffloadingMandatory, CodeGenFunction &CGF) {

  // Notify that the host version must be executed.
  auto &&ElseGen =
      [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory, CGF);
      };

  if (RequiresOuterTask) {
    CodeGenFunction::OMPTargetDataInfo InputInfo;
    CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
  } else {
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
  }
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  const bool RequiresOuterTask =
      D.hasClausesOfKind<OMPDependClause>() ||
      D.hasClausesOfKind<OMPNowaitClause>() ||
      D.hasClausesOfKind<OMPInReductionClause>() ||
      (CGM.getLangOpts().OpenMP >= 51 &&
       needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
       D.hasClausesOfKind<OMPThreadLimitClause>());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
                          OutlinedFnID, &InputInfo, &MapTypesArray,
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
  };

  auto &&TargetElseGen =
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
      };

  // If we have a target function ID it means that we need to support
  // offloading; otherwise, just execute on the host. We need to execute on the
  // host regardless of the conditional in the if clause if, e.g., the user
  // does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
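  // For example (a sketch): scanning the body of
  //   void foo() {
  //   #pragma omp target teams
  //     { ... }
  //   }
  // finds the 'target teams' directive and emits its device function below.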
9764 bool RequiresDeviceCodegen = 9765 isa<OMPExecutableDirective>(S) && 9766 isOpenMPTargetExecutionDirective( 9767 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 9768 9769 if (RequiresDeviceCodegen) { 9770 const auto &E = *cast<OMPExecutableDirective>(S); 9771 9772 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc( 9773 CGM, OMPBuilder, E.getBeginLoc(), ParentName); 9774 9775 // Is this a target region that should not be emitted as an entry point? If 9776 // so just signal we are done with this target region. 9777 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo)) 9778 return; 9779 9780 switch (E.getDirectiveKind()) { 9781 case OMPD_target: 9782 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9783 cast<OMPTargetDirective>(E)); 9784 break; 9785 case OMPD_target_parallel: 9786 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9787 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9788 break; 9789 case OMPD_target_teams: 9790 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9791 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9792 break; 9793 case OMPD_target_teams_distribute: 9794 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9795 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9796 break; 9797 case OMPD_target_teams_distribute_simd: 9798 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9799 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9800 break; 9801 case OMPD_target_parallel_for: 9802 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9803 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9804 break; 9805 case OMPD_target_parallel_for_simd: 9806 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9807 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9808 break; 9809 case OMPD_target_simd: 9810 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9811 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9812 break; 9813 case OMPD_target_teams_distribute_parallel_for: 9814 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9815 CGM, ParentName, 9816 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9817 break; 9818 case OMPD_target_teams_distribute_parallel_for_simd: 9819 CodeGenFunction:: 9820 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9821 CGM, ParentName, 9822 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9823 break; 9824 case OMPD_target_teams_loop: 9825 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction( 9826 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E)); 9827 break; 9828 case OMPD_target_parallel_loop: 9829 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( 9830 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E)); 9831 break; 9832 case OMPD_parallel: 9833 case OMPD_for: 9834 case OMPD_parallel_for: 9835 case OMPD_parallel_master: 9836 case OMPD_parallel_sections: 9837 case OMPD_for_simd: 9838 case OMPD_parallel_for_simd: 9839 case OMPD_cancel: 9840 case OMPD_cancellation_point: 9841 case OMPD_ordered: 9842 case OMPD_threadprivate: 9843 case OMPD_allocate: 9844 case OMPD_task: 9845 case OMPD_simd: 9846 case OMPD_tile: 9847 case OMPD_unroll: 9848 case OMPD_sections: 9849 case OMPD_section: 9850 case OMPD_single: 9851 case OMPD_master: 9852 case OMPD_critical: 9853 case OMPD_taskyield: 9854 case OMPD_barrier: 9855 case OMPD_taskwait: 9856 case OMPD_taskgroup: 9857 case OMPD_atomic: 9858 case OMPD_flush: 9859 
case OMPD_depobj:
9860 case OMPD_scan:
9861 case OMPD_teams:
9862 case OMPD_target_data:
9863 case OMPD_target_exit_data:
9864 case OMPD_target_enter_data:
9865 case OMPD_distribute:
9866 case OMPD_distribute_simd:
9867 case OMPD_distribute_parallel_for:
9868 case OMPD_distribute_parallel_for_simd:
9869 case OMPD_teams_distribute:
9870 case OMPD_teams_distribute_simd:
9871 case OMPD_teams_distribute_parallel_for:
9872 case OMPD_teams_distribute_parallel_for_simd:
9873 case OMPD_target_update:
9874 case OMPD_declare_simd:
9875 case OMPD_declare_variant:
9876 case OMPD_begin_declare_variant:
9877 case OMPD_end_declare_variant:
9878 case OMPD_declare_target:
9879 case OMPD_end_declare_target:
9880 case OMPD_declare_reduction:
9881 case OMPD_declare_mapper:
9882 case OMPD_taskloop:
9883 case OMPD_taskloop_simd:
9884 case OMPD_master_taskloop:
9885 case OMPD_master_taskloop_simd:
9886 case OMPD_parallel_master_taskloop:
9887 case OMPD_parallel_master_taskloop_simd:
9888 case OMPD_requires:
9889 case OMPD_metadirective:
9890 case OMPD_unknown:
9891 default:
9892 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9893 }
9894 return;
9895 }
9896
9897 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9898 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9899 return;
9900
9901 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9902 return;
9903 }
9904
9905 // If this is a lambda function, look into its body.
9906 if (const auto *L = dyn_cast<LambdaExpr>(S))
9907 S = L->getBody();
9908
9909 // Keep looking for target regions recursively.
9910 for (const Stmt *II : S->children())
9911 scanForTargetRegionsFunctions(II, ParentName);
9912 }
9913
9914 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9915 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9916 OMPDeclareTargetDeclAttr::getDeviceType(VD);
9917 if (!DevTy)
9918 return false;
9919 // Do not emit device_type(nohost) functions for the host.
9920 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9921 return true;
9922 // Do not emit device_type(host) functions for the device.
9923 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9924 return true;
9925 return false;
9926 }
9927
9928 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9929 // If emitting code for the host, we do not process FD here. Instead, we do
9930 // the normal code generation.
9931 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9932 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
9933 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9934 CGM.getLangOpts().OpenMPIsTargetDevice))
9935 return true;
9936 return false;
9937 }
9938
9939 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9940 // Try to detect target regions in the function.
9941 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9942 StringRef Name = CGM.getMangledName(GD);
9943 scanForTargetRegionsFunctions(FD->getBody(), Name);
9944 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9945 CGM.getLangOpts().OpenMPIsTargetDevice))
9946 return true;
9947 }
9948
9949 // Do not emit the function if it is not marked as declare target.
9950 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9951 AlreadyEmittedTargetDecls.count(VD) == 0;
9952 }
9953
9954 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9955 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
9956 CGM.getLangOpts().OpenMPIsTargetDevice))
9957 return true;
9958
9959 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
9960 return false;
9961
9962 // Check if there are Ctors/Dtors in this declaration and look for target
9963 // regions in it. We use the complete variant to produce the kernel name
9964 // mangling.
9965 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9966 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9967 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9968 StringRef ParentName =
9969 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9970 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9971 }
9972 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9973 StringRef ParentName =
9974 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9975 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9976 }
9977 }
9978
9979 // Do not emit the variable if it is not marked as declare target.
9980 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9981 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9982 cast<VarDecl>(GD.getDecl()));
9983 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9984 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9985 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9986 HasRequiresUnifiedSharedMemory)) {
9987 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9988 return true;
9989 }
9990 return false;
9991 }
9992
9993 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9994 llvm::Constant *Addr) {
9995 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9996 !CGM.getLangOpts().OpenMPIsTargetDevice)
9997 return;
9998
9999 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10000 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10001
10002 // If this is an 'extern' declaration, we defer to the canonical definition
10003 // and do not emit an offloading entry.
10004 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
10005 VD->hasExternalStorage())
10006 return;
10007
10008 if (!Res) {
10009 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10010 // Register non-target variables being emitted in device code (debug info
10011 // may cause this).
10012 StringRef VarName = CGM.getMangledName(VD); 10013 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10014 } 10015 return; 10016 } 10017 10018 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); }; 10019 auto LinkageForVariable = [&VD, this]() { 10020 return CGM.getLLVMLinkageVarDefinition(VD); 10021 }; 10022 10023 std::vector<llvm::GlobalVariable *> GeneratedRefs; 10024 OMPBuilder.registerTargetGlobalVariable( 10025 convertCaptureClause(VD), convertDeviceClause(VD), 10026 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly, 10027 VD->isExternallyVisible(), 10028 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, 10029 VD->getCanonicalDecl()->getBeginLoc()), 10030 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd, 10031 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable, 10032 CGM.getTypes().ConvertTypeForMem( 10033 CGM.getContext().getPointerType(VD->getType())), 10034 Addr); 10035 10036 for (auto *ref : GeneratedRefs) 10037 CGM.addCompilerUsedGlobal(ref); 10038 } 10039 10040 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10041 if (isa<FunctionDecl>(GD.getDecl()) || 10042 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10043 return emitTargetFunctions(GD); 10044 10045 return emitTargetGlobalVariable(GD); 10046 } 10047 10048 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10049 for (const VarDecl *VD : DeferredGlobalVariables) { 10050 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10051 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10052 if (!Res) 10053 continue; 10054 if ((*Res == OMPDeclareTargetDeclAttr::MT_To || 10055 *Res == OMPDeclareTargetDeclAttr::MT_Enter) && 10056 !HasRequiresUnifiedSharedMemory) { 10057 CGM.EmitGlobal(VD); 10058 } else { 10059 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10060 ((*Res == OMPDeclareTargetDeclAttr::MT_To || 10061 *Res == OMPDeclareTargetDeclAttr::MT_Enter) && 10062 HasRequiresUnifiedSharedMemory)) && 10063 "Expected link clause or to clause with unified memory."); 10064 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10065 } 10066 } 10067 } 10068 10069 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10070 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10071 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10072 " Expected target-based directive."); 10073 } 10074 10075 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10076 for (const OMPClause *Clause : D->clauselists()) { 10077 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10078 HasRequiresUnifiedSharedMemory = true; 10079 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true); 10080 } else if (const auto *AC = 10081 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10082 switch (AC->getAtomicDefaultMemOrderKind()) { 10083 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10084 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10085 break; 10086 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10087 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10088 break; 10089 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10090 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10091 break; 10092 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10093 break; 10094 } 10095 } 10096 } 10097 } 10098 10099 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10100 return RequiresAtomicOrdering; 10101 } 10102 10103 bool 
CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10104 LangAS &AS) {
10105 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10106 return false;
10107 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10108 switch (A->getAllocatorType()) {
10109 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10110 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10111 // Not supported, fall back to the default mem space.
10112 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10113 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10114 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10115 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10116 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10117 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10118 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10119 AS = LangAS::Default;
10120 return true;
10121 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10122 llvm_unreachable("Expected predefined allocator for the variables with the "
10123 "static storage.");
10124 }
10125 return false;
10126 }
10127
10128 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10129 return HasRequiresUnifiedSharedMemory;
10130 }
10131
10132 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10133 CodeGenModule &CGM)
10134 : CGM(CGM) {
10135 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10136 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10137 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10138 }
10139 }
10140
10141 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10142 if (CGM.getLangOpts().OpenMPIsTargetDevice)
10143 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10144 }
10145
10146 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10147 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10148 return true;
10149
10150 const auto *D = cast<FunctionDecl>(GD.getDecl());
10151 // Do not emit the function if it is marked as declare target, as it was
10152 // already emitted.
10153 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10154 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10155 if (auto *F = dyn_cast_or_null<llvm::Function>(
10156 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10157 return !F->isDeclaration();
10158 return false;
10159 }
10160 return true;
10161 }
10162
10163 return !AlreadyEmittedTargetDecls.insert(D).second;
10164 }
10165
10166 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10167 const OMPExecutableDirective &D,
10168 SourceLocation Loc,
10169 llvm::Function *OutlinedFn,
10170 ArrayRef<llvm::Value *> CapturedVars) {
10171 if (!CGF.HaveInsertPoint())
10172 return;
10173
10174 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10175 CodeGenFunction::RunCleanupsScope Scope(CGF);
10176
10177 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10178 llvm::Value *Args[] = {
10179 RTLoc,
10180 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10181 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10182 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10183 RealArgs.append(std::begin(Args), std::end(Args));
10184 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10185
10186 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10187 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10188 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10189 }
10190
10191 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10192 const Expr *NumTeams,
10193 const Expr *ThreadLimit,
10194 SourceLocation Loc) {
10195 if (!CGF.HaveInsertPoint())
10196 return;
10197
10198 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10199
10200 llvm::Value *NumTeamsVal =
10201 NumTeams
10202 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10203 CGF.CGM.Int32Ty, /* isSigned = */ true)
10204 : CGF.Builder.getInt32(0);
10205
10206 llvm::Value *ThreadLimitVal =
10207 ThreadLimit
10208 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10209 CGF.CGM.Int32Ty, /* isSigned = */ true)
10210 : CGF.Builder.getInt32(0);
10211
10212 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10213 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10214 ThreadLimitVal};
10215 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10216 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10217 PushNumTeamsArgs);
10218 }
10219
10220 void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10221 const Expr *ThreadLimit,
10222 SourceLocation Loc) {
10223 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10224 llvm::Value *ThreadLimitVal =
10225 ThreadLimit
10226 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10227 CGF.CGM.Int32Ty, /* isSigned = */ true)
10228 : CGF.Builder.getInt32(0);
10229
10230 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10231 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10232 ThreadLimitVal};
10233 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10234 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10235 ThreadLimitArgs);
10236 }
10237
10238 void CGOpenMPRuntime::emitTargetDataCalls(
10239 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10240 const Expr *Device, const RegionCodeGenTy &CodeGen,
10241 CGOpenMPRuntime::TargetDataInfo &Info) {
10242 if (!CGF.HaveInsertPoint())
10243 return;
10244
10245 // Action used to replace the default codegen action and turn privatization
10246 // off.
10247 PrePostActionTy NoPrivAction; 10248 10249 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 10250 10251 llvm::Value *IfCondVal = nullptr; 10252 if (IfCond) 10253 IfCondVal = CGF.EvaluateExprAsBool(IfCond); 10254 10255 // Emit device ID if any. 10256 llvm::Value *DeviceID = nullptr; 10257 if (Device) { 10258 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10259 CGF.Int64Ty, /*isSigned=*/true); 10260 } else { 10261 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10262 } 10263 10264 // Fill up the arrays with all the mapped variables. 10265 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10266 auto GenMapInfoCB = 10267 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { 10268 CGF.Builder.restoreIP(CodeGenIP); 10269 // Get map clause information. 10270 MappableExprsHandler MEHandler(D, CGF); 10271 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder); 10272 10273 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 10274 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 10275 }; 10276 if (CGM.getCodeGenOpts().getDebugInfo() != 10277 llvm::codegenoptions::NoDebugInfo) { 10278 CombinedInfo.Names.resize(CombinedInfo.Exprs.size()); 10279 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(), 10280 FillInfoMap); 10281 } 10282 10283 return CombinedInfo; 10284 }; 10285 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy; 10286 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) { 10287 CGF.Builder.restoreIP(CodeGenIP); 10288 switch (BodyGenType) { 10289 case BodyGenTy::Priv: 10290 if (!Info.CaptureDeviceAddrMap.empty()) 10291 CodeGen(CGF); 10292 break; 10293 case BodyGenTy::DupNoPriv: 10294 if (!Info.CaptureDeviceAddrMap.empty()) { 10295 CodeGen.setAction(NoPrivAction); 10296 CodeGen(CGF); 10297 } 10298 break; 10299 case BodyGenTy::NoPriv: 10300 if (Info.CaptureDeviceAddrMap.empty()) { 10301 CodeGen.setAction(NoPrivAction); 10302 CodeGen(CGF); 10303 } 10304 break; 10305 } 10306 return InsertPointTy(CGF.Builder.GetInsertBlock(), 10307 CGF.Builder.GetInsertPoint()); 10308 }; 10309 10310 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { 10311 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { 10312 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); 10313 } 10314 }; 10315 10316 auto CustomMapperCB = [&](unsigned int I) { 10317 llvm::Value *MFunc = nullptr; 10318 if (CombinedInfo.Mappers[I]) { 10319 Info.HasMapper = true; 10320 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 10321 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 10322 } 10323 return MFunc; 10324 }; 10325 10326 // Source location for the ident struct 10327 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10328 10329 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(), 10330 CGF.AllocaInsertPt->getIterator()); 10331 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(), 10332 CGF.Builder.GetInsertPoint()); 10333 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP); 10334 CGF.Builder.restoreIP(OMPBuilder.createTargetData( 10335 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB, 10336 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc)); 10337 } 10338 10339 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10340 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10341 const Expr *Device) { 10342 if (!CGF.HaveInsertPoint()) 10343 return; 10344 10345 assert((isa<OMPTargetEnterDataDirective>(D) || 
10346 isa<OMPTargetExitDataDirective>(D) || 10347 isa<OMPTargetUpdateDirective>(D)) && 10348 "Expecting either target enter, exit data, or update directives."); 10349 10350 CodeGenFunction::OMPTargetDataInfo InputInfo; 10351 llvm::Value *MapTypesArray = nullptr; 10352 llvm::Value *MapNamesArray = nullptr; 10353 // Generate the code for the opening of the data environment. 10354 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 10355 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10356 // Emit device ID if any. 10357 llvm::Value *DeviceID = nullptr; 10358 if (Device) { 10359 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10360 CGF.Int64Ty, /*isSigned=*/true); 10361 } else { 10362 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10363 } 10364 10365 // Emit the number of elements in the offloading arrays. 10366 llvm::Constant *PointerNum = 10367 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10368 10369 // Source location for the ident struct 10370 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10371 10372 SmallVector<llvm::Value *, 13> OffloadingArgs( 10373 {RTLoc, DeviceID, PointerNum, 10374 InputInfo.BasePointersArray.emitRawPointer(CGF), 10375 InputInfo.PointersArray.emitRawPointer(CGF), 10376 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray, 10377 InputInfo.MappersArray.emitRawPointer(CGF)}); 10378 10379 // Select the right runtime function call for each standalone 10380 // directive. 10381 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10382 RuntimeFunction RTLFn; 10383 switch (D.getDirectiveKind()) { 10384 case OMPD_target_enter_data: 10385 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 10386 : OMPRTL___tgt_target_data_begin_mapper; 10387 break; 10388 case OMPD_target_exit_data: 10389 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 10390 : OMPRTL___tgt_target_data_end_mapper; 10391 break; 10392 case OMPD_target_update: 10393 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 10394 : OMPRTL___tgt_target_data_update_mapper; 10395 break; 10396 case OMPD_parallel: 10397 case OMPD_for: 10398 case OMPD_parallel_for: 10399 case OMPD_parallel_master: 10400 case OMPD_parallel_sections: 10401 case OMPD_for_simd: 10402 case OMPD_parallel_for_simd: 10403 case OMPD_cancel: 10404 case OMPD_cancellation_point: 10405 case OMPD_ordered: 10406 case OMPD_threadprivate: 10407 case OMPD_allocate: 10408 case OMPD_task: 10409 case OMPD_simd: 10410 case OMPD_tile: 10411 case OMPD_unroll: 10412 case OMPD_sections: 10413 case OMPD_section: 10414 case OMPD_single: 10415 case OMPD_master: 10416 case OMPD_critical: 10417 case OMPD_taskyield: 10418 case OMPD_barrier: 10419 case OMPD_taskwait: 10420 case OMPD_taskgroup: 10421 case OMPD_atomic: 10422 case OMPD_flush: 10423 case OMPD_depobj: 10424 case OMPD_scan: 10425 case OMPD_teams: 10426 case OMPD_target_data: 10427 case OMPD_distribute: 10428 case OMPD_distribute_simd: 10429 case OMPD_distribute_parallel_for: 10430 case OMPD_distribute_parallel_for_simd: 10431 case OMPD_teams_distribute: 10432 case OMPD_teams_distribute_simd: 10433 case OMPD_teams_distribute_parallel_for: 10434 case OMPD_teams_distribute_parallel_for_simd: 10435 case OMPD_declare_simd: 10436 case OMPD_declare_variant: 10437 case OMPD_begin_declare_variant: 10438 case OMPD_end_declare_variant: 10439 case OMPD_declare_target: 10440 case OMPD_end_declare_target: 10441 case OMPD_declare_reduction: 10442 case OMPD_declare_mapper: 10443 case OMPD_taskloop: 10444 case OMPD_taskloop_simd: 10445 case OMPD_master_taskloop: 10446 case OMPD_master_taskloop_simd: 10447 case OMPD_parallel_master_taskloop: 10448 case OMPD_parallel_master_taskloop_simd: 10449 case OMPD_target: 10450 case OMPD_target_simd: 10451 case OMPD_target_teams_distribute: 10452 case OMPD_target_teams_distribute_simd: 10453 case OMPD_target_teams_distribute_parallel_for: 10454 case OMPD_target_teams_distribute_parallel_for_simd: 10455 case OMPD_target_teams: 10456 case OMPD_target_parallel: 10457 case OMPD_target_parallel_for: 10458 case OMPD_target_parallel_for_simd: 10459 case OMPD_requires: 10460 case OMPD_metadirective: 10461 case OMPD_unknown: 10462 default: 10463 llvm_unreachable("Unexpected standalone target data directive."); 10464 break; 10465 } 10466 if (HasNowait) { 10467 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty)); 10468 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy)); 10469 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty)); 10470 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy)); 10471 } 10472 CGF.EmitRuntimeCall( 10473 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 10474 OffloadingArgs); 10475 }; 10476 10477 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10478 &MapNamesArray](CodeGenFunction &CGF, 10479 PrePostActionTy &) { 10480 // Fill up the arrays with all the mapped variables. 10481 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10482 10483 // Get map clause information. 10484 MappableExprsHandler MEHandler(D, CGF); 10485 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder); 10486 10487 CGOpenMPRuntime::TargetDataInfo Info; 10488 // Fill up the arrays and create the arguments. 
10489 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10490 /*IsNonContiguous=*/true);
10491 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10492 D.hasClausesOfKind<OMPNowaitClause>();
10493 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
10494 llvm::codegenoptions::NoDebugInfo;
10495 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
10496 EmitDebug,
10497 /*ForEndCall=*/false);
10498 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10499 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10500 CGF.VoidPtrTy, CGM.getPointerAlign());
10501 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10502 CGM.getPointerAlign());
10503 InputInfo.SizesArray =
10504 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10505 InputInfo.MappersArray =
10506 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10507 MapTypesArray = Info.RTArgs.MapTypesArray;
10508 MapNamesArray = Info.RTArgs.MapNamesArray;
10509 if (RequiresOuterTask)
10510 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10511 else
10512 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10513 };
10514
10515 if (IfCond) {
10516 emitIfClause(CGF, IfCond, TargetThenGen,
10517 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10518 } else {
10519 RegionCodeGenTy ThenRCG(TargetThenGen);
10520 ThenRCG(CGF);
10521 }
10522 }
10523
10524 namespace {
10525 /// Kind of parameter in a function with 'declare simd' directive.
10526 enum ParamKindTy {
10527 Linear,
10528 LinearRef,
10529 LinearUVal,
10530 LinearVal,
10531 Uniform,
10532 Vector,
10533 };
10534 /// Attribute set of the parameter.
10535 struct ParamAttrTy {
10536 ParamKindTy Kind = Vector;
10537 llvm::APSInt StrideOrArg;
10538 llvm::APSInt Alignment;
10539 bool HasVarStride = false;
10540 };
10541 } // namespace
10542
10543 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10544 ArrayRef<ParamAttrTy> ParamAttrs) {
10545 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10546 // If the OpenMP clause "simdlen" is used, the VLEN is the value of the
10547 // argument of that clause. The VLEN value must be a power of 2.
10548 // Otherwise, the notion of the function's "characteristic data type" (CDT)
10549 // is used to compute the vector length.
10550 // CDT is defined in the following order:
10551 // a) For a non-void function, the CDT is the return type.
10552 // b) If the function has any non-uniform, non-linear parameters, then the
10553 // CDT is the type of the first such parameter.
10554 // c) If the CDT determined by a) or b) above is a struct, union, or class
10555 // type which is pass-by-value (except for the type that maps to the
10556 // built-in complex data type), the characteristic data type is int.
10557 // d) If none of the above three cases is applicable, the CDT is int.
10558 // The VLEN is then determined based on the CDT and the size of the vector
10559 // register of the ISA for which the current vector version is generated. The
10560 // VLEN is computed using the formula below:
10561 // VLEN = sizeof(vector_register) / sizeof(CDT),
10562 // where the vector register size is specified in section 3.2.1 'Registers
10563 // and the Stack Frame' of the original AMD64 ABI document.
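// Worked example (illustrative, not from the source): for CDT = int
// (32 bits) built for a 256-bit ISA such as AVX2, the formula gives
// VLEN = 256 / 32 = 8 lanes.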
10564 QualType RetType = FD->getReturnType();
10565 if (RetType.isNull())
10566 return 0;
10567 ASTContext &C = FD->getASTContext();
10568 QualType CDT;
10569 if (!RetType.isNull() && !RetType->isVoidType()) {
10570 CDT = RetType;
10571 } else {
10572 unsigned Offset = 0;
10573 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10574 if (ParamAttrs[Offset].Kind == Vector)
10575 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10576 ++Offset;
10577 }
10578 if (CDT.isNull()) {
10579 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10580 if (ParamAttrs[I + Offset].Kind == Vector) {
10581 CDT = FD->getParamDecl(I)->getType();
10582 break;
10583 }
10584 }
10585 }
10586 }
10587 if (CDT.isNull())
10588 CDT = C.IntTy;
10589 CDT = CDT->getCanonicalTypeUnqualified();
10590 if (CDT->isRecordType() || CDT->isUnionType())
10591 CDT = C.IntTy;
10592 return C.getTypeSize(CDT);
10593 }
10594
10595 /// Mangle the parameter part of the vector function name according to
10596 /// each parameter's OpenMP classification. The mangling function is defined
10597 /// in section 4.5 of the AAVFABI (2021Q1).
10598 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10599 SmallString<256> Buffer;
10600 llvm::raw_svector_ostream Out(Buffer);
10601 for (const auto &ParamAttr : ParamAttrs) {
10602 switch (ParamAttr.Kind) {
10603 case Linear:
10604 Out << 'l';
10605 break;
10606 case LinearRef:
10607 Out << 'R';
10608 break;
10609 case LinearUVal:
10610 Out << 'U';
10611 break;
10612 case LinearVal:
10613 Out << 'L';
10614 break;
10615 case Uniform:
10616 Out << 'u';
10617 break;
10618 case Vector:
10619 Out << 'v';
10620 break;
10621 }
10622 if (ParamAttr.HasVarStride)
10623 Out << "s" << ParamAttr.StrideOrArg;
10624 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10625 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10626 // Don't print the step value if it is not present or if it is
10627 // equal to 1.
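// Illustrative encodings (assumed examples): a Linear parameter with a
// constant step of 4 prints as "l4", a step of -2 prints as "ln2", and a
// variable stride referring to parameter position 1 prints as "ls1".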
10628 if (ParamAttr.StrideOrArg < 0)
10629 Out << 'n' << -ParamAttr.StrideOrArg;
10630 else if (ParamAttr.StrideOrArg != 1)
10631 Out << ParamAttr.StrideOrArg;
10632 }
10633
10634 if (!!ParamAttr.Alignment)
10635 Out << 'a' << ParamAttr.Alignment;
10636 }
10637
10638 return std::string(Out.str());
10639 }
10640
10641 static void
10642 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10643 const llvm::APSInt &VLENVal,
10644 ArrayRef<ParamAttrTy> ParamAttrs,
10645 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10646 struct ISADataTy {
10647 char ISA;
10648 unsigned VecRegSize;
10649 };
10650 ISADataTy ISAData[] = {
10651 {
10652 'b', 128
10653 }, // SSE
10654 {
10655 'c', 256
10656 }, // AVX
10657 {
10658 'd', 256
10659 }, // AVX2
10660 {
10661 'e', 512
10662 }, // AVX512
10663 };
10664 llvm::SmallVector<char, 2> Masked;
10665 switch (State) {
10666 case OMPDeclareSimdDeclAttr::BS_Undefined:
10667 Masked.push_back('N');
10668 Masked.push_back('M');
10669 break;
10670 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10671 Masked.push_back('N');
10672 break;
10673 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10674 Masked.push_back('M');
10675 break;
10676 }
10677 for (char Mask : Masked) {
10678 for (const ISADataTy &Data : ISAData) {
10679 SmallString<256> Buffer;
10680 llvm::raw_svector_ostream Out(Buffer);
10681 Out << "_ZGV" << Data.ISA << Mask;
10682 if (!VLENVal) {
10683 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10684 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10685 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10686 } else {
10687 Out << VLENVal;
10688 }
10689 Out << mangleVectorParameters(ParamAttrs);
10690 Out << '_' << Fn->getName();
10691 Fn->addFnAttr(Out.str());
10692 }
10693 }
10694 }
10695
10696 // These are the functions needed to mangle the names of the
10697 // vector functions generated by the compiler, according to the rules
10698 // defined in the "Vector Function ABI specifications for AArch64",
10699 // available at
10700 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10701
10702 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10703 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10704 QT = QT.getCanonicalType();
10705
10706 if (QT->isVoidType())
10707 return false;
10708
10709 if (Kind == ParamKindTy::Uniform)
10710 return false;
10711
10712 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10713 return false;
10714
10715 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10716 !QT->isReferenceType())
10717 return false;
10718
10719 return true;
10720 }
10721
10722 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10723 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10724 QT = QT.getCanonicalType();
10725 unsigned Size = C.getTypeSize(QT);
10726
10727 // Only scalars and complex types at most 16 bytes wide set PBV to true.
10728 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10729 return false;
10730
10731 if (QT->isFloatingType())
10732 return true;
10733
10734 if (QT->isIntegerType())
10735 return true;
10736
10737 if (QT->isPointerType())
10738 return true;
10739
10740 // TODO: Add support for complex types (section 3.1.2, item 2).
10741
10742 return false;
10743 }
10744
10745 /// Computes the lane size (LS) of a return type or of an input parameter,
10746 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10747 /// TODO: Add support for references, section 3.2.1, item 1. 10748 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10749 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10750 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10751 if (getAArch64PBV(PTy, C)) 10752 return C.getTypeSize(PTy); 10753 } 10754 if (getAArch64PBV(QT, C)) 10755 return C.getTypeSize(QT); 10756 10757 return C.getTypeSize(C.getUIntPtrType()); 10758 } 10759 10760 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10761 // signature of the scalar function, as defined in 3.2.2 of the 10762 // AAVFABI. 10763 static std::tuple<unsigned, unsigned, bool> 10764 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10765 QualType RetType = FD->getReturnType().getCanonicalType(); 10766 10767 ASTContext &C = FD->getASTContext(); 10768 10769 bool OutputBecomesInput = false; 10770 10771 llvm::SmallVector<unsigned, 8> Sizes; 10772 if (!RetType->isVoidType()) { 10773 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10774 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10775 OutputBecomesInput = true; 10776 } 10777 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10778 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10779 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10780 } 10781 10782 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10783 // The LS of a function parameter / return value can only be a power 10784 // of 2, starting from 8 bits, up to 128. 10785 assert(llvm::all_of(Sizes, 10786 [](unsigned Size) { 10787 return Size == 8 || Size == 16 || Size == 32 || 10788 Size == 64 || Size == 128; 10789 }) && 10790 "Invalid size"); 10791 10792 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10793 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10794 OutputBecomesInput); 10795 } 10796 10797 // Function used to add the attribute. The parameter `VLEN` is 10798 // templated to allow the use of "x" when targeting scalable functions 10799 // for SVE. 10800 template <typename T> 10801 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10802 char ISA, StringRef ParSeq, 10803 StringRef MangledName, bool OutputBecomesInput, 10804 llvm::Function *Fn) { 10805 SmallString<256> Buffer; 10806 llvm::raw_svector_ostream Out(Buffer); 10807 Out << Prefix << ISA << LMask << VLEN; 10808 if (OutputBecomesInput) 10809 Out << "v"; 10810 Out << ParSeq << "_" << MangledName; 10811 Fn->addFnAttr(Out.str()); 10812 } 10813 10814 // Helper function to generate the Advanced SIMD names depending on 10815 // the value of the NDS when simdlen is not present. 
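// For example, per the switch below, NDS == 16 emits both a 4-lane and an
// 8-lane variant, covering 64-bit and 128-bit Advanced SIMD vectors.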
10816 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10817 StringRef Prefix, char ISA,
10818 StringRef ParSeq, StringRef MangledName,
10819 bool OutputBecomesInput,
10820 llvm::Function *Fn) {
10821 switch (NDS) {
10822 case 8:
10823 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10824 OutputBecomesInput, Fn);
10825 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10826 OutputBecomesInput, Fn);
10827 break;
10828 case 16:
10829 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10830 OutputBecomesInput, Fn);
10831 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10832 OutputBecomesInput, Fn);
10833 break;
10834 case 32:
10835 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10836 OutputBecomesInput, Fn);
10837 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10838 OutputBecomesInput, Fn);
10839 break;
10840 case 64:
10841 case 128:
10842 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10843 OutputBecomesInput, Fn);
10844 break;
10845 default:
10846 llvm_unreachable("Scalar type is too wide.");
10847 }
10848 }
10849
10850 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10851 static void emitAArch64DeclareSimdFunction(
10852 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10853 ArrayRef<ParamAttrTy> ParamAttrs,
10854 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10855 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10856
10857 // Get basic data for building the vector signature.
10858 const auto Data = getNDSWDS(FD, ParamAttrs);
10859 const unsigned NDS = std::get<0>(Data);
10860 const unsigned WDS = std::get<1>(Data);
10861 const bool OutputBecomesInput = std::get<2>(Data);
10862
10863 // Check the values provided via `simdlen` by the user.
10864 // 1. A `simdlen(1)` doesn't produce vector signatures.
10865 if (UserVLEN == 1) {
10866 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10867 DiagnosticsEngine::Warning,
10868 "The clause simdlen(1) has no effect when targeting aarch64.");
10869 CGM.getDiags().Report(SLoc, DiagID);
10870 return;
10871 }
10872
10873 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10874 // Advanced SIMD output.
10875 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10876 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10877 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10878 "power of 2 when targeting Advanced SIMD.");
10879 CGM.getDiags().Report(SLoc, DiagID);
10880 return;
10881 }
10882
10883 // 3. Section 3.4.1: the SVE fixed length must obey the architectural
10884 // limits.
10885 if (ISA == 's' && UserVLEN != 0) {
10886 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10887 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10888 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10889 "lanes in the architectural constraints "
10890 "for SVE (min is 128-bit, max is "
10891 "2048-bit, by steps of 128-bit)");
10892 CGM.getDiags().Report(SLoc, DiagID) << WDS;
10893 return;
10894 }
10895 }
10896
10897 // Sort out parameter sequence.
10898 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10899 StringRef Prefix = "_ZGV";
10900 // Generate simdlen from user input (if any).
10901 if (UserVLEN) {
10902 if (ISA == 's') {
10903 // SVE generates only a masked function.
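// e.g., simdlen(4) yields a single attribute of the shape
// "_ZGVsM4<params>_<mangled-name>" (illustrative name shape).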
10904 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10905 OutputBecomesInput, Fn); 10906 } else { 10907 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10908 // Advanced SIMD generates one or two functions, depending on 10909 // the `[not]inbranch` clause. 10910 switch (State) { 10911 case OMPDeclareSimdDeclAttr::BS_Undefined: 10912 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10913 OutputBecomesInput, Fn); 10914 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10915 OutputBecomesInput, Fn); 10916 break; 10917 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10918 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10919 OutputBecomesInput, Fn); 10920 break; 10921 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10922 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10923 OutputBecomesInput, Fn); 10924 break; 10925 } 10926 } 10927 } else { 10928 // If no user simdlen is provided, follow the AAVFABI rules for 10929 // generating the vector length. 10930 if (ISA == 's') { 10931 // SVE, section 3.4.1, item 1. 10932 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10933 OutputBecomesInput, Fn); 10934 } else { 10935 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10936 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10937 // two vector names depending on the use of the clause 10938 // `[not]inbranch`. 10939 switch (State) { 10940 case OMPDeclareSimdDeclAttr::BS_Undefined: 10941 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10942 OutputBecomesInput, Fn); 10943 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10944 OutputBecomesInput, Fn); 10945 break; 10946 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10947 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10948 OutputBecomesInput, Fn); 10949 break; 10950 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10951 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10952 OutputBecomesInput, Fn); 10953 break; 10954 } 10955 } 10956 } 10957 } 10958 10959 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10960 llvm::Function *Fn) { 10961 ASTContext &C = CGM.getContext(); 10962 FD = FD->getMostRecentDecl(); 10963 while (FD) { 10964 // Map params to their positions in function decl. 10965 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10966 if (isa<CXXMethodDecl>(FD)) 10967 ParamPositions.try_emplace(FD, 0); 10968 unsigned ParamPos = ParamPositions.size(); 10969 for (const ParmVarDecl *P : FD->parameters()) { 10970 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10971 ++ParamPos; 10972 } 10973 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10974 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10975 // Mark uniform parameters. 10976 for (const Expr *E : Attr->uniforms()) { 10977 E = E->IgnoreParenImpCasts(); 10978 unsigned Pos; 10979 if (isa<CXXThisExpr>(E)) { 10980 Pos = ParamPositions[FD]; 10981 } else { 10982 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10983 ->getCanonicalDecl(); 10984 auto It = ParamPositions.find(PVD); 10985 assert(It != ParamPositions.end() && "Function parameter not found"); 10986 Pos = It->second; 10987 } 10988 ParamAttrs[Pos].Kind = Uniform; 10989 } 10990 // Get alignment info. 
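// The alignment recorded here is later mangled as 'a<N>' by
// mangleVectorParameters(), e.g., aligned(p: 32) contributes "a32".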
10991 auto *NI = Attr->alignments_begin(); 10992 for (const Expr *E : Attr->aligneds()) { 10993 E = E->IgnoreParenImpCasts(); 10994 unsigned Pos; 10995 QualType ParmTy; 10996 if (isa<CXXThisExpr>(E)) { 10997 Pos = ParamPositions[FD]; 10998 ParmTy = E->getType(); 10999 } else { 11000 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11001 ->getCanonicalDecl(); 11002 auto It = ParamPositions.find(PVD); 11003 assert(It != ParamPositions.end() && "Function parameter not found"); 11004 Pos = It->second; 11005 ParmTy = PVD->getType(); 11006 } 11007 ParamAttrs[Pos].Alignment = 11008 (*NI) 11009 ? (*NI)->EvaluateKnownConstInt(C) 11010 : llvm::APSInt::getUnsigned( 11011 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11012 .getQuantity()); 11013 ++NI; 11014 } 11015 // Mark linear parameters. 11016 auto *SI = Attr->steps_begin(); 11017 auto *MI = Attr->modifiers_begin(); 11018 for (const Expr *E : Attr->linears()) { 11019 E = E->IgnoreParenImpCasts(); 11020 unsigned Pos; 11021 bool IsReferenceType = false; 11022 // Rescaling factor needed to compute the linear parameter 11023 // value in the mangled name. 11024 unsigned PtrRescalingFactor = 1; 11025 if (isa<CXXThisExpr>(E)) { 11026 Pos = ParamPositions[FD]; 11027 auto *P = cast<PointerType>(E->getType()); 11028 PtrRescalingFactor = CGM.getContext() 11029 .getTypeSizeInChars(P->getPointeeType()) 11030 .getQuantity(); 11031 } else { 11032 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11033 ->getCanonicalDecl(); 11034 auto It = ParamPositions.find(PVD); 11035 assert(It != ParamPositions.end() && "Function parameter not found"); 11036 Pos = It->second; 11037 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11038 PtrRescalingFactor = CGM.getContext() 11039 .getTypeSizeInChars(P->getPointeeType()) 11040 .getQuantity(); 11041 else if (PVD->getType()->isReferenceType()) { 11042 IsReferenceType = true; 11043 PtrRescalingFactor = 11044 CGM.getContext() 11045 .getTypeSizeInChars(PVD->getType().getNonReferenceType()) 11046 .getQuantity(); 11047 } 11048 } 11049 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11050 if (*MI == OMPC_LINEAR_ref) 11051 ParamAttr.Kind = LinearRef; 11052 else if (*MI == OMPC_LINEAR_uval) 11053 ParamAttr.Kind = LinearUVal; 11054 else if (IsReferenceType) 11055 ParamAttr.Kind = LinearVal; 11056 else 11057 ParamAttr.Kind = Linear; 11058 // Assuming a stride of 1, for `linear` without modifiers. 11059 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11060 if (*SI) { 11061 Expr::EvalResult Result; 11062 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11063 if (const auto *DRE = 11064 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11065 if (const auto *StridePVD = 11066 dyn_cast<ParmVarDecl>(DRE->getDecl())) { 11067 ParamAttr.HasVarStride = true; 11068 auto It = ParamPositions.find(StridePVD->getCanonicalDecl()); 11069 assert(It != ParamPositions.end() && 11070 "Function parameter not found"); 11071 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second); 11072 } 11073 } 11074 } else { 11075 ParamAttr.StrideOrArg = Result.Val.getInt(); 11076 } 11077 } 11078 // If we are using a linear clause on a pointer, we need to 11079 // rescale the value of linear_step with the byte size of the 11080 // pointee type. 
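// e.g., (assumed example) linear(p: 2) with 'int *p' rescales the step to
// 2 * sizeof(int) = 8 before it is mangled.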
11081 if (!ParamAttr.HasVarStride && 11082 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef)) 11083 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11084 ++SI; 11085 ++MI; 11086 } 11087 llvm::APSInt VLENVal; 11088 SourceLocation ExprLoc; 11089 const Expr *VLENExpr = Attr->getSimdlen(); 11090 if (VLENExpr) { 11091 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11092 ExprLoc = VLENExpr->getExprLoc(); 11093 } 11094 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11095 if (CGM.getTriple().isX86()) { 11096 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11097 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11098 unsigned VLEN = VLENVal.getExtValue(); 11099 StringRef MangledName = Fn->getName(); 11100 if (CGM.getTarget().hasFeature("sve")) 11101 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11102 MangledName, 's', 128, Fn, ExprLoc); 11103 else if (CGM.getTarget().hasFeature("neon")) 11104 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11105 MangledName, 'n', 128, Fn, ExprLoc); 11106 } 11107 } 11108 FD = FD->getPreviousDecl(); 11109 } 11110 } 11111 11112 namespace { 11113 /// Cleanup action for doacross support. 11114 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 11115 public: 11116 static const int DoacrossFinArgs = 2; 11117 11118 private: 11119 llvm::FunctionCallee RTLFn; 11120 llvm::Value *Args[DoacrossFinArgs]; 11121 11122 public: 11123 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 11124 ArrayRef<llvm::Value *> CallArgs) 11125 : RTLFn(RTLFn) { 11126 assert(CallArgs.size() == DoacrossFinArgs); 11127 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11128 } 11129 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11130 if (!CGF.HaveInsertPoint()) 11131 return; 11132 CGF.EmitRuntimeCall(RTLFn, Args); 11133 } 11134 }; 11135 } // namespace 11136 11137 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11138 const OMPLoopDirective &D, 11139 ArrayRef<Expr *> NumIterations) { 11140 if (!CGF.HaveInsertPoint()) 11141 return; 11142 11143 ASTContext &C = CGM.getContext(); 11144 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 11145 RecordDecl *RD; 11146 if (KmpDimTy.isNull()) { 11147 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 11148 // kmp_int64 lo; // lower 11149 // kmp_int64 up; // upper 11150 // kmp_int64 st; // stride 11151 // }; 11152 RD = C.buildImplicitRecord("kmp_dim"); 11153 RD->startDefinition(); 11154 addFieldToRecordDecl(C, RD, Int64Ty); 11155 addFieldToRecordDecl(C, RD, Int64Ty); 11156 addFieldToRecordDecl(C, RD, Int64Ty); 11157 RD->completeDefinition(); 11158 KmpDimTy = C.getRecordType(RD); 11159 } else { 11160 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 11161 } 11162 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 11163 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr, 11164 ArraySizeModifier::Normal, 0); 11165 11166 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 11167 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 11168 enum { LowerFD = 0, UpperFD, StrideFD }; 11169 // Fill dims with data. 
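// Note: only the 'up' and 'st' fields are written below; 'lo' keeps the
// zero value stored by the EmitNullInitialization() call above.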
11170 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11171 LValue DimsLVal = CGF.MakeAddrLValue( 11172 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11173 // dims.upper = num_iterations; 11174 LValue UpperLVal = CGF.EmitLValueForField( 11175 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11176 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 11177 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 11178 Int64Ty, NumIterations[I]->getExprLoc()); 11179 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11180 // dims.stride = 1; 11181 LValue StrideLVal = CGF.EmitLValueForField( 11182 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11183 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11184 StrideLVal); 11185 } 11186 11187 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11188 // kmp_int32 num_dims, struct kmp_dim * dims); 11189 llvm::Value *Args[] = { 11190 emitUpdateLocation(CGF, D.getBeginLoc()), 11191 getThreadID(CGF, D.getBeginLoc()), 11192 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11193 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11194 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF), 11195 CGM.VoidPtrTy)}; 11196 11197 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11198 CGM.getModule(), OMPRTL___kmpc_doacross_init); 11199 CGF.EmitRuntimeCall(RTLFn, Args); 11200 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11201 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11202 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11203 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 11204 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11205 llvm::ArrayRef(FiniArgs)); 11206 } 11207 11208 template <typename T> 11209 static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, 11210 const T *C, llvm::Value *ULoc, 11211 llvm::Value *ThreadID) { 11212 QualType Int64Ty = 11213 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11214 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11215 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11216 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0); 11217 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11218 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11219 const Expr *CounterVal = C->getLoopData(I); 11220 assert(CounterVal); 11221 llvm::Value *CntVal = CGF.EmitScalarConversion( 11222 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11223 CounterVal->getExprLoc()); 11224 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11225 /*Volatile=*/false, Int64Ty); 11226 } 11227 llvm::Value *Args[] = { 11228 ULoc, ThreadID, 11229 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)}; 11230 llvm::FunctionCallee RTLFn; 11231 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 11232 OMPDoacrossKind<T> ODK; 11233 if (ODK.isSource(C)) { 11234 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11235 OMPRTL___kmpc_doacross_post); 11236 } else { 11237 assert(ODK.isSink(C) && "Expect sink modifier."); 11238 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11239 OMPRTL___kmpc_doacross_wait); 11240 } 11241 CGF.EmitRuntimeCall(RTLFn, Args); 11242 } 11243 11244 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11245 const OMPDependClause *C) { 11246 return 
EmitDoacrossOrdered<OMPDependClause>(
11247 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11248 getThreadID(CGF, C->getBeginLoc()));
11249 }
11250
11251 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11252 const OMPDoacrossClause *C) {
11253 return EmitDoacrossOrdered<OMPDoacrossClause>(
11254 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11255 getThreadID(CGF, C->getBeginLoc()));
11256 }
11257
11258 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11259 llvm::FunctionCallee Callee,
11260 ArrayRef<llvm::Value *> Args) const {
11261 assert(Loc.isValid() && "Outlined function call location must be valid.");
11262 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11263
11264 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11265 if (Fn->doesNotThrow()) {
11266 CGF.EmitNounwindRuntimeCall(Fn, Args);
11267 return;
11268 }
11269 }
11270 CGF.EmitRuntimeCall(Callee, Args);
11271 }
11272
11273 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11274 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11275 ArrayRef<llvm::Value *> Args) const {
11276 emitCall(CGF, Loc, OutlinedFn, Args);
11277 }
11278
11279 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11280 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11281 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11282 HasEmittedDeclareTargetRegion = true;
11283 }
11284
11285 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11286 const VarDecl *NativeParam,
11287 const VarDecl *TargetParam) const {
11288 return CGF.GetAddrOfLocalVar(NativeParam);
11289 }
11290
11291 /// Return the allocator value from the expression, or return a null allocator
11292 /// (the default when no allocator is specified).
11293 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11294 const Expr *Allocator) {
11295 llvm::Value *AllocVal;
11296 if (Allocator) {
11297 AllocVal = CGF.EmitScalarExpr(Allocator);
11298 // According to the standard, the original allocator type is an enum
11299 // (integer). Convert to pointer type, if required.
11300 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11301 CGF.getContext().VoidPtrTy,
11302 Allocator->getExprLoc());
11303 } else {
11304 // If no allocator is specified, it defaults to the null allocator.
11305 AllocVal = llvm::Constant::getNullValue(
11306 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11307 }
11308 return AllocVal;
11309 }
11310
11311 /// Return the alignment from an allocate directive if present.
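/// For example, an 'align(16)' modifier yields the size_t constant 16.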
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}
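
/// RAII for the stack of 'nontemporal' declarations. For, e.g.,
/// '#pragma omp simd nontemporal(a, b)' the referenced declarations are
/// collected so that accesses to them within the region can be emitted as
/// nontemporal loads and stores.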
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}
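
/// RAII for the per-function map of untied-task local variables. Locals of an
/// untied task may have to outlive a task switching point, so their addresses
/// are looked up through this map instead of the regular local allocas.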
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}
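
/// Collect the declarations for which lastprivate conditional analysis must be
/// disabled in an inner region: variables captured into target/task regions
/// and variables already privatized by private, firstprivate, lastprivate,
/// reduction, or linear clauses.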
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}
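
/// If the directive contains 'lastprivate(conditional: ...)' clauses (OpenMP
/// 5.0 and later), push the referenced declarations together with unique
/// global names (derived from "pl_cond") onto the lastprivate conditional
/// stack for this region.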
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}
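
/// Create (or reuse) the private copy for a lastprivate conditional variable.
/// The copy is wrapped in an implicit record of the form
///   struct { <original type> Val; char Fired; };
/// 'Fired' tracks whether the variable was actually assigned in the region;
/// it is zero-initialized here and the address of 'Val' is returned.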
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr =
        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
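
/// Emit the update of the global "last" copy of a lastprivate conditional
/// variable. Under a critical section keyed by the variable's unique name
/// this performs, conceptually,
///   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
/// so the value from the iteration with the largest loop counter wins.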
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal =
      CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
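
/// If LHS references a lastprivate conditional variable, emit its update.
/// When the store happens in an inner region (a function other than the one
/// that registered the variable), only the 'Fired' flag of the wrapper struct
/// is set atomically; the copy-out to the global copy happens in the outer
/// region.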
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
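
/// For variables registered as lastprivate conditional and captured by the
/// region, test the 'Fired' flag that inner regions may have set and, when it
/// is non-zero, emit the conditional lastprivate update from the captured
/// variable.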
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}
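
/// Final copy-out for a lastprivate conditional variable: if the internal
/// global tracking its last value exists (i.e. the variable was updated
/// somewhere in the region), load that value and store it into the original
/// variable.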
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeRawAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
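
// CGOpenMPSIMDRuntime implements the entry points for the SIMD-only mode
// (-fopenmp-simd), where only simd-based constructs are honored; everything
// that would require the OpenMP runtime library is deliberately unreachable.
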
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                                SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}