//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
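// Note on UntiedTaskActionTy above: for an untied task body with two task
// switching points, the emitted outlined function is structured roughly as
// follows (an illustrative sketch, not literal output):
//
//   switch (*part_id) {           // UntiedSwitch created in Enter()
//   case 0: goto .untied.jmp.0;   // initial entry
//   case 1: goto .untied.jmp.1;   // resume after first switching point
//   default: goto .untied.done.;  // finished
//   }
//
// Each call to emitUntiedSwitch() stores the index of the next case to
// *part_id, re-enqueues the task via UntiedCodeGen, branches to the return
// block, and adds a new case so the task can later be resumed at the
// recorded part.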

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
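
// Illustrative usage of InlinedOpenMPRegionRAII (a sketch, not a verbatim
// excerpt from this file): the RAII object temporarily installs a
// CGOpenMPInlinedRegionInfo so the statements of an inlined construct are
// emitted directly into the current function, and the destructor restores the
// previous CapturedStmtInfo:
//
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_critical,
//                                    /*HasCancel=*/false);
//     CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
//   } // Previous CapturedStmtInfo restored here.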

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
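
// Note that the "composite" implicit-barrier flags above are the base
// OMP_IDENT_BARRIER_IMPL bit plus a discriminator (values restated from the
// enum for illustration):
//
//   OMP_IDENT_BARRIER_IMPL_SECTIONS == OMP_IDENT_BARRIER_IMPL | 0x80   // 0xC0
//   OMP_IDENT_BARRIER_IMPL_SINGLE   == OMP_IDENT_BARRIER_IMPL | 0x100  // 0x140
//
// so, for example, an ident_t describing the implicit barrier at the end of a
// 'sections' construct would carry OMP_IDENT_BARRIER_IMPL_SECTIONS in its
// flags field.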

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
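
// A sketch of the resulting emission order (illustrative only): with a
// PrePostActionTy A installed via setAction(A), invoking a RegionCodeGenTy R
// as R(CGF) yields roughly
//
//   A.Enter(CGF);  // typically invoked by the codegen callback itself
//   <region body>  // the CodeGen callback
//   A.Exit(CGF);   // run from CleanupTy, including on EH paths
//
// Registering Exit() as a cleanup above, rather than calling it directly,
// is what makes the post-action run even when the body exits via an
// exception.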
/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
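
// The control flow emitted above is, roughly (an illustrative pseudo-IR
// sketch using the block names created in EmitOMPAggregateInit; value names
// are not literal):
//
//   entry:
//     %isempty = icmp eq ptr %dest.begin, %dest.end
//     br i1 %isempty, label %omp.arrayinit.done, label %omp.arrayinit.body
//   omp.arrayinit.body:
//     %dest = phi ptr [ %dest.begin, %entry ], [ %dest.next, ... ]
//     ; initialize *%dest (default init or UDR initializer)
//     %dest.next = getelementptr %ElemTy, ptr %dest, i32 1
//     %done = icmp eq ptr %dest.next, %dest.end
//     br i1 %done, label %omp.arrayinit.done, label %omp.arrayinit.body
//   omp.arrayinit.done: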

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF,
                                          const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(
            OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars = CGF.getTypeSize(
        OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress(CGF).withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF,
                                               unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if 'omp requires
  // unified_shared_memory' was specified.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}
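
// For reference, a user-defined reduction such as (an illustrative example,
// not drawn from a test in this file):
//
//   #pragma omp declare reduction(merge : T : omp_out.append(omp_in)) \
//       initializer(omp_priv = T())
//
// is lowered by the functions above to a pair of internal helpers shaped like
// "void .omp_combiner.(T *omp_out, T *omp_in)" and
// "void .omp_initializer.(T *omp_priv, T *omp_orig)" (exact names are
// platform-specific); the pair is cached in UDRMap so that each
// OMPDeclareReductionDecl is emitted at most once.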

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at
    // IP, use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed, but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}
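
// For a construct at, say, line 10, column 3 of "f.c" inside function "foo"
// (a hypothetical example), the psource string built by
// getIdentStringFromSourceLocation has the form ";f.c;foo;10;3;;", and
// emitUpdateLocation returns a pointer to a matching constant ident_t global
// created by the OpenMPIRBuilder.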

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls
  // as the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use the parameter to avoid a possible
  // crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
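
// In the fallback case above, the emitted IR looks roughly like (an
// illustrative sketch; value and global names are not literal):
//
//   entry:
//     %gtid = call i32 @__kmpc_global_thread_num(ptr @loc)
//
// The call is placed at the cached ServiceInsertPt in the entry block so
// that all later uses within the function can share this single cached
// value.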
1446 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1447 if (!Elem.second.ServiceInsertPt)
1448 setLocThreadIdInsertPt(CGF);
1449 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1450 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1451 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
1452 llvm::CallInst *Call = CGF.Builder.CreateCall(
1453 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1454 OMPRTL___kmpc_global_thread_num),
1455 emitUpdateLocation(CGF, Loc));
1456 Call->setCallingConv(CGF.getRuntimeCC());
1457 Elem.second.ThreadID = Call;
1458 return Call;
1459 }
1460
1461 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1462 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1463 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1464 clearLocThreadIdInsertPt(CGF);
1465 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1466 }
1467 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1468 for (const auto *D : FunctionUDRMap[CGF.CurFn])
1469 UDRMap.erase(D);
1470 FunctionUDRMap.erase(CGF.CurFn);
1471 }
1472 auto I = FunctionUDMMap.find(CGF.CurFn);
1473 if (I != FunctionUDMMap.end()) {
1474 for (const auto *D : I->second)
1475 UDMMap.erase(D);
1476 FunctionUDMMap.erase(I);
1477 }
1478 LastprivateConditionalToTypes.erase(CGF.CurFn);
1479 FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1480 }
1481
1482 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1483 return OMPBuilder.IdentPtr;
1484 }
1485
1486 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1487 if (!Kmpc_MicroTy) {
1488 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1489 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1490 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1491 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1492 }
1493 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1494 }
1495
1496 llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1497 convertDeviceClause(const VarDecl *VD) {
1498 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1499 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1500 if (!DevTy)
1501 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1502
1503 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1504 case OMPDeclareTargetDeclAttr::DT_Host:
1505 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1507 case OMPDeclareTargetDeclAttr::DT_NoHost:
1508 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1510 case OMPDeclareTargetDeclAttr::DT_Any:
1511 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1513 default:
1514 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1516 }
1517 }
1518
1519 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1520 convertCaptureClause(const VarDecl *VD) {
1521 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1522 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1523 if (!MapType)
1524 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1525 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1526 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1527 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1529 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1530 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1532 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1533 return
llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1535 default:
1536 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1538 }
1539 }
1540
1541 static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1542 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1543 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1544
1545 auto FileInfoCallBack = [&]() {
1546 SourceManager &SM = CGM.getContext().getSourceManager();
1547 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1548
1549 llvm::sys::fs::UniqueID ID;
1550 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1551 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1552 }
1553
1554 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1555 };
1556
1557 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1558 }
1559
1560 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1561 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1562
1563 auto LinkageForVariable = [&VD, this]() {
1564 return CGM.getLLVMLinkageVarDefinition(VD);
1565 };
1566
1567 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1568
1569 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1570 CGM.getContext().getPointerType(VD->getType()));
1571 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1572 convertCaptureClause(VD), convertDeviceClause(VD),
1573 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1574 VD->isExternallyVisible(),
1575 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1576 VD->getCanonicalDecl()->getBeginLoc()),
1577 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1578 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1579 LinkageForVariable);
1580
1581 if (!addr)
1582 return Address::invalid();
1583 return Address(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1584 }
1585
1586 llvm::Constant *
1587 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1588 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1589 !CGM.getContext().getTargetInfo().isTLSSupported());
1590 // Look up the entry, lazily creating it if necessary.
1591 std::string Suffix = getName({"cache", ""});
1592 return OMPBuilder.getOrCreateInternalVariable(
1593 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1594 }
1595
1596 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1597 const VarDecl *VD,
1598 Address VDAddr,
1599 SourceLocation Loc) {
1600 if (CGM.getLangOpts().OpenMPUseTLS &&
1601 CGM.getContext().getTargetInfo().isTLSSupported())
1602 return VDAddr;
1603
1604 llvm::Type *VarTy = VDAddr.getElementType();
1605 llvm::Value *Args[] = {
1606 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1607 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1608 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1609 getOrCreateThreadPrivateCache(VD)};
1610 return Address(
1611 CGF.EmitRuntimeCall(
1612 OMPBuilder.getOrCreateRuntimeFunction(
1613 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1614 Args),
1615 CGF.Int8Ty, VDAddr.getAlignment());
1616 }
1617
1618 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1619 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1620 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1621 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init the OpenMP runtime
1622 // library.
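// A sketch of the sequence emitted below (assuming the default host runtime):
//
//   call i32 @__kmpc_global_thread_num(ptr @loc)
//   call void @__kmpc_threadprivate_register(ptr @loc, ptr %var,
//                                            ptr @ctor, ptr null, ptr @dtor)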
1623 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1624 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1625 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1626 OMPLoc); 1627 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1628 // to register constructor/destructor for variable. 1629 llvm::Value *Args[] = { 1630 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1631 Ctor, CopyCtor, Dtor}; 1632 CGF.EmitRuntimeCall( 1633 OMPBuilder.getOrCreateRuntimeFunction( 1634 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1635 Args); 1636 } 1637 1638 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1639 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1640 bool PerformInit, CodeGenFunction *CGF) { 1641 if (CGM.getLangOpts().OpenMPUseTLS && 1642 CGM.getContext().getTargetInfo().isTLSSupported()) 1643 return nullptr; 1644 1645 VD = VD->getDefinition(CGM.getContext()); 1646 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1647 QualType ASTTy = VD->getType(); 1648 1649 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1650 const Expr *Init = VD->getAnyInitializer(); 1651 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1652 // Generate function that re-emits the declaration's initializer into the 1653 // threadprivate copy of the variable VD 1654 CodeGenFunction CtorCGF(CGM); 1655 FunctionArgList Args; 1656 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1657 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1658 ImplicitParamKind::Other); 1659 Args.push_back(&Dst); 1660 1661 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1662 CGM.getContext().VoidPtrTy, Args); 1663 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1664 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1665 llvm::Function *Fn = 1666 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1667 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1668 Args, Loc, Loc); 1669 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1670 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1671 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1672 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy), 1673 VDAddr.getAlignment()); 1674 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1675 /*IsInitializer=*/true); 1676 ArgVal = CtorCGF.EmitLoadOfScalar( 1677 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1678 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1679 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1680 CtorCGF.FinishFunction(); 1681 Ctor = Fn; 1682 } 1683 if (VD->getType().isDestructedType() != QualType::DK_none) { 1684 // Generate function that emits destructor call for the threadprivate copy 1685 // of the variable VD 1686 CodeGenFunction DtorCGF(CGM); 1687 FunctionArgList Args; 1688 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1689 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1690 ImplicitParamKind::Other); 1691 Args.push_back(&Dst); 1692 1693 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1694 CGM.getContext().VoidTy, Args); 1695 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1696 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1697 llvm::Function *Fn = 1698 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1699 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1700 DtorCGF.StartFunction(GlobalDecl(), 
CGM.getContext().VoidTy, Fn, FI, Args,
1701 Loc, Loc);
1702 // Create a scope with an artificial location for the body of this function.
1703 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1704 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1705 DtorCGF.GetAddrOfLocalVar(&Dst),
1706 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1707 DtorCGF.emitDestroy(
1708 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1709 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1710 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1711 DtorCGF.FinishFunction();
1712 Dtor = Fn;
1713 }
1714 // Do not emit the init function if it is not required.
1715 if (!Ctor && !Dtor)
1716 return nullptr;
1717
1718 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1719 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1720 /*isVarArg=*/false)
1721 ->getPointerTo();
1722 // Copying constructor for the threadprivate variable. This parameter is
1723 // reserved by the runtime, which currently requires it to always be NULL;
1724 // otherwise it fires an assertion.
1725 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1726 if (Ctor == nullptr) {
1727 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1728 /*isVarArg=*/false)
1729 ->getPointerTo();
1730 Ctor = llvm::Constant::getNullValue(CtorTy);
1731 }
1732 if (Dtor == nullptr) {
1733 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1734 /*isVarArg=*/false)
1735 ->getPointerTo();
1736 Dtor = llvm::Constant::getNullValue(DtorTy);
1737 }
1738 if (!CGF) {
1739 auto *InitFunctionTy =
1740 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
1741 std::string Name = getName({"__omp_threadprivate_init_", ""});
1742 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1743 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1744 CodeGenFunction InitCGF(CGM);
1745 FunctionArgList ArgList;
1746 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1747 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1748 Loc, Loc);
1749 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1750 InitCGF.FinishFunction();
1751 return InitFunction;
1752 }
1753 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1754 }
1755 return nullptr;
1756 }
1757
1758 void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1759 llvm::GlobalValue *GV) {
1760 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1761 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1762
1763 // We only need to handle active 'indirect' declare target functions.
1764 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1765 return;
1766
1767 // Get a mangled name to store the new device global in.
1768 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1769 CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1770 SmallString<128> Name;
1771 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1772
1773 // We need to generate a new global to hold the address of the indirectly
1774 // called device function. Doing this allows us to keep the visibility and
1775 // linkage of the associated function unchanged while allowing the runtime to
1776 // access its value.
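// For example (a sketch), if 'foo' is a declare target function marked with
// the 'indirect' clause, the device compilation gets roughly
//
//   @<offload_entry_name> = protected constant ptr @foo
//
// which the runtime uses to map host function pointers to their device
// counterparts.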
1777 llvm::GlobalValue *Addr = GV; 1778 if (CGM.getLangOpts().OpenMPIsTargetDevice) { 1779 Addr = new llvm::GlobalVariable( 1780 CGM.getModule(), CGM.VoidPtrTy, 1781 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name, 1782 nullptr, llvm::GlobalValue::NotThreadLocal, 1783 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace()); 1784 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility); 1785 } 1786 1787 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo( 1788 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(), 1789 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect, 1790 llvm::GlobalValue::WeakODRLinkage); 1791 } 1792 1793 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 1794 QualType VarType, 1795 StringRef Name) { 1796 std::string Suffix = getName({"artificial", ""}); 1797 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 1798 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable( 1799 VarLVType, Twine(Name).concat(Suffix).str()); 1800 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 1801 CGM.getTarget().isTLSSupported()) { 1802 GAddr->setThreadLocal(/*Val=*/true); 1803 return Address(GAddr, GAddr->getValueType(), 1804 CGM.getContext().getTypeAlignInChars(VarType)); 1805 } 1806 std::string CacheSuffix = getName({"cache", ""}); 1807 llvm::Value *Args[] = { 1808 emitUpdateLocation(CGF, SourceLocation()), 1809 getThreadID(CGF, SourceLocation()), 1810 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 1811 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 1812 /*isSigned=*/false), 1813 OMPBuilder.getOrCreateInternalVariable( 1814 CGM.VoidPtrPtrTy, 1815 Twine(Name).concat(Suffix).concat(CacheSuffix).str())}; 1816 return Address( 1817 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1818 CGF.EmitRuntimeCall( 1819 OMPBuilder.getOrCreateRuntimeFunction( 1820 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1821 Args), 1822 VarLVType->getPointerTo(/*AddrSpace=*/0)), 1823 VarLVType, CGM.getContext().getTypeAlignInChars(VarType)); 1824 } 1825 1826 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 1827 const RegionCodeGenTy &ThenGen, 1828 const RegionCodeGenTy &ElseGen) { 1829 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 1830 1831 // If the condition constant folds and can be elided, try to avoid emitting 1832 // the condition and the dead arm of the if/else. 1833 bool CondConstant; 1834 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 1835 if (CondConstant) 1836 ThenGen(CGF); 1837 else 1838 ElseGen(CGF); 1839 return; 1840 } 1841 1842 // Otherwise, the condition did not fold, or we couldn't elide it. Just 1843 // emit the conditional branch. 1844 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 1845 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 1846 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 1847 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 1848 1849 // Emit the 'then' code. 1850 CGF.EmitBlock(ThenBlock); 1851 ThenGen(CGF); 1852 CGF.EmitBranch(ContBlock); 1853 // Emit the 'else' code if present. 1854 // There is no need to emit line number for unconditional branch. 1855 (void)ApplyDebugLocation::CreateEmpty(CGF); 1856 CGF.EmitBlock(ElseBlock); 1857 ElseGen(CGF); 1858 // There is no need to emit line number for unconditional branch. 
1859 (void)ApplyDebugLocation::CreateEmpty(CGF);
1860 CGF.EmitBranch(ContBlock);
1861 // Emit the continuation block for code after the if.
1862 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1863 }
1864
1865 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1866 llvm::Function *OutlinedFn,
1867 ArrayRef<llvm::Value *> CapturedVars,
1868 const Expr *IfCond,
1869 llvm::Value *NumThreads) {
1870 if (!CGF.HaveInsertPoint())
1871 return;
1872 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1873 auto &M = CGM.getModule();
1874 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1875 this](CodeGenFunction &CGF, PrePostActionTy &) {
1876 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1877 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1878 llvm::Value *Args[] = {
1879 RTLoc,
1880 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1881 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1882 llvm::SmallVector<llvm::Value *, 16> RealArgs;
1883 RealArgs.append(std::begin(Args), std::end(Args));
1884 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1885
1886 llvm::FunctionCallee RTLFn =
1887 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1888 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1889 };
1890 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1891 this](CodeGenFunction &CGF, PrePostActionTy &) {
1892 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1893 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1894 // Build calls:
1895 // __kmpc_serialized_parallel(&Loc, GTid);
1896 llvm::Value *Args[] = {RTLoc, ThreadID};
1897 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1898 M, OMPRTL___kmpc_serialized_parallel),
1899 Args);
1900
1901 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1902 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1903 Address ZeroAddrBound =
1904 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1905 /*Name=*/".bound.zero.addr");
1906 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1907 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1908 // ThreadId for serialized parallels is 0.
1909 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1910 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1911 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1912
1913 // Ensure we do not inline the function. This is trivially true for the ones
1914 // passed to __kmpc_fork_call but the ones called in serialized regions
1915 // could be inlined. This is not perfect but it is closer to the invariant
1916 // we want, namely, every data environment starts with a new function.
1917 // TODO: We should pass the if condition to the runtime function and do the
1918 // handling there. Much cleaner code.
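// A sketch of the serialized lowering emitted by this ElseGen (host runtime):
//
//   call void @__kmpc_serialized_parallel(ptr @loc, i32 %gtid)
//   call void @outlined(ptr %threadid.addr, ptr %.bound.zero.addr, ...)
//   call void @__kmpc_end_serialized_parallel(ptr @loc, i32 %gtid)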
1919 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1920 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1921 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1922
1923 // __kmpc_end_serialized_parallel(&Loc, GTid);
1924 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1925 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1926 M, OMPRTL___kmpc_end_serialized_parallel),
1927 EndArgs);
1928 };
1929 if (IfCond) {
1930 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1931 } else {
1932 RegionCodeGenTy ThenRCG(ThenGen);
1933 ThenRCG(CGF);
1934 }
1935 }
1936
1937 // If we're inside an (outlined) parallel region, use the region info's
1938 // thread-ID variable (it is passed as the first argument of the outlined
1939 // function, as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
1940 // region but in a regular serial code region, get the thread ID by calling
1941 // kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash it in a temporary,
1942 // and return the address of that temporary.
1943 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1944 SourceLocation Loc) {
1945 if (auto *OMPRegionInfo =
1946 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1947 if (OMPRegionInfo->getThreadIDVariable())
1948 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
1949
1950 llvm::Value *ThreadID = getThreadID(CGF, Loc);
1951 QualType Int32Ty =
1952 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1953 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1954 CGF.EmitStoreOfScalar(ThreadID,
1955 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1956
1957 return ThreadIDTemp;
1958 }
1959
1960 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1961 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1962 std::string Name = getName({Prefix, "var"});
1963 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1964 }
1965
1966 namespace {
1967 /// Common pre(post)-action for different OpenMP constructs.
1968 class CommonActionTy final : public PrePostActionTy { 1969 llvm::FunctionCallee EnterCallee; 1970 ArrayRef<llvm::Value *> EnterArgs; 1971 llvm::FunctionCallee ExitCallee; 1972 ArrayRef<llvm::Value *> ExitArgs; 1973 bool Conditional; 1974 llvm::BasicBlock *ContBlock = nullptr; 1975 1976 public: 1977 CommonActionTy(llvm::FunctionCallee EnterCallee, 1978 ArrayRef<llvm::Value *> EnterArgs, 1979 llvm::FunctionCallee ExitCallee, 1980 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 1981 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 1982 ExitArgs(ExitArgs), Conditional(Conditional) {} 1983 void Enter(CodeGenFunction &CGF) override { 1984 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 1985 if (Conditional) { 1986 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 1987 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 1988 ContBlock = CGF.createBasicBlock("omp_if.end"); 1989 // Generate the branch (If-stmt) 1990 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 1991 CGF.EmitBlock(ThenBlock); 1992 } 1993 } 1994 void Done(CodeGenFunction &CGF) { 1995 // Emit the rest of blocks/branches 1996 CGF.EmitBranch(ContBlock); 1997 CGF.EmitBlock(ContBlock, true); 1998 } 1999 void Exit(CodeGenFunction &CGF) override { 2000 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2001 } 2002 }; 2003 } // anonymous namespace 2004 2005 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2006 StringRef CriticalName, 2007 const RegionCodeGenTy &CriticalOpGen, 2008 SourceLocation Loc, const Expr *Hint) { 2009 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2010 // CriticalOpGen(); 2011 // __kmpc_end_critical(ident_t *, gtid, Lock); 2012 // Prepare arguments and build a call to __kmpc_critical 2013 if (!CGF.HaveInsertPoint()) 2014 return; 2015 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2016 getCriticalRegionLock(CriticalName)}; 2017 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2018 std::end(Args)); 2019 if (Hint) { 2020 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2021 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2022 } 2023 CommonActionTy Action( 2024 OMPBuilder.getOrCreateRuntimeFunction( 2025 CGM.getModule(), 2026 Hint ? 
OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2027 EnterArgs,
2028 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2029 OMPRTL___kmpc_end_critical),
2030 Args);
2031 CriticalOpGen.setAction(Action);
2032 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2033 }
2034
2035 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2036 const RegionCodeGenTy &MasterOpGen,
2037 SourceLocation Loc) {
2038 if (!CGF.HaveInsertPoint())
2039 return;
2040 // if(__kmpc_master(ident_t *, gtid)) {
2041 // MasterOpGen();
2042 // __kmpc_end_master(ident_t *, gtid);
2043 // }
2044 // Prepare arguments and build a call to __kmpc_master
2045 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2046 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2047 CGM.getModule(), OMPRTL___kmpc_master),
2048 Args,
2049 OMPBuilder.getOrCreateRuntimeFunction(
2050 CGM.getModule(), OMPRTL___kmpc_end_master),
2051 Args,
2052 /*Conditional=*/true);
2053 MasterOpGen.setAction(Action);
2054 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2055 Action.Done(CGF);
2056 }
2057
2058 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2059 const RegionCodeGenTy &MaskedOpGen,
2060 SourceLocation Loc, const Expr *Filter) {
2061 if (!CGF.HaveInsertPoint())
2062 return;
2063 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2064 // MaskedOpGen();
2065 // __kmpc_end_masked(ident_t *, gtid);
2066 // }
2067 // Prepare arguments and build a call to __kmpc_masked
2068 llvm::Value *FilterVal = Filter
2069 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2070 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2071 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2072 FilterVal};
2073 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2074 getThreadID(CGF, Loc)};
2075 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2076 CGM.getModule(), OMPRTL___kmpc_masked),
2077 Args,
2078 OMPBuilder.getOrCreateRuntimeFunction(
2079 CGM.getModule(), OMPRTL___kmpc_end_masked),
2080 ArgsEnd,
2081 /*Conditional=*/true);
2082 MaskedOpGen.setAction(Action);
2083 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2084 Action.Done(CGF);
2085 }
2086
2087 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2088 SourceLocation Loc) {
2089 if (!CGF.HaveInsertPoint())
2090 return;
2091 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2092 OMPBuilder.createTaskyield(CGF.Builder);
2093 } else {
2094 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2095 llvm::Value *Args[] = {
2096 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2097 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2098 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2099 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2100 Args);
2101 }
2102
2103 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2104 Region->emitUntiedSwitch(CGF);
2105 }
2106
2107 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2108 const RegionCodeGenTy &TaskgroupOpGen,
2109 SourceLocation Loc) {
2110 if (!CGF.HaveInsertPoint())
2111 return;
2112 // __kmpc_taskgroup(ident_t *, gtid);
2113 // TaskgroupOpGen();
2114 // __kmpc_end_taskgroup(ident_t *, gtid);
2115 // Prepare arguments and build a call to __kmpc_taskgroup
2116 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2117 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2118 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2119 Args,
2120
OMPBuilder.getOrCreateRuntimeFunction( 2121 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2122 Args); 2123 TaskgroupOpGen.setAction(Action); 2124 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2125 } 2126 2127 /// Given an array of pointers to variables, project the address of a 2128 /// given variable. 2129 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2130 unsigned Index, const VarDecl *Var) { 2131 // Pull out the pointer to the variable. 2132 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2133 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2134 2135 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType()); 2136 return Address( 2137 CGF.Builder.CreateBitCast( 2138 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())), 2139 ElemTy, CGF.getContext().getDeclAlign(Var)); 2140 } 2141 2142 static llvm::Value *emitCopyprivateCopyFunction( 2143 CodeGenModule &CGM, llvm::Type *ArgsElemType, 2144 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2145 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2146 SourceLocation Loc) { 2147 ASTContext &C = CGM.getContext(); 2148 // void copy_func(void *LHSArg, void *RHSArg); 2149 FunctionArgList Args; 2150 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2151 ImplicitParamKind::Other); 2152 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2153 ImplicitParamKind::Other); 2154 Args.push_back(&LHSArg); 2155 Args.push_back(&RHSArg); 2156 const auto &CGFI = 2157 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2158 std::string Name = 2159 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2160 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2161 llvm::GlobalValue::InternalLinkage, Name, 2162 &CGM.getModule()); 2163 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2164 Fn->setDoesNotRecurse(); 2165 CodeGenFunction CGF(CGM); 2166 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2167 // Dest = (void*[n])(LHSArg); 2168 // Src = (void*[n])(RHSArg); 2169 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2170 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2171 ArgsElemType->getPointerTo()), 2172 ArgsElemType, CGF.getPointerAlign()); 2173 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2174 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2175 ArgsElemType->getPointerTo()), 2176 ArgsElemType, CGF.getPointerAlign()); 2177 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2178 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2179 // ... 
2180 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2181 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2182 const auto *DestVar = 2183 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2184 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2185 2186 const auto *SrcVar = 2187 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2188 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2189 2190 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2191 QualType Type = VD->getType(); 2192 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2193 } 2194 CGF.FinishFunction(); 2195 return Fn; 2196 } 2197 2198 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2199 const RegionCodeGenTy &SingleOpGen, 2200 SourceLocation Loc, 2201 ArrayRef<const Expr *> CopyprivateVars, 2202 ArrayRef<const Expr *> SrcExprs, 2203 ArrayRef<const Expr *> DstExprs, 2204 ArrayRef<const Expr *> AssignmentOps) { 2205 if (!CGF.HaveInsertPoint()) 2206 return; 2207 assert(CopyprivateVars.size() == SrcExprs.size() && 2208 CopyprivateVars.size() == DstExprs.size() && 2209 CopyprivateVars.size() == AssignmentOps.size()); 2210 ASTContext &C = CGM.getContext(); 2211 // int32 did_it = 0; 2212 // if(__kmpc_single(ident_t *, gtid)) { 2213 // SingleOpGen(); 2214 // __kmpc_end_single(ident_t *, gtid); 2215 // did_it = 1; 2216 // } 2217 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2218 // <copy_func>, did_it); 2219 2220 Address DidIt = Address::invalid(); 2221 if (!CopyprivateVars.empty()) { 2222 // int32 did_it = 0; 2223 QualType KmpInt32Ty = 2224 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2225 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2226 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2227 } 2228 // Prepare arguments and build a call to __kmpc_single 2229 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2230 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2231 CGM.getModule(), OMPRTL___kmpc_single), 2232 Args, 2233 OMPBuilder.getOrCreateRuntimeFunction( 2234 CGM.getModule(), OMPRTL___kmpc_end_single), 2235 Args, 2236 /*Conditional=*/true); 2237 SingleOpGen.setAction(Action); 2238 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2239 if (DidIt.isValid()) { 2240 // did_it = 1; 2241 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2242 } 2243 Action.Done(CGF); 2244 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2245 // <copy_func>, did_it); 2246 if (DidIt.isValid()) { 2247 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2248 QualType CopyprivateArrayTy = C.getConstantArrayType( 2249 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal, 2250 /*IndexTypeQuals=*/0); 2251 // Create a list of all private variables for copyprivate. 2252 Address CopyprivateList = 2253 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2254 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2255 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2256 CGF.Builder.CreateStore( 2257 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2258 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2259 CGF.VoidPtrTy), 2260 Elem); 2261 } 2262 // Build function that copies private values from single region to all other 2263 // threads in the corresponding parallel region. 
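// For example (a sketch), for
//
//   #pragma omp single copyprivate(x)
//     x = compute();
//
// the copy function built here performs '*dst_x = *src_x', and
// __kmpc_copyprivate broadcasts the value from the one thread that actually
// executed the single region (the thread with did_it == 1).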
2264 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2265 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars, 2266 SrcExprs, DstExprs, AssignmentOps, Loc); 2267 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2268 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2269 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty); 2270 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2271 llvm::Value *Args[] = { 2272 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2273 getThreadID(CGF, Loc), // i32 <gtid> 2274 BufSize, // size_t <buf_size> 2275 CL.getPointer(), // void *<copyprivate list> 2276 CpyFn, // void (*) (void *, void *) <copy_func> 2277 DidItVal // i32 did_it 2278 }; 2279 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2280 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2281 Args); 2282 } 2283 } 2284 2285 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2286 const RegionCodeGenTy &OrderedOpGen, 2287 SourceLocation Loc, bool IsThreads) { 2288 if (!CGF.HaveInsertPoint()) 2289 return; 2290 // __kmpc_ordered(ident_t *, gtid); 2291 // OrderedOpGen(); 2292 // __kmpc_end_ordered(ident_t *, gtid); 2293 // Prepare arguments and build a call to __kmpc_ordered 2294 if (IsThreads) { 2295 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2296 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2297 CGM.getModule(), OMPRTL___kmpc_ordered), 2298 Args, 2299 OMPBuilder.getOrCreateRuntimeFunction( 2300 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2301 Args); 2302 OrderedOpGen.setAction(Action); 2303 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2304 return; 2305 } 2306 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2307 } 2308 2309 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2310 unsigned Flags; 2311 if (Kind == OMPD_for) 2312 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2313 else if (Kind == OMPD_sections) 2314 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2315 else if (Kind == OMPD_single) 2316 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2317 else if (Kind == OMPD_barrier) 2318 Flags = OMP_IDENT_BARRIER_EXPL; 2319 else 2320 Flags = OMP_IDENT_BARRIER_IMPL; 2321 return Flags; 2322 } 2323 2324 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2325 CodeGenFunction &CGF, const OMPLoopDirective &S, 2326 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2327 // Check if the loop directive is actually a doacross loop directive. In this 2328 // case choose static, 1 schedule. 2329 if (llvm::any_of( 2330 S.getClausesOfKind<OMPOrderedClause>(), 2331 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2332 ScheduleKind = OMPC_SCHEDULE_static; 2333 // Chunk size is 1 in this case. 
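// For example (a sketch), a loop nest such as
//
//   #pragma omp for ordered(2)
//
// is a doacross loop, so it is lowered as if schedule(static, 1) had been
// specified.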
2334 llvm::APInt ChunkSize(32, 1); 2335 ChunkExpr = IntegerLiteral::Create( 2336 CGF.getContext(), ChunkSize, 2337 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2338 SourceLocation()); 2339 } 2340 } 2341 2342 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2343 OpenMPDirectiveKind Kind, bool EmitChecks, 2344 bool ForceSimpleCall) { 2345 // Check if we should use the OMPBuilder 2346 auto *OMPRegionInfo = 2347 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2348 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2349 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2350 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2351 return; 2352 } 2353 2354 if (!CGF.HaveInsertPoint()) 2355 return; 2356 // Build call __kmpc_cancel_barrier(loc, thread_id); 2357 // Build call __kmpc_barrier(loc, thread_id); 2358 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2359 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2360 // thread_id); 2361 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2362 getThreadID(CGF, Loc)}; 2363 if (OMPRegionInfo) { 2364 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2365 llvm::Value *Result = CGF.EmitRuntimeCall( 2366 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2367 OMPRTL___kmpc_cancel_barrier), 2368 Args); 2369 if (EmitChecks) { 2370 // if (__kmpc_cancel_barrier()) { 2371 // exit from construct; 2372 // } 2373 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2374 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2375 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2376 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2377 CGF.EmitBlock(ExitBB); 2378 // exit from construct; 2379 CodeGenFunction::JumpDest CancelDestination = 2380 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2381 CGF.EmitBranchThroughCleanup(CancelDestination); 2382 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2383 } 2384 return; 2385 } 2386 } 2387 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2388 CGM.getModule(), OMPRTL___kmpc_barrier), 2389 Args); 2390 } 2391 2392 void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, 2393 Expr *ME, bool IsFatal) { 2394 llvm::Value *MVL = 2395 ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF) 2396 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 2397 // Build call void __kmpc_error(ident_t *loc, int severity, const char 2398 // *message) 2399 llvm::Value *Args[] = { 2400 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true), 2401 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1), 2402 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)}; 2403 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2404 CGM.getModule(), OMPRTL___kmpc_error), 2405 Args); 2406 } 2407 2408 /// Map the OpenMP loop schedule to the runtime enumeration. 2409 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2410 bool Chunked, bool Ordered) { 2411 switch (ScheduleKind) { 2412 case OMPC_SCHEDULE_static: 2413 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2414 : (Ordered ? OMP_ord_static : OMP_sch_static); 2415 case OMPC_SCHEDULE_dynamic: 2416 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2417 case OMPC_SCHEDULE_guided: 2418 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2419 case OMPC_SCHEDULE_runtime: 2420 return Ordered ? 
OMP_ord_runtime : OMP_sch_runtime;
2421 case OMPC_SCHEDULE_auto:
2422 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2423 case OMPC_SCHEDULE_unknown:
2424 assert(!Chunked && "chunk was specified but schedule kind not known");
2425 return Ordered ? OMP_ord_static : OMP_sch_static;
2426 }
2427 llvm_unreachable("Unexpected runtime schedule");
2428 }
2429
2430 /// Map the OpenMP distribute schedule to the runtime enumeration.
2431 static OpenMPSchedType
2432 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2433 // Only static is allowed for dist_schedule.
2434 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2435 }
2436
2437 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2438 bool Chunked) const {
2439 OpenMPSchedType Schedule =
2440 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2441 return Schedule == OMP_sch_static;
2442 }
2443
2444 bool CGOpenMPRuntime::isStaticNonchunked(
2445 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2446 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2447 return Schedule == OMP_dist_sch_static;
2448 }
2449
2450 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2451 bool Chunked) const {
2452 OpenMPSchedType Schedule =
2453 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2454 return Schedule == OMP_sch_static_chunked;
2455 }
2456
2457 bool CGOpenMPRuntime::isStaticChunked(
2458 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2459 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2460 return Schedule == OMP_dist_sch_static_chunked;
2461 }
2462
2463 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2464 OpenMPSchedType Schedule =
2465 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2466 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2467 return Schedule != OMP_sch_static;
2468 }
2469
2470 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2471 OpenMPScheduleClauseModifier M1,
2472 OpenMPScheduleClauseModifier M2) {
2473 int Modifier = 0;
2474 switch (M1) {
2475 case OMPC_SCHEDULE_MODIFIER_monotonic:
2476 Modifier = OMP_sch_modifier_monotonic;
2477 break;
2478 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2479 Modifier = OMP_sch_modifier_nonmonotonic;
2480 break;
2481 case OMPC_SCHEDULE_MODIFIER_simd:
2482 if (Schedule == OMP_sch_static_chunked)
2483 Schedule = OMP_sch_static_balanced_chunked;
2484 break;
2485 case OMPC_SCHEDULE_MODIFIER_last:
2486 case OMPC_SCHEDULE_MODIFIER_unknown:
2487 break;
2488 }
2489 switch (M2) {
2490 case OMPC_SCHEDULE_MODIFIER_monotonic:
2491 Modifier = OMP_sch_modifier_monotonic;
2492 break;
2493 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2494 Modifier = OMP_sch_modifier_nonmonotonic;
2495 break;
2496 case OMPC_SCHEDULE_MODIFIER_simd:
2497 if (Schedule == OMP_sch_static_chunked)
2498 Schedule = OMP_sch_static_balanced_chunked;
2499 break;
2500 case OMPC_SCHEDULE_MODIFIER_last:
2501 case OMPC_SCHEDULE_MODIFIER_unknown:
2502 break;
2503 }
2504 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2505 // If the static schedule kind is specified or if the ordered clause is
2506 // specified, and if the nonmonotonic modifier is not specified, the effect is
2507 // as if the monotonic modifier is specified.
Otherwise, unless the monotonic 2508 // modifier is specified, the effect is as if the nonmonotonic modifier is 2509 // specified. 2510 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2511 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2512 Schedule == OMP_sch_static_balanced_chunked || 2513 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2514 Schedule == OMP_dist_sch_static_chunked || 2515 Schedule == OMP_dist_sch_static)) 2516 Modifier = OMP_sch_modifier_nonmonotonic; 2517 } 2518 return Schedule | Modifier; 2519 } 2520 2521 void CGOpenMPRuntime::emitForDispatchInit( 2522 CodeGenFunction &CGF, SourceLocation Loc, 2523 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2524 bool Ordered, const DispatchRTInput &DispatchValues) { 2525 if (!CGF.HaveInsertPoint()) 2526 return; 2527 OpenMPSchedType Schedule = getRuntimeSchedule( 2528 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2529 assert(Ordered || 2530 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2531 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2532 Schedule != OMP_sch_static_balanced_chunked)); 2533 // Call __kmpc_dispatch_init( 2534 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2535 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2536 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2537 2538 // If the Chunk was not specified in the clause - use default value 1. 2539 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2540 : CGF.Builder.getIntN(IVSize, 1); 2541 llvm::Value *Args[] = { 2542 emitUpdateLocation(CGF, Loc), 2543 getThreadID(CGF, Loc), 2544 CGF.Builder.getInt32(addMonoNonMonoModifier( 2545 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2546 DispatchValues.LB, // Lower 2547 DispatchValues.UB, // Upper 2548 CGF.Builder.getIntN(IVSize, 1), // Stride 2549 Chunk // Chunk 2550 }; 2551 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned), 2552 Args); 2553 } 2554 2555 static void emitForStaticInitCall( 2556 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2557 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2558 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2559 const CGOpenMPRuntime::StaticRTInput &Values) { 2560 if (!CGF.HaveInsertPoint()) 2561 return; 2562 2563 assert(!Values.Ordered); 2564 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2565 Schedule == OMP_sch_static_balanced_chunked || 2566 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2567 Schedule == OMP_dist_sch_static || 2568 Schedule == OMP_dist_sch_static_chunked); 2569 2570 // Call __kmpc_for_static_init( 2571 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2572 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2573 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2574 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2575 llvm::Value *Chunk = Values.Chunk; 2576 if (Chunk == nullptr) { 2577 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2578 Schedule == OMP_dist_sch_static) && 2579 "expected static non-chunked schedule"); 2580 // If the Chunk was not specified in the clause - use default value 1. 
2581 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2582 } else { 2583 assert((Schedule == OMP_sch_static_chunked || 2584 Schedule == OMP_sch_static_balanced_chunked || 2585 Schedule == OMP_ord_static_chunked || 2586 Schedule == OMP_dist_sch_static_chunked) && 2587 "expected static chunked schedule"); 2588 } 2589 llvm::Value *Args[] = { 2590 UpdateLocation, 2591 ThreadId, 2592 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2593 M2)), // Schedule type 2594 Values.IL.getPointer(), // &isLastIter 2595 Values.LB.getPointer(), // &LB 2596 Values.UB.getPointer(), // &UB 2597 Values.ST.getPointer(), // &Stride 2598 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2599 Chunk // Chunk 2600 }; 2601 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2602 } 2603 2604 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2605 SourceLocation Loc, 2606 OpenMPDirectiveKind DKind, 2607 const OpenMPScheduleTy &ScheduleKind, 2608 const StaticRTInput &Values) { 2609 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2610 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2611 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) && 2612 "Expected loop-based or sections-based directive."); 2613 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2614 isOpenMPLoopDirective(DKind) 2615 ? OMP_IDENT_WORK_LOOP 2616 : OMP_IDENT_WORK_SECTIONS); 2617 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2618 llvm::FunctionCallee StaticInitFunction = 2619 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned, 2620 false); 2621 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2622 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2623 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2624 } 2625 2626 void CGOpenMPRuntime::emitDistributeStaticInit( 2627 CodeGenFunction &CGF, SourceLocation Loc, 2628 OpenMPDistScheduleClauseKind SchedKind, 2629 const CGOpenMPRuntime::StaticRTInput &Values) { 2630 OpenMPSchedType ScheduleNum = 2631 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2632 llvm::Value *UpdatedLocation = 2633 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2634 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2635 llvm::FunctionCallee StaticInitFunction; 2636 bool isGPUDistribute = 2637 CGM.getLangOpts().OpenMPIsTargetDevice && 2638 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); 2639 StaticInitFunction = OMPBuilder.createForStaticInitFunction( 2640 Values.IVSize, Values.IVSigned, isGPUDistribute); 2641 2642 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2643 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2644 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2645 } 2646 2647 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2648 SourceLocation Loc, 2649 OpenMPDirectiveKind DKind) { 2650 if (!CGF.HaveInsertPoint()) 2651 return; 2652 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2653 llvm::Value *Args[] = { 2654 emitUpdateLocation(CGF, Loc, 2655 isOpenMPDistributeDirective(DKind) 2656 ? OMP_IDENT_WORK_DISTRIBUTE 2657 : isOpenMPLoopDirective(DKind) 2658 ? 
OMP_IDENT_WORK_LOOP
2659 : OMP_IDENT_WORK_SECTIONS),
2660 getThreadID(CGF, Loc)};
2661 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2662 if (isOpenMPDistributeDirective(DKind) &&
2663 CGM.getLangOpts().OpenMPIsTargetDevice &&
2664 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2665 CGF.EmitRuntimeCall(
2666 OMPBuilder.getOrCreateRuntimeFunction(
2667 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2668 Args);
2669 else
2670 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2671 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2672 Args);
2673 }
2674
2675 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2676 SourceLocation Loc,
2677 unsigned IVSize,
2678 bool IVSigned) {
2679 if (!CGF.HaveInsertPoint())
2680 return;
2681 // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2682 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2683 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2684 Args);
2685 }
2686
2687 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2688 SourceLocation Loc, unsigned IVSize,
2689 bool IVSigned, Address IL,
2690 Address LB, Address UB,
2691 Address ST) {
2692 // Call __kmpc_dispatch_next(
2693 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2694 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2695 // kmp_int[32|64] *p_stride);
2696 llvm::Value *Args[] = {
2697 emitUpdateLocation(CGF, Loc),
2698 getThreadID(CGF, Loc),
2699 IL.getPointer(), // &isLastIter
2700 LB.getPointer(), // &Lower
2701 UB.getPointer(), // &Upper
2702 ST.getPointer() // &Stride
2703 };
2704 llvm::Value *Call = CGF.EmitRuntimeCall(
2705 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2706 return CGF.EmitScalarConversion(
2707 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2708 CGF.getContext().BoolTy, Loc);
2709 }
2710
2711 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2712 llvm::Value *NumThreads,
2713 SourceLocation Loc) {
2714 if (!CGF.HaveInsertPoint())
2715 return;
2716 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2717 llvm::Value *Args[] = {
2718 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2719 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2720 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2721 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2722 Args);
2723 }
2724
2725 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2726 ProcBindKind ProcBind,
2727 SourceLocation Loc) {
2728 if (!CGF.HaveInsertPoint())
2729 return;
2730 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2731 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2732 llvm::Value *Args[] = {
2733 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2734 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2735 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2736 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2737 Args);
2738 }
2739
2740 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2741 SourceLocation Loc, llvm::AtomicOrdering AO) {
2742 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2743 OMPBuilder.createFlush(CGF.Builder);
2744 } else {
2745 if (!CGF.HaveInsertPoint())
2746 return;
2747 // Build call void __kmpc_flush(ident_t *loc)
2748 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2749 CGM.getModule(),
OMPRTL___kmpc_flush), 2750 emitUpdateLocation(CGF, Loc)); 2751 } 2752 } 2753 2754 namespace { 2755 /// Indexes of fields for type kmp_task_t. 2756 enum KmpTaskTFields { 2757 /// List of shared variables. 2758 KmpTaskTShareds, 2759 /// Task routine. 2760 KmpTaskTRoutine, 2761 /// Partition id for the untied tasks. 2762 KmpTaskTPartId, 2763 /// Function with call of destructors for private variables. 2764 Data1, 2765 /// Task priority. 2766 Data2, 2767 /// (Taskloops only) Lower bound. 2768 KmpTaskTLowerBound, 2769 /// (Taskloops only) Upper bound. 2770 KmpTaskTUpperBound, 2771 /// (Taskloops only) Stride. 2772 KmpTaskTStride, 2773 /// (Taskloops only) Is last iteration flag. 2774 KmpTaskTLastIter, 2775 /// (Taskloops only) Reduction data. 2776 KmpTaskTReductions, 2777 }; 2778 } // anonymous namespace 2779 2780 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 2781 // If we are in simd mode or there are no entries, we don't need to do 2782 // anything. 2783 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty()) 2784 return; 2785 2786 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn = 2787 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind, 2788 const llvm::TargetRegionEntryInfo &EntryInfo) -> void { 2789 SourceLocation Loc; 2790 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) { 2791 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 2792 E = CGM.getContext().getSourceManager().fileinfo_end(); 2793 I != E; ++I) { 2794 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID && 2795 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) { 2796 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 2797 I->getFirst(), EntryInfo.Line, 1); 2798 break; 2799 } 2800 } 2801 } 2802 switch (Kind) { 2803 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: { 2804 unsigned DiagID = CGM.getDiags().getCustomDiagID( 2805 DiagnosticsEngine::Error, "Offloading entry for target region in " 2806 "%0 is incorrect: either the " 2807 "address or the ID is invalid."); 2808 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName; 2809 } break; 2810 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: { 2811 unsigned DiagID = CGM.getDiags().getCustomDiagID( 2812 DiagnosticsEngine::Error, "Offloading entry for declare target " 2813 "variable %0 is incorrect: the " 2814 "address is invalid."); 2815 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName; 2816 } break; 2817 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: { 2818 unsigned DiagID = CGM.getDiags().getCustomDiagID( 2819 DiagnosticsEngine::Error, 2820 "Offloading entry for declare target variable is incorrect: the " 2821 "address is invalid."); 2822 CGM.getDiags().Report(DiagID); 2823 } break; 2824 } 2825 }; 2826 2827 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn); 2828 } 2829 2830 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 2831 if (!KmpRoutineEntryPtrTy) { 2832 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 
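// That is, a task entry point has the C-level shape (a sketch; the parameter
// names are illustrative):
//
//   kmp_int32 task_entry(kmp_int32 gtid, void *task_data);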
2833 ASTContext &C = CGM.getContext(); 2834 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 2835 FunctionProtoType::ExtProtoInfo EPI; 2836 KmpRoutineEntryPtrQTy = C.getPointerType( 2837 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 2838 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 2839 } 2840 } 2841 2842 namespace { 2843 struct PrivateHelpersTy { 2844 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 2845 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 2846 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 2847 PrivateElemInit(PrivateElemInit) {} 2848 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 2849 const Expr *OriginalRef = nullptr; 2850 const VarDecl *Original = nullptr; 2851 const VarDecl *PrivateCopy = nullptr; 2852 const VarDecl *PrivateElemInit = nullptr; 2853 bool isLocalPrivate() const { 2854 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 2855 } 2856 }; 2857 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 2858 } // anonymous namespace 2859 2860 static bool isAllocatableDecl(const VarDecl *VD) { 2861 const VarDecl *CVD = VD->getCanonicalDecl(); 2862 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 2863 return false; 2864 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 2865 // Use the default allocation. 2866 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 2867 !AA->getAllocator()); 2868 } 2869 2870 static RecordDecl * 2871 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 2872 if (!Privates.empty()) { 2873 ASTContext &C = CGM.getContext(); 2874 // Build struct .kmp_privates_t. { 2875 // /* private vars */ 2876 // }; 2877 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 2878 RD->startDefinition(); 2879 for (const auto &Pair : Privates) { 2880 const VarDecl *VD = Pair.second.Original; 2881 QualType Type = VD->getType().getNonReferenceType(); 2882 // If the private variable is a local variable with lvalue ref type, 2883 // allocate the pointer instead of the pointee type. 
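      // E.g. a privatized `int &r` gets an `int *` slot in the record, and an
      // allocatable decl adds one more level of indirection on top of that.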
2884 if (Pair.second.isLocalPrivate()) { 2885 if (VD->getType()->isLValueReferenceType()) 2886 Type = C.getPointerType(Type); 2887 if (isAllocatableDecl(VD)) 2888 Type = C.getPointerType(Type); 2889 } 2890 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 2891 if (VD->hasAttrs()) { 2892 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 2893 E(VD->getAttrs().end()); 2894 I != E; ++I) 2895 FD->addAttr(*I); 2896 } 2897 } 2898 RD->completeDefinition(); 2899 return RD; 2900 } 2901 return nullptr; 2902 } 2903 2904 static RecordDecl * 2905 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 2906 QualType KmpInt32Ty, 2907 QualType KmpRoutineEntryPointerQTy) { 2908 ASTContext &C = CGM.getContext(); 2909 // Build struct kmp_task_t { 2910 // void * shareds; 2911 // kmp_routine_entry_t routine; 2912 // kmp_int32 part_id; 2913 // kmp_cmplrdata_t data1; 2914 // kmp_cmplrdata_t data2; 2915 // For taskloops additional fields: 2916 // kmp_uint64 lb; 2917 // kmp_uint64 ub; 2918 // kmp_int64 st; 2919 // kmp_int32 liter; 2920 // void * reductions; 2921 // }; 2922 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union); 2923 UD->startDefinition(); 2924 addFieldToRecordDecl(C, UD, KmpInt32Ty); 2925 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 2926 UD->completeDefinition(); 2927 QualType KmpCmplrdataTy = C.getRecordType(UD); 2928 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 2929 RD->startDefinition(); 2930 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 2931 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 2932 addFieldToRecordDecl(C, RD, KmpInt32Ty); 2933 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 2934 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 2935 if (isOpenMPTaskLoopDirective(Kind)) { 2936 QualType KmpUInt64Ty = 2937 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 2938 QualType KmpInt64Ty = 2939 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 2940 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 2941 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 2942 addFieldToRecordDecl(C, RD, KmpInt64Ty); 2943 addFieldToRecordDecl(C, RD, KmpInt32Ty); 2944 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 2945 } 2946 RD->completeDefinition(); 2947 return RD; 2948 } 2949 2950 static RecordDecl * 2951 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 2952 ArrayRef<PrivateDataTy> Privates) { 2953 ASTContext &C = CGM.getContext(); 2954 // Build struct kmp_task_t_with_privates { 2955 // kmp_task_t task_data; 2956 // .kmp_privates_t. privates; 2957 // }; 2958 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 2959 RD->startDefinition(); 2960 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 2961 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 2962 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 2963 RD->completeDefinition(); 2964 return RD; 2965 } 2966 2967 /// Emit a proxy function which accepts kmp_task_t as the second 2968 /// argument. 
2969 /// \code 2970 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 2971 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 2972 /// For taskloops: 2973 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 2974 /// tt->reductions, tt->shareds); 2975 /// return 0; 2976 /// } 2977 /// \endcode 2978 static llvm::Function * 2979 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 2980 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 2981 QualType KmpTaskTWithPrivatesPtrQTy, 2982 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 2983 QualType SharedsPtrTy, llvm::Function *TaskFunction, 2984 llvm::Value *TaskPrivatesMap) { 2985 ASTContext &C = CGM.getContext(); 2986 FunctionArgList Args; 2987 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 2988 ImplicitParamKind::Other); 2989 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 2990 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 2991 ImplicitParamKind::Other); 2992 Args.push_back(&GtidArg); 2993 Args.push_back(&TaskTypeArg); 2994 const auto &TaskEntryFnInfo = 2995 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 2996 llvm::FunctionType *TaskEntryTy = 2997 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 2998 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 2999 auto *TaskEntry = llvm::Function::Create( 3000 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3001 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3002 TaskEntry->setDoesNotRecurse(); 3003 CodeGenFunction CGF(CGM); 3004 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3005 Loc, Loc); 3006 3007 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3008 // tt, 3009 // For taskloops: 3010 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3011 // tt->task_data.shareds); 3012 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3013 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3014 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3015 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3016 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3017 const auto *KmpTaskTWithPrivatesQTyRD = 3018 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3019 LValue Base = 3020 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3021 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3022 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3023 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3024 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3025 3026 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3027 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3028 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3029 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3030 CGF.ConvertTypeForMem(SharedsPtrTy)); 3031 3032 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3033 llvm::Value *PrivatesParam; 3034 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3035 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3036 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3037 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3038 } else { 3039 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 
3040 } 3041 3042 llvm::Value *CommonArgs[] = { 3043 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap, 3044 CGF.Builder 3045 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF), 3046 CGF.VoidPtrTy, CGF.Int8Ty) 3047 .getPointer()}; 3048 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3049 std::end(CommonArgs)); 3050 if (isOpenMPTaskLoopDirective(Kind)) { 3051 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3052 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3053 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3054 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3055 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3056 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3057 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3058 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3059 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3060 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3061 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3062 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3063 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3064 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3065 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3066 CallArgs.push_back(LBParam); 3067 CallArgs.push_back(UBParam); 3068 CallArgs.push_back(StParam); 3069 CallArgs.push_back(LIParam); 3070 CallArgs.push_back(RParam); 3071 } 3072 CallArgs.push_back(SharedsParam); 3073 3074 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3075 CallArgs); 3076 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3077 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3078 CGF.FinishFunction(); 3079 return TaskEntry; 3080 } 3081 3082 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3083 SourceLocation Loc, 3084 QualType KmpInt32Ty, 3085 QualType KmpTaskTWithPrivatesPtrQTy, 3086 QualType KmpTaskTWithPrivatesQTy) { 3087 ASTContext &C = CGM.getContext(); 3088 FunctionArgList Args; 3089 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3090 ImplicitParamKind::Other); 3091 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3092 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3093 ImplicitParamKind::Other); 3094 Args.push_back(&GtidArg); 3095 Args.push_back(&TaskTypeArg); 3096 const auto &DestructorFnInfo = 3097 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3098 llvm::FunctionType *DestructorFnTy = 3099 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3100 std::string Name = 3101 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3102 auto *DestructorFn = 3103 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3104 Name, &CGM.getModule()); 3105 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3106 DestructorFnInfo); 3107 DestructorFn->setDoesNotRecurse(); 3108 CodeGenFunction CGF(CGM); 3109 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3110 Args, Loc, Loc); 3111 3112 LValue Base = CGF.EmitLoadOfPointerLValue( 3113 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3114 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3115 const auto *KmpTaskTWithPrivatesQTyRD = 3116 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3117 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3118 Base = CGF.EmitLValueForField(Base, *FI); 
3119 for (const auto *Field : 3120 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3121 if (QualType::DestructionKind DtorKind = 3122 Field->getType().isDestructedType()) { 3123 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3124 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3125 } 3126 } 3127 CGF.FinishFunction(); 3128 return DestructorFn; 3129 } 3130 3131 /// Emit a privates mapping function for correct handling of private and 3132 /// firstprivate variables. 3133 /// \code 3134 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 3135 /// **noalias priv1,..., <tyn> **noalias privn) { 3136 /// *priv1 = &.privates.priv1; 3137 /// ...; 3138 /// *privn = &.privates.privn; 3139 /// } 3140 /// \endcode 3141 static llvm::Value * 3142 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3143 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3144 ArrayRef<PrivateDataTy> Privates) { 3145 ASTContext &C = CGM.getContext(); 3146 FunctionArgList Args; 3147 ImplicitParamDecl TaskPrivatesArg( 3148 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3149 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3150 ImplicitParamKind::Other); 3151 Args.push_back(&TaskPrivatesArg); 3152 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3153 unsigned Counter = 1; 3154 for (const Expr *E : Data.PrivateVars) { 3155 Args.push_back(ImplicitParamDecl::Create( 3156 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3157 C.getPointerType(C.getPointerType(E->getType())) 3158 .withConst() 3159 .withRestrict(), 3160 ImplicitParamKind::Other)); 3161 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3162 PrivateVarsPos[VD] = Counter; 3163 ++Counter; 3164 } 3165 for (const Expr *E : Data.FirstprivateVars) { 3166 Args.push_back(ImplicitParamDecl::Create( 3167 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3168 C.getPointerType(C.getPointerType(E->getType())) 3169 .withConst() 3170 .withRestrict(), 3171 ImplicitParamKind::Other)); 3172 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3173 PrivateVarsPos[VD] = Counter; 3174 ++Counter; 3175 } 3176 for (const Expr *E : Data.LastprivateVars) { 3177 Args.push_back(ImplicitParamDecl::Create( 3178 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3179 C.getPointerType(C.getPointerType(E->getType())) 3180 .withConst() 3181 .withRestrict(), 3182 ImplicitParamKind::Other)); 3183 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3184 PrivateVarsPos[VD] = Counter; 3185 ++Counter; 3186 } 3187 for (const VarDecl *VD : Data.PrivateLocals) { 3188 QualType Ty = VD->getType().getNonReferenceType(); 3189 if (VD->getType()->isLValueReferenceType()) 3190 Ty = C.getPointerType(Ty); 3191 if (isAllocatableDecl(VD)) 3192 Ty = C.getPointerType(Ty); 3193 Args.push_back(ImplicitParamDecl::Create( 3194 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3195 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3196 ImplicitParamKind::Other)); 3197 PrivateVarsPos[VD] = Counter; 3198 ++Counter; 3199 } 3200 const auto &TaskPrivatesMapFnInfo = 3201 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3202 llvm::FunctionType *TaskPrivatesMapTy = 3203 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3204 std::string Name = 3205 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3206 auto *TaskPrivatesMap = llvm::Function::Create( 3207 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3208 &CGM.getModule()); 3209 
CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3210 TaskPrivatesMapFnInfo); 3211 if (CGM.getLangOpts().Optimize) { 3212 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3213 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3214 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3215 } 3216 CodeGenFunction CGF(CGM); 3217 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3218 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3219 3220 // *privi = &.privates.privi; 3221 LValue Base = CGF.EmitLoadOfPointerLValue( 3222 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3223 TaskPrivatesArg.getType()->castAs<PointerType>()); 3224 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3225 Counter = 0; 3226 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3227 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3228 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3229 LValue RefLVal = 3230 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3231 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3232 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3233 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3234 ++Counter; 3235 } 3236 CGF.FinishFunction(); 3237 return TaskPrivatesMap; 3238 } 3239 3240 /// Emit initialization for private variables in task-based directives. 3241 static void emitPrivatesInit(CodeGenFunction &CGF, 3242 const OMPExecutableDirective &D, 3243 Address KmpTaskSharedsPtr, LValue TDBase, 3244 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3245 QualType SharedsTy, QualType SharedsPtrTy, 3246 const OMPTaskDataTy &Data, 3247 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3248 ASTContext &C = CGF.getContext(); 3249 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3250 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3251 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3252 ? OMPD_taskloop 3253 : OMPD_task; 3254 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3255 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3256 LValue SrcBase; 3257 bool IsTargetTask = 3258 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3259 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3260 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3261 // PointersArray, SizesArray, and MappersArray. The original variables for 3262 // these arrays are not captured and we get their addresses explicitly. 3263 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3264 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3265 SrcBase = CGF.MakeAddrLValue( 3266 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3267 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy), 3268 CGF.ConvertTypeForMem(SharedsTy)), 3269 SharedsTy); 3270 } 3271 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3272 for (const PrivateDataTy &Pair : Privates) { 3273 // Do not initialize private locals. 
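    // Their slot in the privates record only provides storage; any
    // initialization is emitted by the declarations in the task body itself.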
3274 if (Pair.second.isLocalPrivate()) { 3275 ++FI; 3276 continue; 3277 } 3278 const VarDecl *VD = Pair.second.PrivateCopy; 3279 const Expr *Init = VD->getAnyInitializer(); 3280 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3281 !CGF.isTrivialInitializer(Init)))) { 3282 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3283 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3284 const VarDecl *OriginalVD = Pair.second.Original; 3285 // Check if the variable is the target-based BasePointersArray, 3286 // PointersArray, SizesArray, or MappersArray. 3287 LValue SharedRefLValue; 3288 QualType Type = PrivateLValue.getType(); 3289 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3290 if (IsTargetTask && !SharedField) { 3291 assert(isa<ImplicitParamDecl>(OriginalVD) && 3292 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3293 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3294 ->getNumParams() == 0 && 3295 isa<TranslationUnitDecl>( 3296 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3297 ->getDeclContext()) && 3298 "Expected artificial target data variable."); 3299 SharedRefLValue = 3300 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3301 } else if (ForDup) { 3302 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3303 SharedRefLValue = CGF.MakeAddrLValue( 3304 SharedRefLValue.getAddress(CGF).withAlignment( 3305 C.getDeclAlign(OriginalVD)), 3306 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3307 SharedRefLValue.getTBAAInfo()); 3308 } else if (CGF.LambdaCaptureFields.count( 3309 Pair.second.Original->getCanonicalDecl()) > 0 || 3310 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) { 3311 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3312 } else { 3313 // Processing for implicitly captured variables. 3314 InlinedOpenMPRegionRAII Region( 3315 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3316 /*HasCancel=*/false, /*NoInheritance=*/true); 3317 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3318 } 3319 if (Type->isArrayType()) { 3320 // Initialize firstprivate array. 3321 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3322 // Perform simple memcpy. 3323 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3324 } else { 3325 // Initialize firstprivate array using element-by-element 3326 // initialization. 3327 CGF.EmitOMPAggregateAssign( 3328 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3329 Type, 3330 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3331 Address SrcElement) { 3332 // Clean up any temporaries needed by the initialization. 3333 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3334 InitScope.addPrivate(Elem, SrcElement); 3335 (void)InitScope.Privatize(); 3336 // Emit initialization for single element. 
3337 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3338 CGF, &CapturesInfo); 3339 CGF.EmitAnyExprToMem(Init, DestElement, 3340 Init->getType().getQualifiers(), 3341 /*IsInitializer=*/false); 3342 }); 3343 } 3344 } else { 3345 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3346 InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF)); 3347 (void)InitScope.Privatize(); 3348 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3349 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3350 /*capturedByInit=*/false); 3351 } 3352 } else { 3353 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3354 } 3355 } 3356 ++FI; 3357 } 3358 } 3359 3360 /// Check if duplication function is required for taskloops. 3361 static bool checkInitIsRequired(CodeGenFunction &CGF, 3362 ArrayRef<PrivateDataTy> Privates) { 3363 bool InitRequired = false; 3364 for (const PrivateDataTy &Pair : Privates) { 3365 if (Pair.second.isLocalPrivate()) 3366 continue; 3367 const VarDecl *VD = Pair.second.PrivateCopy; 3368 const Expr *Init = VD->getAnyInitializer(); 3369 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) && 3370 !CGF.isTrivialInitializer(Init)); 3371 if (InitRequired) 3372 break; 3373 } 3374 return InitRequired; 3375 } 3376 3377 3378 /// Emit task_dup function (for initialization of 3379 /// private/firstprivate/lastprivate vars and last_iter flag) 3380 /// \code 3381 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3382 /// lastpriv) { 3383 /// // setup lastprivate flag 3384 /// task_dst->last = lastpriv; 3385 /// // could be constructor calls here... 3386 /// } 3387 /// \endcode 3388 static llvm::Value * 3389 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3390 const OMPExecutableDirective &D, 3391 QualType KmpTaskTWithPrivatesPtrQTy, 3392 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3393 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3394 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3395 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3396 ASTContext &C = CGM.getContext(); 3397 FunctionArgList Args; 3398 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3399 KmpTaskTWithPrivatesPtrQTy, 3400 ImplicitParamKind::Other); 3401 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3402 KmpTaskTWithPrivatesPtrQTy, 3403 ImplicitParamKind::Other); 3404 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3405 ImplicitParamKind::Other); 3406 Args.push_back(&DstArg); 3407 Args.push_back(&SrcArg); 3408 Args.push_back(&LastprivArg); 3409 const auto &TaskDupFnInfo = 3410 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3411 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3412 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3413 auto *TaskDup = llvm::Function::Create( 3414 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3415 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3416 TaskDup->setDoesNotRecurse(); 3417 CodeGenFunction CGF(CGM); 3418 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3419 Loc); 3420 3421 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3422 CGF.GetAddrOfLocalVar(&DstArg), 3423 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3424 // task_dst->liter = lastpriv; 3425 if (WithLastIter) { 3426 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3427 LValue Base = 
CGF.EmitLValueForField( 3428 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3429 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3430 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3431 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3432 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3433 } 3434 3435 // Emit initial values for private copies (if any). 3436 assert(!Privates.empty()); 3437 Address KmpTaskSharedsPtr = Address::invalid(); 3438 if (!Data.FirstprivateVars.empty()) { 3439 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3440 CGF.GetAddrOfLocalVar(&SrcArg), 3441 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3442 LValue Base = CGF.EmitLValueForField( 3443 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3444 KmpTaskSharedsPtr = Address( 3445 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3446 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3447 KmpTaskTShareds)), 3448 Loc), 3449 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy)); 3450 } 3451 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 3452 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 3453 CGF.FinishFunction(); 3454 return TaskDup; 3455 } 3456 3457 /// Checks if destructor function is required to be generated. 3458 /// \return true if cleanups are required, false otherwise. 3459 static bool 3460 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3461 ArrayRef<PrivateDataTy> Privates) { 3462 for (const PrivateDataTy &P : Privates) { 3463 if (P.second.isLocalPrivate()) 3464 continue; 3465 QualType Ty = P.second.Original->getType().getNonReferenceType(); 3466 if (Ty.isDestructedType()) 3467 return true; 3468 } 3469 return false; 3470 } 3471 3472 namespace { 3473 /// Loop generator for OpenMP iterator expression. 
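/// For a modifier such as `iterator(i = 0:n)` the scope brackets the user
/// code with roughly the following control flow (a sketch; the labels match
/// the basic blocks created below):
/// \code
/// counter = 0;
/// cont:
///   if (counter < n) goto body; else goto exit;
/// body:
///   i = begin + counter * step;
///   <user code>
///   counter = counter + 1;
///   goto cont;
/// exit:
/// \endcode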
3474 class OMPIteratorGeneratorScope final 3475 : public CodeGenFunction::OMPPrivateScope { 3476 CodeGenFunction &CGF; 3477 const OMPIteratorExpr *E = nullptr; 3478 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 3479 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 3480 OMPIteratorGeneratorScope() = delete; 3481 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 3482 3483 public: 3484 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 3485 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 3486 if (!E) 3487 return; 3488 SmallVector<llvm::Value *, 4> Uppers; 3489 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 3490 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 3491 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 3492 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName())); 3493 const OMPIteratorHelperData &HelperData = E->getHelper(I); 3494 addPrivate( 3495 HelperData.CounterVD, 3496 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr")); 3497 } 3498 Privatize(); 3499 3500 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 3501 const OMPIteratorHelperData &HelperData = E->getHelper(I); 3502 LValue CLVal = 3503 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 3504 HelperData.CounterVD->getType()); 3505 // Counter = 0; 3506 CGF.EmitStoreOfScalar( 3507 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 3508 CLVal); 3509 CodeGenFunction::JumpDest &ContDest = 3510 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 3511 CodeGenFunction::JumpDest &ExitDest = 3512 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 3513 // N = <number-of_iterations>; 3514 llvm::Value *N = Uppers[I]; 3515 // cont: 3516 // if (Counter < N) goto body; else goto exit; 3517 CGF.EmitBlock(ContDest.getBlock()); 3518 auto *CVal = 3519 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 3520 llvm::Value *Cmp = 3521 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 3522 ? 
              CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags
/// type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
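  // Sorting by decreasing alignment keeps the privates record densely packed,
  // and the stable sort keeps the field order deterministic for the mapping
  // function emitted later.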
3607 const auto *I = Data.PrivateCopies.begin(); 3608 for (const Expr *E : Data.PrivateVars) { 3609 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3610 Privates.emplace_back( 3611 C.getDeclAlign(VD), 3612 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 3613 /*PrivateElemInit=*/nullptr)); 3614 ++I; 3615 } 3616 I = Data.FirstprivateCopies.begin(); 3617 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 3618 for (const Expr *E : Data.FirstprivateVars) { 3619 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3620 Privates.emplace_back( 3621 C.getDeclAlign(VD), 3622 PrivateHelpersTy( 3623 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 3624 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 3625 ++I; 3626 ++IElemInitRef; 3627 } 3628 I = Data.LastprivateCopies.begin(); 3629 for (const Expr *E : Data.LastprivateVars) { 3630 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3631 Privates.emplace_back( 3632 C.getDeclAlign(VD), 3633 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 3634 /*PrivateElemInit=*/nullptr)); 3635 ++I; 3636 } 3637 for (const VarDecl *VD : Data.PrivateLocals) { 3638 if (isAllocatableDecl(VD)) 3639 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); 3640 else 3641 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 3642 } 3643 llvm::stable_sort(Privates, 3644 [](const PrivateDataTy &L, const PrivateDataTy &R) { 3645 return L.first > R.first; 3646 }); 3647 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3648 // Build type kmp_routine_entry_t (if not built yet). 3649 emitKmpRoutineEntryT(KmpInt32Ty); 3650 // Build type kmp_task_t (if not built yet). 3651 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 3652 if (SavedKmpTaskloopTQTy.isNull()) { 3653 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 3654 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 3655 } 3656 KmpTaskTQTy = SavedKmpTaskloopTQTy; 3657 } else { 3658 assert((D.getDirectiveKind() == OMPD_task || 3659 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 3660 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 3661 "Expected taskloop, task or target directive"); 3662 if (SavedKmpTaskTQTy.isNull()) { 3663 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 3664 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 3665 } 3666 KmpTaskTQTy = SavedKmpTaskTQTy; 3667 } 3668 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3669 // Build particular struct kmp_task_t for the given task. 3670 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 3671 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 3672 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 3673 QualType KmpTaskTWithPrivatesPtrQTy = 3674 C.getPointerType(KmpTaskTWithPrivatesQTy); 3675 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 3676 llvm::Type *KmpTaskTWithPrivatesPtrTy = 3677 KmpTaskTWithPrivatesTy->getPointerTo(); 3678 llvm::Value *KmpTaskTWithPrivatesTySize = 3679 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 3680 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 3681 3682 // Emit initial values for private copies (if any). 
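  // Build the privates mapping helper first; the proxy task entry passes it
  // to the outlined task function so typed pointers to each private can be
  // recovered from the flat privates block.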
3683 llvm::Value *TaskPrivatesMap = nullptr; 3684 llvm::Type *TaskPrivatesMapTy = 3685 std::next(TaskFunction->arg_begin(), 3)->getType(); 3686 if (!Privates.empty()) { 3687 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3688 TaskPrivatesMap = 3689 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); 3690 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3691 TaskPrivatesMap, TaskPrivatesMapTy); 3692 } else { 3693 TaskPrivatesMap = llvm::ConstantPointerNull::get( 3694 cast<llvm::PointerType>(TaskPrivatesMapTy)); 3695 } 3696 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 3697 // kmp_task_t *tt); 3698 llvm::Function *TaskEntry = emitProxyTaskFunction( 3699 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 3700 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 3701 TaskPrivatesMap); 3702 3703 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 3704 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 3705 // kmp_routine_entry_t *task_entry); 3706 // Task flags. Format is taken from 3707 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h, 3708 // description of kmp_tasking_flags struct. 3709 enum { 3710 TiedFlag = 0x1, 3711 FinalFlag = 0x2, 3712 DestructorsFlag = 0x8, 3713 PriorityFlag = 0x20, 3714 DetachableFlag = 0x40, 3715 }; 3716 unsigned Flags = Data.Tied ? TiedFlag : 0; 3717 bool NeedsCleanup = false; 3718 if (!Privates.empty()) { 3719 NeedsCleanup = 3720 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); 3721 if (NeedsCleanup) 3722 Flags = Flags | DestructorsFlag; 3723 } 3724 if (Data.Priority.getInt()) 3725 Flags = Flags | PriorityFlag; 3726 if (D.hasClausesOfKind<OMPDetachClause>()) 3727 Flags = Flags | DetachableFlag; 3728 llvm::Value *TaskFlags = 3729 Data.Final.getPointer() 3730 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 3731 CGF.Builder.getInt32(FinalFlag), 3732 CGF.Builder.getInt32(/*C=*/0)) 3733 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 3734 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 3735 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 3736 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 3737 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 3738 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3739 TaskEntry, KmpRoutineEntryPtrTy)}; 3740 llvm::Value *NewTask; 3741 if (D.hasClausesOfKind<OMPNowaitClause>()) { 3742 // Check if we have any device clause associated with the directive. 3743 const Expr *Device = nullptr; 3744 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 3745 Device = C->getDevice(); 3746 // Emit device ID if any otherwise use default value. 3747 llvm::Value *DeviceID; 3748 if (Device) 3749 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 3750 CGF.Int64Ty, /*isSigned=*/true); 3751 else 3752 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 3753 AllocArgs.push_back(DeviceID); 3754 NewTask = CGF.EmitRuntimeCall( 3755 OMPBuilder.getOrCreateRuntimeFunction( 3756 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 3757 AllocArgs); 3758 } else { 3759 NewTask = 3760 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 3761 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 3762 AllocArgs); 3763 } 3764 // Emit detach clause initialization. 
3765 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 3766 // task_descriptor); 3767 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 3768 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 3769 LValue EvtLVal = CGF.EmitLValue(Evt); 3770 3771 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 3772 // int gtid, kmp_task_t *task); 3773 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 3774 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 3775 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 3776 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 3777 OMPBuilder.getOrCreateRuntimeFunction( 3778 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 3779 {Loc, Tid, NewTask}); 3780 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 3781 Evt->getExprLoc()); 3782 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 3783 } 3784 // Process affinity clauses. 3785 if (D.hasClausesOfKind<OMPAffinityClause>()) { 3786 // Process list of affinity data. 3787 ASTContext &C = CGM.getContext(); 3788 Address AffinitiesArray = Address::invalid(); 3789 // Calculate number of elements to form the array of affinity data. 3790 llvm::Value *NumOfElements = nullptr; 3791 unsigned NumAffinities = 0; 3792 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 3793 if (const Expr *Modifier = C->getModifier()) { 3794 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 3795 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 3796 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 3797 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 3798 NumOfElements = 3799 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 3800 } 3801 } else { 3802 NumAffinities += C->varlist_size(); 3803 } 3804 } 3805 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 3806 // Fields ids in kmp_task_affinity_info record. 3807 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 3808 3809 QualType KmpTaskAffinityInfoArrayTy; 3810 if (NumOfElements) { 3811 NumOfElements = CGF.Builder.CreateNUWAdd( 3812 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 3813 auto *OVE = new (C) OpaqueValueExpr( 3814 Loc, 3815 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 3816 VK_PRValue); 3817 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 3818 RValue::get(NumOfElements)); 3819 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType( 3820 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal, 3821 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 3822 // Properly emit variable-sized array. 
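      // Wrapping the variably sized array type in an implicit declaration
      // lets EmitVarDecl materialize the dynamically sized allocation; the
      // opaque value bound above supplies the element count.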
3823 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 3824 ImplicitParamKind::Other); 3825 CGF.EmitVarDecl(*PD); 3826 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 3827 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 3828 /*isSigned=*/false); 3829 } else { 3830 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 3831 KmpTaskAffinityInfoTy, 3832 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 3833 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); 3834 AffinitiesArray = 3835 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 3836 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 3837 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 3838 /*isSigned=*/false); 3839 } 3840 3841 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 3842 // Fill array by elements without iterators. 3843 unsigned Pos = 0; 3844 bool HasIterator = false; 3845 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 3846 if (C->getModifier()) { 3847 HasIterator = true; 3848 continue; 3849 } 3850 for (const Expr *E : C->varlists()) { 3851 llvm::Value *Addr; 3852 llvm::Value *Size; 3853 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 3854 LValue Base = 3855 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 3856 KmpTaskAffinityInfoTy); 3857 // affs[i].base_addr = &<Affinities[i].second>; 3858 LValue BaseAddrLVal = CGF.EmitLValueForField( 3859 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 3860 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 3861 BaseAddrLVal); 3862 // affs[i].len = sizeof(<Affinities[i].second>); 3863 LValue LenLVal = CGF.EmitLValueForField( 3864 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 3865 CGF.EmitStoreOfScalar(Size, LenLVal); 3866 ++Pos; 3867 } 3868 } 3869 LValue PosLVal; 3870 if (HasIterator) { 3871 PosLVal = CGF.MakeAddrLValue( 3872 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 3873 C.getSizeType()); 3874 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 3875 } 3876 // Process elements with iterators. 
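    // Iterator-modified clauses expand to one affinity record per iteration,
    // so the insertion position lives in memory (PosLVal) and is advanced
    // inside the generated loops instead of being a compile-time constant.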
3877 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 3878 const Expr *Modifier = C->getModifier(); 3879 if (!Modifier) 3880 continue; 3881 OMPIteratorGeneratorScope IteratorScope( 3882 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 3883 for (const Expr *E : C->varlists()) { 3884 llvm::Value *Addr; 3885 llvm::Value *Size; 3886 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 3887 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 3888 LValue Base = CGF.MakeAddrLValue( 3889 CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy); 3890 // affs[i].base_addr = &<Affinities[i].second>; 3891 LValue BaseAddrLVal = CGF.EmitLValueForField( 3892 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 3893 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 3894 BaseAddrLVal); 3895 // affs[i].len = sizeof(<Affinities[i].second>); 3896 LValue LenLVal = CGF.EmitLValueForField( 3897 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 3898 CGF.EmitStoreOfScalar(Size, LenLVal); 3899 Idx = CGF.Builder.CreateNUWAdd( 3900 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 3901 CGF.EmitStoreOfScalar(Idx, PosLVal); 3902 } 3903 } 3904 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 3905 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 3906 // naffins, kmp_task_affinity_info_t *affin_list); 3907 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 3908 llvm::Value *GTid = getThreadID(CGF, Loc); 3909 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3910 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 3911 // FIXME: Emit the function and ignore its result for now unless the 3912 // runtime function is properly implemented. 3913 (void)CGF.EmitRuntimeCall( 3914 OMPBuilder.getOrCreateRuntimeFunction( 3915 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 3916 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 3917 } 3918 llvm::Value *NewTaskNewTaskTTy = 3919 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3920 NewTask, KmpTaskTWithPrivatesPtrTy); 3921 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 3922 KmpTaskTWithPrivatesQTy); 3923 LValue TDBase = 3924 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3925 // Fill the data in the resulting kmp_task_t record. 3926 // Copy shareds if there are any. 3927 Address KmpTaskSharedsPtr = Address::invalid(); 3928 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 3929 KmpTaskSharedsPtr = Address( 3930 CGF.EmitLoadOfScalar( 3931 CGF.EmitLValueForField( 3932 TDBase, 3933 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), 3934 Loc), 3935 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy)); 3936 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 3937 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 3938 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 3939 } 3940 // Emit initial values for private copies (if any). 
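  // For taskloops the runtime may clone the task descriptor per chunk, so a
  // task_dup helper is emitted below whenever lastprivates or nontrivial
  // firstprivate initialization make a plain bitwise copy insufficient.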
3941 TaskResultTy Result; 3942 if (!Privates.empty()) { 3943 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 3944 SharedsTy, SharedsPtrTy, Data, Privates, 3945 /*ForDup=*/false); 3946 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 3947 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 3948 Result.TaskDupFn = emitTaskDupFunction( 3949 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 3950 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 3951 /*WithLastIter=*/!Data.LastprivateVars.empty()); 3952 } 3953 } 3954 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 3955 enum { Priority = 0, Destructors = 1 }; 3956 // Provide pointer to function with destructors for privates. 3957 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 3958 const RecordDecl *KmpCmplrdataUD = 3959 (*FI)->getType()->getAsUnionType()->getDecl(); 3960 if (NeedsCleanup) { 3961 llvm::Value *DestructorFn = emitDestructorsFunction( 3962 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 3963 KmpTaskTWithPrivatesQTy); 3964 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 3965 LValue DestructorsLV = CGF.EmitLValueForField( 3966 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 3967 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3968 DestructorFn, KmpRoutineEntryPtrTy), 3969 DestructorsLV); 3970 } 3971 // Set priority. 3972 if (Data.Priority.getInt()) { 3973 LValue Data2LV = CGF.EmitLValueForField( 3974 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 3975 LValue PriorityLV = CGF.EmitLValueForField( 3976 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 3977 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 3978 } 3979 Result.NewTask = NewTask; 3980 Result.TaskEntry = TaskEntry; 3981 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 3982 Result.TDBase = TDBase; 3983 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 3984 return Result; 3985 } 3986 3987 /// Translates internal dependency kind into the runtime kind. 3988 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 3989 RTLDependenceKindTy DepKind; 3990 switch (K) { 3991 case OMPC_DEPEND_in: 3992 DepKind = RTLDependenceKindTy::DepIn; 3993 break; 3994 // Out and InOut dependencies must use the same code. 3995 case OMPC_DEPEND_out: 3996 case OMPC_DEPEND_inout: 3997 DepKind = RTLDependenceKindTy::DepInOut; 3998 break; 3999 case OMPC_DEPEND_mutexinoutset: 4000 DepKind = RTLDependenceKindTy::DepMutexInOutSet; 4001 break; 4002 case OMPC_DEPEND_inoutset: 4003 DepKind = RTLDependenceKindTy::DepInOutSet; 4004 break; 4005 case OMPC_DEPEND_outallmemory: 4006 DepKind = RTLDependenceKindTy::DepOmpAllMem; 4007 break; 4008 case OMPC_DEPEND_source: 4009 case OMPC_DEPEND_sink: 4010 case OMPC_DEPEND_depobj: 4011 case OMPC_DEPEND_inoutallmemory: 4012 case OMPC_DEPEND_unknown: 4013 llvm_unreachable("Unknown task dependence type"); 4014 } 4015 return DepKind; 4016 } 4017 4018 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 
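/// The record built here is laid out roughly as follows (a sketch; the flags
/// field is an unsigned integer with the width of `bool`, `uint8_t` on
/// typical targets):
/// \code
/// struct kmp_depend_info {
///   intptr_t base_addr;
///   size_t len;
///   uint8_t flags;
/// };
/// \endcode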
4019 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4020 QualType &FlagsTy) { 4021 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4022 if (KmpDependInfoTy.isNull()) { 4023 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4024 KmpDependInfoRD->startDefinition(); 4025 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4026 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4027 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4028 KmpDependInfoRD->completeDefinition(); 4029 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4030 } 4031 } 4032 4033 std::pair<llvm::Value *, LValue> 4034 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4035 SourceLocation Loc) { 4036 ASTContext &C = CGM.getContext(); 4037 QualType FlagsTy; 4038 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4039 RecordDecl *KmpDependInfoRD = 4040 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4041 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4042 LValue Base = CGF.EmitLoadOfPointerLValue( 4043 DepobjLVal.getAddress(CGF).withElementType( 4044 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)), 4045 KmpDependInfoPtrTy->castAs<PointerType>()); 4046 Address DepObjAddr = CGF.Builder.CreateGEP( 4047 Base.getAddress(CGF), 4048 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4049 LValue NumDepsBase = CGF.MakeAddrLValue( 4050 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); 4051 // NumDeps = deps[i].base_addr; 4052 LValue BaseAddrLVal = CGF.EmitLValueForField( 4053 NumDepsBase, 4054 *std::next(KmpDependInfoRD->field_begin(), 4055 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr))); 4056 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4057 return std::make_pair(NumDeps, Base); 4058 } 4059 4060 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4061 llvm::PointerUnion<unsigned *, LValue *> Pos, 4062 const OMPTaskDataTy::DependData &Data, 4063 Address DependenciesArray) { 4064 CodeGenModule &CGM = CGF.CGM; 4065 ASTContext &C = CGM.getContext(); 4066 QualType FlagsTy; 4067 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4068 RecordDecl *KmpDependInfoRD = 4069 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4070 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4071 4072 OMPIteratorGeneratorScope IteratorScope( 4073 CGF, cast_or_null<OMPIteratorExpr>( 4074 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4075 : nullptr)); 4076 for (const Expr *E : Data.DepExprs) { 4077 llvm::Value *Addr; 4078 llvm::Value *Size; 4079 4080 // The expression will be a nullptr in the 'omp_all_memory' case. 
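    // In that case a zero base address and zero length are stored; the
    // dependence flag written below is what carries the all-memory semantics.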
4081 if (E) { 4082 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4083 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy); 4084 } else { 4085 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4086 Size = llvm::ConstantInt::get(CGF.SizeTy, 0); 4087 } 4088 LValue Base; 4089 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4090 Base = CGF.MakeAddrLValue( 4091 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); 4092 } else { 4093 assert(E && "Expected a non-null expression"); 4094 LValue &PosLVal = *Pos.get<LValue *>(); 4095 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4096 Base = CGF.MakeAddrLValue( 4097 CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy); 4098 } 4099 // deps[i].base_addr = &<Dependencies[i].second>; 4100 LValue BaseAddrLVal = CGF.EmitLValueForField( 4101 Base, 4102 *std::next(KmpDependInfoRD->field_begin(), 4103 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr))); 4104 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal); 4105 // deps[i].len = sizeof(<Dependencies[i].second>); 4106 LValue LenLVal = CGF.EmitLValueForField( 4107 Base, *std::next(KmpDependInfoRD->field_begin(), 4108 static_cast<unsigned int>(RTLDependInfoFields::Len))); 4109 CGF.EmitStoreOfScalar(Size, LenLVal); 4110 // deps[i].flags = <Dependencies[i].first>; 4111 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); 4112 LValue FlagsLVal = CGF.EmitLValueForField( 4113 Base, 4114 *std::next(KmpDependInfoRD->field_begin(), 4115 static_cast<unsigned int>(RTLDependInfoFields::Flags))); 4116 CGF.EmitStoreOfScalar( 4117 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)), 4118 FlagsLVal); 4119 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4120 ++(*P); 4121 } else { 4122 LValue &PosLVal = *Pos.get<LValue *>(); 4123 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4124 Idx = CGF.Builder.CreateNUWAdd(Idx, 4125 llvm::ConstantInt::get(Idx->getType(), 1)); 4126 CGF.EmitStoreOfScalar(Idx, PosLVal); 4127 } 4128 } 4129 } 4130 4131 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes( 4132 CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4133 const OMPTaskDataTy::DependData &Data) { 4134 assert(Data.DepKind == OMPC_DEPEND_depobj && 4135 "Expected depobj dependency kind."); 4136 SmallVector<llvm::Value *, 4> Sizes; 4137 SmallVector<LValue, 4> SizeLVals; 4138 ASTContext &C = CGF.getContext(); 4139 { 4140 OMPIteratorGeneratorScope IteratorScope( 4141 CGF, cast_or_null<OMPIteratorExpr>( 4142 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() 4143 : nullptr)); 4144 for (const Expr *E : Data.DepExprs) { 4145 llvm::Value *NumDeps; 4146 LValue Base; 4147 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4148 std::tie(NumDeps, Base) = 4149 getDepobjElements(CGF, DepobjLVal, E->getExprLoc()); 4150 LValue NumLVal = CGF.MakeAddrLValue( 4151 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 4152 C.getUIntPtrType()); 4153 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0), 4154 NumLVal.getAddress(CGF)); 4155 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 4156 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 4157 CGF.EmitStoreOfScalar(Add, NumLVal); 4158 SizeLVals.push_back(NumLVal); 4159 } 4160 } 4161 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 4162 llvm::Value *Size = 4163 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 4164 Sizes.push_back(Size); 4165 } 4166 return Sizes; 4167 } 4168 4169 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF, 4170 QualType &KmpDependInfoTy, 4171 LValue PosLVal, 4172 const OMPTaskDataTy::DependData &Data, 4173 Address DependenciesArray) { 4174 assert(Data.DepKind == OMPC_DEPEND_depobj && 4175 "Expected depobj dependency kind."); 4176 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 4177 { 4178 OMPIteratorGeneratorScope IteratorScope( 4179 CGF, cast_or_null<OMPIteratorExpr>( 4180 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4181 : nullptr)); 4182 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4183 const Expr *E = Data.DepExprs[I]; 4184 llvm::Value *NumDeps; 4185 LValue Base; 4186 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4187 std::tie(NumDeps, Base) = 4188 getDepobjElements(CGF, DepobjLVal, E->getExprLoc()); 4189 4190 // memcopy dependency data. 4191 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4192 ElSize, 4193 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4194 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4195 Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos); 4196 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4197 4198 // Increase pos. 4199 // pos += size; 4200 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4201 CGF.EmitStoreOfScalar(Add, PosLVal); 4202 } 4203 } 4204 } 4205 4206 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4207 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4208 SourceLocation Loc) { 4209 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4210 return D.DepExprs.empty(); 4211 })) 4212 return std::make_pair(nullptr, Address::invalid()); 4213 // Process list of dependencies. 4214 ASTContext &C = CGM.getContext(); 4215 Address DependenciesArray = Address::invalid(); 4216 llvm::Value *NumOfElements = nullptr; 4217 unsigned NumDependencies = std::accumulate( 4218 Dependencies.begin(), Dependencies.end(), 0, 4219 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4220 return D.DepKind == OMPC_DEPEND_depobj 4221 ? V 4222 : (V + (D.IteratorExpr ? 
0 : D.DepExprs.size())); 4223 }); 4224 QualType FlagsTy; 4225 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4226 bool HasDepobjDeps = false; 4227 bool HasRegularWithIterators = false; 4228 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4229 llvm::Value *NumOfRegularWithIterators = 4230 llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4231 // Calculate number of depobj dependencies and regular deps with the 4232 // iterators. 4233 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4234 if (D.DepKind == OMPC_DEPEND_depobj) { 4235 SmallVector<llvm::Value *, 4> Sizes = 4236 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4237 for (llvm::Value *Size : Sizes) { 4238 NumOfDepobjElements = 4239 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4240 } 4241 HasDepobjDeps = true; 4242 continue; 4243 } 4244 // Include number of iterations, if any. 4245 4246 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4247 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4248 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4249 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4250 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul( 4251 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size())); 4252 NumOfRegularWithIterators = 4253 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps); 4254 } 4255 HasRegularWithIterators = true; 4256 continue; 4257 } 4258 } 4259 4260 QualType KmpDependInfoArrayTy; 4261 if (HasDepobjDeps || HasRegularWithIterators) { 4262 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4263 /*isSigned=*/false); 4264 if (HasDepobjDeps) { 4265 NumOfElements = 4266 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4267 } 4268 if (HasRegularWithIterators) { 4269 NumOfElements = 4270 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4271 } 4272 auto *OVE = new (C) OpaqueValueExpr( 4273 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4274 VK_PRValue); 4275 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4276 RValue::get(NumOfElements)); 4277 KmpDependInfoArrayTy = 4278 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal, 4279 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4280 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4281 // Properly emit variable-sized array. 4282 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4283 ImplicitParamKind::Other); 4284 CGF.EmitVarDecl(*PD); 4285 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4286 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4287 /*isSigned=*/false); 4288 } else { 4289 KmpDependInfoArrayTy = C.getConstantArrayType( 4290 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4291 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); 4292 DependenciesArray = 4293 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4294 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4295 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4296 /*isSigned=*/false); 4297 } 4298 unsigned Pos = 0; 4299 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4300 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4301 Dependencies[I].IteratorExpr) 4302 continue; 4303 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4304 DependenciesArray); 4305 } 4306 // Copy regular dependencies with iterators. 
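// Conceptually, the dependency array ends up filled in three chunks:
//   deps[0..k): regular dependencies without iterators (filled above at
//               statically known positions),
//   deps[k..m): regular dependencies expanded from iterator clauses (filled
//               next, with the running position kept in dep.counter.addr),
//   deps[m..n): elements copied wholesale out of depobj dependency arrays.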
4307 LValue PosLVal = CGF.MakeAddrLValue( 4308 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4309 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4310 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4311 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4312 !Dependencies[I].IteratorExpr) 4313 continue; 4314 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4315 DependenciesArray); 4316 } 4317 // Copy final depobj arrays without iterators. 4318 if (HasDepobjDeps) { 4319 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4320 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4321 continue; 4322 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4323 DependenciesArray); 4324 } 4325 } 4326 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4327 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty); 4328 return std::make_pair(NumOfElements, DependenciesArray); 4329 } 4330 4331 Address CGOpenMPRuntime::emitDepobjDependClause( 4332 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4333 SourceLocation Loc) { 4334 if (Dependencies.DepExprs.empty()) 4335 return Address::invalid(); 4336 // Process list of dependencies. 4337 ASTContext &C = CGM.getContext(); 4338 Address DependenciesArray = Address::invalid(); 4339 unsigned NumDependencies = Dependencies.DepExprs.size(); 4340 QualType FlagsTy; 4341 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4342 RecordDecl *KmpDependInfoRD = 4343 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4344 4345 llvm::Value *Size; 4346 // Define type kmp_depend_info[<Dependencies.size()>]; 4347 // For depobj reserve one extra element to store the number of elements. 4348 // It is required to handle depobj(x) update(in) construct. 4349 // kmp_depend_info[<Dependencies.size()>] deps; 4350 llvm::Value *NumDepsVal; 4351 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4352 if (const auto *IE = 4353 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4354 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4355 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4356 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4357 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4358 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4359 } 4360 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4361 NumDepsVal); 4362 CharUnits SizeInBytes = 4363 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4364 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4365 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4366 NumDepsVal = 4367 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4368 } else { 4369 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4370 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4371 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); 4372 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4373 Size = CGM.getSize(Sz.alignTo(Align)); 4374 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4375 } 4376 // Need to allocate on the dynamic memory. 4377 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4378 // Use default allocator. 
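// (A null allocator handle selects the runtime's default allocator.)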
4379 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4380 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4381
4382 llvm::Value *Addr =
4383 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4384 CGM.getModule(), OMPRTL___kmpc_alloc),
4385 Args, ".dep.arr.addr");
4386 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4387 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4388 Addr, KmpDependInfoLlvmTy->getPointerTo());
4389 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4390 // Write the number of elements into the first element of the array for
4391 // depobj.
LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4392 // deps[0].base_addr = <number of elements>;
4393 LValue BaseAddrLVal = CGF.EmitLValueForField(
4394 Base,
4395 *std::next(KmpDependInfoRD->field_begin(),
4396 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4397 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4398 llvm::PointerUnion<unsigned *, LValue *> Pos;
4399 unsigned Idx = 1;
4400 LValue PosLVal;
4401 if (Dependencies.IteratorExpr) {
4402 PosLVal = CGF.MakeAddrLValue(
4403 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4404 C.getSizeType());
4405 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4406 /*IsInit=*/true);
4407 Pos = &PosLVal;
4408 } else {
4409 Pos = &Idx;
4410 }
4411 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4412 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4413 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4414 CGF.Int8Ty);
4415 return DependenciesArray;
4416 }
4417
4418 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4419 SourceLocation Loc) {
4420 ASTContext &C = CGM.getContext();
4421 QualType FlagsTy;
4422 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4423 LValue Base = CGF.EmitLoadOfPointerLValue(
4424 DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4425 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4426 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4427 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4428 CGF.ConvertTypeForMem(KmpDependInfoTy));
4429 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4430 Addr.getElementType(), Addr.getPointer(),
4431 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4432 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4433 CGF.VoidPtrTy);
4434 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4435 // Use default allocator.
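// The array being freed here was allocated with __kmpc_alloc in
// emitDepobjDependClause; DepObjAddr steps back over the extra leading
// element (which holds the element count) to recover the original
// allocation base before handing it to __kmpc_free.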
4436 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4437 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4438
4439 // __kmpc_free(gtid, addr, nullptr);
4440 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4441 CGM.getModule(), OMPRTL___kmpc_free),
4442 Args);
4443 }
4444
4445 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4446 OpenMPDependClauseKind NewDepKind,
4447 SourceLocation Loc) {
4448 ASTContext &C = CGM.getContext();
4449 QualType FlagsTy;
4450 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4451 RecordDecl *KmpDependInfoRD =
4452 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4453 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4454 llvm::Value *NumDeps;
4455 LValue Base;
4456 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4457
4458 Address Begin = Base.getAddress(CGF);
4459 // Compute the end of the array: Begin + NumDeps.
4460 llvm::Value *End = CGF.Builder.CreateGEP(
4461 Begin.getElementType(), Begin.getPointer(), NumDeps);
4462 // The basic structure here is a do-while loop; the body runs at least once.
4463 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4464 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4465 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4466 CGF.EmitBlock(BodyBB);
4467 llvm::PHINode *ElementPHI =
4468 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4469 ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
4470 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4471 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4472 Base.getTBAAInfo());
4473 // deps[i].flags = NewDepKind;
4474 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4475 LValue FlagsLVal = CGF.EmitLValueForField(
4476 Base, *std::next(KmpDependInfoRD->field_begin(),
4477 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4478 CGF.EmitStoreOfScalar(
4479 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4480 FlagsLVal);
4481
4482 // Shift the address forward by one element.
4483 Address ElementNext =
4484 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
4485 ElementPHI->addIncoming(ElementNext.getPointer(),
4486 CGF.Builder.GetInsertBlock());
4487 llvm::Value *IsEmpty =
4488 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
4489 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4490 // Done.
4491 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4492 }
4493
4494 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4495 const OMPExecutableDirective &D,
4496 llvm::Function *TaskFunction,
4497 QualType SharedsTy, Address Shareds,
4498 const Expr *IfCond,
4499 const OMPTaskDataTy &Data) {
4500 if (!CGF.HaveInsertPoint())
4501 return;
4502
4503 TaskResultTy Result =
4504 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4505 llvm::Value *NewTask = Result.NewTask;
4506 llvm::Function *TaskEntry = Result.TaskEntry;
4507 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4508 LValue TDBase = Result.TDBase;
4509 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4510 // Process list of dependences.
4511 Address DependenciesArray = Address::invalid();
4512 llvm::Value *NumOfElements;
4513 std::tie(NumOfElements, DependenciesArray) =
4514 emitDependClause(CGF, Data.Dependences, Loc);
4515
4516 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4517 // libcall.
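// Conceptually, for a task with an if() clause and a dependence list, the
// code emitted below is roughly:
//   if (<IfCond>) {
//     __kmpc_omp_task_with_deps(loc, gtid, new_task, ndeps, deps, 0, null);
//   } else {
//     __kmpc_omp_taskwait_deps_51(loc, gtid, ndeps, deps, 0, null, nowait);
//     __kmpc_omp_task_begin_if0(loc, gtid, new_task);
//     proxy_task_entry(gtid, new_task);
//     __kmpc_omp_task_complete_if0(loc, gtid, new_task);
//   }
// (the dependence-related calls are skipped when the list is empty).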
4518 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4519 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4520 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); this call
4521 // is emitted only if the dependence list is not empty.
4522 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4523 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4524 llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
4525 llvm::Value *DepTaskArgs[7];
4526 if (!Data.Dependences.empty()) {
4527 DepTaskArgs[0] = UpLoc;
4528 DepTaskArgs[1] = ThreadID;
4529 DepTaskArgs[2] = NewTask;
4530 DepTaskArgs[3] = NumOfElements;
4531 DepTaskArgs[4] = DependenciesArray.getPointer();
4532 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4533 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4534 }
4535 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4536 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4537 if (!Data.Tied) {
4538 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4539 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4540 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4541 }
4542 if (!Data.Dependences.empty()) {
4543 CGF.EmitRuntimeCall(
4544 OMPBuilder.getOrCreateRuntimeFunction(
4545 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4546 DepTaskArgs);
4547 } else {
4548 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4549 CGM.getModule(), OMPRTL___kmpc_omp_task),
4550 TaskArgs);
4551 }
4552 // Check if the parent region is untied and build the return for the untied task.
4553 if (auto *Region =
4554 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4555 Region->emitUntiedSwitch(CGF);
4556 };
4557
4558 llvm::Value *DepWaitTaskArgs[7];
4559 if (!Data.Dependences.empty()) {
4560 DepWaitTaskArgs[0] = UpLoc;
4561 DepWaitTaskArgs[1] = ThreadID;
4562 DepWaitTaskArgs[2] = NumOfElements;
4563 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4564 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4565 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4566 DepWaitTaskArgs[6] =
4567 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4568 }
4569 auto &M = CGM.getModule();
4570 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4571 TaskEntry, &Data, &DepWaitTaskArgs,
4572 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4573 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4574 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
4575 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
4576 // kmp_depend_info_t *noalias_dep_list, kmp_int32 has_no_wait); emitted
4577 // if dependence info is specified.
4578 if (!Data.Dependences.empty()) 4579 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4580 M, OMPRTL___kmpc_omp_taskwait_deps_51), 4581 DepWaitTaskArgs); 4582 // Call proxy_task_entry(gtid, new_task); 4583 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 4584 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 4585 Action.Enter(CGF); 4586 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 4587 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 4588 OutlinedFnArgs); 4589 }; 4590 4591 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 4592 // kmp_task_t *new_task); 4593 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 4594 // kmp_task_t *new_task); 4595 RegionCodeGenTy RCG(CodeGen); 4596 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 4597 M, OMPRTL___kmpc_omp_task_begin_if0), 4598 TaskArgs, 4599 OMPBuilder.getOrCreateRuntimeFunction( 4600 M, OMPRTL___kmpc_omp_task_complete_if0), 4601 TaskArgs); 4602 RCG.setAction(Action); 4603 RCG(CGF); 4604 }; 4605 4606 if (IfCond) { 4607 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 4608 } else { 4609 RegionCodeGenTy ThenRCG(ThenCodeGen); 4610 ThenRCG(CGF); 4611 } 4612 } 4613 4614 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 4615 const OMPLoopDirective &D, 4616 llvm::Function *TaskFunction, 4617 QualType SharedsTy, Address Shareds, 4618 const Expr *IfCond, 4619 const OMPTaskDataTy &Data) { 4620 if (!CGF.HaveInsertPoint()) 4621 return; 4622 TaskResultTy Result = 4623 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 4624 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 4625 // libcall. 4626 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 4627 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 4628 // sched, kmp_uint64 grainsize, void *task_dup); 4629 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4630 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 4631 llvm::Value *IfVal; 4632 if (IfCond) { 4633 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 4634 /*isSigned=*/true); 4635 } else { 4636 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 4637 } 4638 4639 LValue LBLVal = CGF.EmitLValueForField( 4640 Result.TDBase, 4641 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 4642 const auto *LBVar = 4643 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 4644 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 4645 LBLVal.getQuals(), 4646 /*IsInitializer=*/true); 4647 LValue UBLVal = CGF.EmitLValueForField( 4648 Result.TDBase, 4649 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 4650 const auto *UBVar = 4651 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 4652 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 4653 UBLVal.getQuals(), 4654 /*IsInitializer=*/true); 4655 LValue StLVal = CGF.EmitLValueForField( 4656 Result.TDBase, 4657 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 4658 const auto *StVar = 4659 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 4660 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 4661 StLVal.getQuals(), 4662 /*IsInitializer=*/true); 4663 // Store reductions address. 
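// A null pointer stored in kmp_task_t.reductions tells the runtime that no
// task reduction data is attached to this taskloop.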
4664 LValue RedLVal = CGF.EmitLValueForField(
4665 Result.TDBase,
4666 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4667 if (Data.Reductions) {
4668 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4669 } else {
4670 CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
4671 CGF.getContext().VoidPtrTy);
4672 }
4673 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4674 llvm::Value *TaskArgs[] = {
4675 UpLoc,
4676 ThreadID,
4677 Result.NewTask,
4678 IfVal,
4679 LBLVal.getPointer(CGF),
4680 UBLVal.getPointer(CGF),
4681 CGF.EmitLoadOfScalar(StLVal, Loc),
4682 llvm::ConstantInt::getSigned(
4683 CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
4684 llvm::ConstantInt::getSigned(
4685 CGF.IntTy, Data.Schedule.getPointer()
4686 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4687 : NoSchedule),
4688 Data.Schedule.getPointer()
4689 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4690 /*isSigned=*/false)
4691 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4692 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4693 Result.TaskDupFn, CGF.VoidPtrTy)
4694 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4695 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4696 CGM.getModule(), OMPRTL___kmpc_taskloop),
4697 TaskArgs);
4698 }
4699
4700 /// Emit the reduction operation for each element of an array (required for
4701 /// array sections): LHS op= RHS.
4702 /// \param Type Type of the array.
4703 /// \param LHSVar Variable on the left side of the reduction operation
4704 /// (references an element of the array in the original variable).
4705 /// \param RHSVar Variable on the right side of the reduction operation
4706 /// (references an element of the array in the original variable).
4707 /// \param RedOpGen Generator of the reduction operation that uses LHSVar and
4708 /// RHSVar.
4709 static void EmitOMPAggregateReduction(
4710 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4711 const VarDecl *RHSVar,
4712 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4713 const Expr *, const Expr *)> &RedOpGen,
4714 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4715 const Expr *UpExpr = nullptr) {
4716 // Perform the reduction element by element.
4717 QualType ElementTy;
4718 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4719 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4720
4721 // Drill down to the base element type on both arrays.
4722 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4723 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4724
4725 llvm::Value *RHSBegin = RHSAddr.getPointer();
4726 llvm::Value *LHSBegin = LHSAddr.getPointer();
4727 // Compute the end of the LHS array: LHSBegin + NumElements.
4728 llvm::Value *LHSEnd =
4729 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4730 // The basic structure here is a while loop: check for emptiness first.
4731 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4732 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4733 llvm::Value *IsEmpty =
4734 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4735 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4736
4737 // Enter the loop body, making that address the current address.
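// Conceptually the emitted loop is:
//   for (lhs = LHSBegin, rhs = RHSBegin; lhs != LHSEnd; ++lhs, ++rhs)
//     *lhs = RedOp(*lhs, *rhs);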
4738 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4739 CGF.EmitBlock(BodyBB);
4740
4741 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4742
4743 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4744 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4745 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4746 Address RHSElementCurrent(
4747 RHSElementPHI, RHSAddr.getElementType(),
4748 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4749
4750 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4751 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4752 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4753 Address LHSElementCurrent(
4754 LHSElementPHI, LHSAddr.getElementType(),
4755 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4756
4757 // Emit the reduction operation for the current element.
4758 CodeGenFunction::OMPPrivateScope Scope(CGF);
4759 Scope.addPrivate(LHSVar, LHSElementCurrent);
4760 Scope.addPrivate(RHSVar, RHSElementCurrent);
4761 Scope.Privatize();
4762 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4763 Scope.ForceCleanup();
4764
4765 // Shift the address forward by one element.
4766 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4767 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4768 "omp.arraycpy.dest.element");
4769 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4770 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4771 "omp.arraycpy.src.element");
4772 // Check whether we've reached the end.
4773 llvm::Value *Done =
4774 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4775 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4776 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4777 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4778
4779 // Done.
4780 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4781 }
4782
4783 /// Emit the reduction combiner. If the combiner is a simple expression, emit
4784 /// it as is; otherwise treat it as the combiner of a user-defined reduction
4785 /// (UDR) declaration and emit it as a call to the UDR combiner function.
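/// A UDR combiner arrives here as a CallExpr whose callee is an
/// OpaqueValueExpr wrapping a DeclRefExpr to the OMPDeclareReductionDecl;
/// the mapping below substitutes the actual combiner function for that
/// opaque callee before the expression is emitted.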
4786 static void emitReductionCombiner(CodeGenFunction &CGF, 4787 const Expr *ReductionOp) { 4788 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 4789 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 4790 if (const auto *DRE = 4791 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 4792 if (const auto *DRD = 4793 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 4794 std::pair<llvm::Function *, llvm::Function *> Reduction = 4795 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 4796 RValue Func = RValue::get(Reduction.first); 4797 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 4798 CGF.EmitIgnoredExpr(ReductionOp); 4799 return; 4800 } 4801 CGF.EmitIgnoredExpr(ReductionOp); 4802 } 4803 4804 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 4805 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, 4806 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, 4807 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) { 4808 ASTContext &C = CGM.getContext(); 4809 4810 // void reduction_func(void *LHSArg, void *RHSArg); 4811 FunctionArgList Args; 4812 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 4813 ImplicitParamKind::Other); 4814 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 4815 ImplicitParamKind::Other); 4816 Args.push_back(&LHSArg); 4817 Args.push_back(&RHSArg); 4818 const auto &CGFI = 4819 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4820 std::string Name = getReductionFuncName(ReducerName); 4821 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 4822 llvm::GlobalValue::InternalLinkage, Name, 4823 &CGM.getModule()); 4824 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 4825 Fn->setDoesNotRecurse(); 4826 CodeGenFunction CGF(CGM); 4827 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 4828 4829 // Dst = (void*[n])(LHSArg); 4830 // Src = (void*[n])(RHSArg); 4831 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4832 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 4833 ArgsElemType->getPointerTo()), 4834 ArgsElemType, CGF.getPointerAlign()); 4835 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4836 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 4837 ArgsElemType->getPointerTo()), 4838 ArgsElemType, CGF.getPointerAlign()); 4839 4840 // ... 4841 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 4842 // ... 4843 CodeGenFunction::OMPPrivateScope Scope(CGF); 4844 const auto *IPriv = Privates.begin(); 4845 unsigned Idx = 0; 4846 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 4847 const auto *RHSVar = 4848 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 4849 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar)); 4850 const auto *LHSVar = 4851 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 4852 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar)); 4853 QualType PrivTy = (*IPriv)->getType(); 4854 if (PrivTy->isVariablyModifiedType()) { 4855 // Get array size and emit VLA type. 
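// The dynamic size of a VLA reduction item travels in the next slot of the
// void* array: it was stored with inttoptr when the reduction list was
// built (see emitReduction below) and is converted back with ptrtoint here.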
4856 ++Idx; 4857 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 4858 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 4859 const VariableArrayType *VLA = 4860 CGF.getContext().getAsVariableArrayType(PrivTy); 4861 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 4862 CodeGenFunction::OpaqueValueMapping OpaqueMap( 4863 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 4864 CGF.EmitVariablyModifiedType(PrivTy); 4865 } 4866 } 4867 Scope.Privatize(); 4868 IPriv = Privates.begin(); 4869 const auto *ILHS = LHSExprs.begin(); 4870 const auto *IRHS = RHSExprs.begin(); 4871 for (const Expr *E : ReductionOps) { 4872 if ((*IPriv)->getType()->isArrayType()) { 4873 // Emit reduction for array section. 4874 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 4875 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 4876 EmitOMPAggregateReduction( 4877 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 4878 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4879 emitReductionCombiner(CGF, E); 4880 }); 4881 } else { 4882 // Emit reduction for array subscript or single variable. 4883 emitReductionCombiner(CGF, E); 4884 } 4885 ++IPriv; 4886 ++ILHS; 4887 ++IRHS; 4888 } 4889 Scope.ForceCleanup(); 4890 CGF.FinishFunction(); 4891 return Fn; 4892 } 4893 4894 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 4895 const Expr *ReductionOp, 4896 const Expr *PrivateRef, 4897 const DeclRefExpr *LHS, 4898 const DeclRefExpr *RHS) { 4899 if (PrivateRef->getType()->isArrayType()) { 4900 // Emit reduction for array section. 4901 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 4902 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 4903 EmitOMPAggregateReduction( 4904 CGF, PrivateRef->getType(), LHSVar, RHSVar, 4905 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4906 emitReductionCombiner(CGF, ReductionOp); 4907 }); 4908 } else { 4909 // Emit reduction for array subscript or single variable. 4910 emitReductionCombiner(CGF, ReductionOp); 4911 } 4912 } 4913 4914 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 4915 ArrayRef<const Expr *> Privates, 4916 ArrayRef<const Expr *> LHSExprs, 4917 ArrayRef<const Expr *> RHSExprs, 4918 ArrayRef<const Expr *> ReductionOps, 4919 ReductionOptionsTy Options) { 4920 if (!CGF.HaveInsertPoint()) 4921 return; 4922 4923 bool WithNowait = Options.WithNowait; 4924 bool SimpleReduction = Options.SimpleReduction; 4925 4926 // Next code should be emitted for reduction: 4927 // 4928 // static kmp_critical_name lock = { 0 }; 4929 // 4930 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 4931 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 4932 // ... 4933 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 4934 // *(Type<n>-1*)rhs[<n>-1]); 4935 // } 4936 // 4937 // ... 4938 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 4939 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 4940 // RedList, reduce_func, &<lock>)) { 4941 // case 1: 4942 // ... 4943 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4944 // ... 4945 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 4946 // break; 4947 // case 2: 4948 // ... 4949 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 4950 // ... 
4951 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 4952 // break; 4953 // default:; 4954 // } 4955 // 4956 // if SimpleReduction is true, only the next code is generated: 4957 // ... 4958 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4959 // ... 4960 4961 ASTContext &C = CGM.getContext(); 4962 4963 if (SimpleReduction) { 4964 CodeGenFunction::RunCleanupsScope Scope(CGF); 4965 const auto *IPriv = Privates.begin(); 4966 const auto *ILHS = LHSExprs.begin(); 4967 const auto *IRHS = RHSExprs.begin(); 4968 for (const Expr *E : ReductionOps) { 4969 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 4970 cast<DeclRefExpr>(*IRHS)); 4971 ++IPriv; 4972 ++ILHS; 4973 ++IRHS; 4974 } 4975 return; 4976 } 4977 4978 // 1. Build a list of reduction variables. 4979 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 4980 auto Size = RHSExprs.size(); 4981 for (const Expr *E : Privates) { 4982 if (E->getType()->isVariablyModifiedType()) 4983 // Reserve place for array size. 4984 ++Size; 4985 } 4986 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 4987 QualType ReductionArrayTy = C.getConstantArrayType( 4988 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal, 4989 /*IndexTypeQuals=*/0); 4990 Address ReductionList = 4991 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 4992 const auto *IPriv = Privates.begin(); 4993 unsigned Idx = 0; 4994 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 4995 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 4996 CGF.Builder.CreateStore( 4997 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4998 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 4999 Elem); 5000 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5001 // Store array size. 5002 ++Idx; 5003 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5004 llvm::Value *Size = CGF.Builder.CreateIntCast( 5005 CGF.getVLASize( 5006 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5007 .NumElts, 5008 CGF.SizeTy, /*isSigned=*/false); 5009 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5010 Elem); 5011 } 5012 } 5013 5014 // 2. Emit reduce_func(). 5015 llvm::Function *ReductionFn = emitReductionFunction( 5016 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy), 5017 Privates, LHSExprs, RHSExprs, ReductionOps); 5018 5019 // 3. Create static kmp_critical_name lock = { 0 }; 5020 std::string Name = getName({"reduction"}); 5021 llvm::Value *Lock = getCriticalRegionLock(Name); 5022 5023 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5024 // RedList, reduce_func, &<lock>); 5025 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5026 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5027 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5028 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5029 ReductionList.getPointer(), CGF.VoidPtrTy); 5030 llvm::Value *Args[] = { 5031 IdentTLoc, // ident_t *<loc> 5032 ThreadId, // i32 <gtid> 5033 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5034 ReductionArrayTySize, // size_type sizeof(RedList) 5035 RL, // void *RedList 5036 ReductionFn, // void (*) (void *, void *) <reduce_func> 5037 Lock // kmp_critical_name *&<lock> 5038 }; 5039 llvm::Value *Res = CGF.EmitRuntimeCall( 5040 OMPBuilder.getOrCreateRuntimeFunction( 5041 CGM.getModule(), 5042 WithNowait ? 
OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5043 Args); 5044 5045 // 5. Build switch(res) 5046 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5047 llvm::SwitchInst *SwInst = 5048 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5049 5050 // 6. Build case 1: 5051 // ... 5052 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5053 // ... 5054 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5055 // break; 5056 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5057 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5058 CGF.EmitBlock(Case1BB); 5059 5060 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5061 llvm::Value *EndArgs[] = { 5062 IdentTLoc, // ident_t *<loc> 5063 ThreadId, // i32 <gtid> 5064 Lock // kmp_critical_name *&<lock> 5065 }; 5066 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5067 CodeGenFunction &CGF, PrePostActionTy &Action) { 5068 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5069 const auto *IPriv = Privates.begin(); 5070 const auto *ILHS = LHSExprs.begin(); 5071 const auto *IRHS = RHSExprs.begin(); 5072 for (const Expr *E : ReductionOps) { 5073 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5074 cast<DeclRefExpr>(*IRHS)); 5075 ++IPriv; 5076 ++ILHS; 5077 ++IRHS; 5078 } 5079 }; 5080 RegionCodeGenTy RCG(CodeGen); 5081 CommonActionTy Action( 5082 nullptr, std::nullopt, 5083 OMPBuilder.getOrCreateRuntimeFunction( 5084 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5085 : OMPRTL___kmpc_end_reduce), 5086 EndArgs); 5087 RCG.setAction(Action); 5088 RCG(CGF); 5089 5090 CGF.EmitBranch(DefaultBB); 5091 5092 // 7. Build case 2: 5093 // ... 5094 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5095 // ... 5096 // break; 5097 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5098 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5099 CGF.EmitBlock(Case2BB); 5100 5101 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5102 CodeGenFunction &CGF, PrePostActionTy &Action) { 5103 const auto *ILHS = LHSExprs.begin(); 5104 const auto *IRHS = RHSExprs.begin(); 5105 const auto *IPriv = Privates.begin(); 5106 for (const Expr *E : ReductionOps) { 5107 const Expr *XExpr = nullptr; 5108 const Expr *EExpr = nullptr; 5109 const Expr *UpExpr = nullptr; 5110 BinaryOperatorKind BO = BO_Comma; 5111 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5112 if (BO->getOpcode() == BO_Assign) { 5113 XExpr = BO->getLHS(); 5114 UpExpr = BO->getRHS(); 5115 } 5116 } 5117 // Try to emit update expression as a simple atomic. 5118 const Expr *RHSExpr = UpExpr; 5119 if (RHSExpr) { 5120 // Analyze RHS part of the whole expression. 5121 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5122 RHSExpr->IgnoreParenImpCasts())) { 5123 // If this is a conditional operator, analyze its condition for 5124 // min/max reduction operator. 
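// (a min/max combiner has the form "x = x < e ? x : e", so the comparison
// that identifies the operation lives in the condition).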
5125 RHSExpr = ACO->getCond(); 5126 } 5127 if (const auto *BORHS = 5128 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5129 EExpr = BORHS->getRHS(); 5130 BO = BORHS->getOpcode(); 5131 } 5132 } 5133 if (XExpr) { 5134 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5135 auto &&AtomicRedGen = [BO, VD, 5136 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5137 const Expr *EExpr, const Expr *UpExpr) { 5138 LValue X = CGF.EmitLValue(XExpr); 5139 RValue E; 5140 if (EExpr) 5141 E = CGF.EmitAnyExpr(EExpr); 5142 CGF.EmitOMPAtomicSimpleUpdateExpr( 5143 X, E, BO, /*IsXLHSInRHSPart=*/true, 5144 llvm::AtomicOrdering::Monotonic, Loc, 5145 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5146 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5147 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5148 CGF.emitOMPSimpleStore( 5149 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5150 VD->getType().getNonReferenceType(), Loc); 5151 PrivateScope.addPrivate(VD, LHSTemp); 5152 (void)PrivateScope.Privatize(); 5153 return CGF.EmitAnyExpr(UpExpr); 5154 }); 5155 }; 5156 if ((*IPriv)->getType()->isArrayType()) { 5157 // Emit atomic reduction for array section. 5158 const auto *RHSVar = 5159 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5160 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5161 AtomicRedGen, XExpr, EExpr, UpExpr); 5162 } else { 5163 // Emit atomic reduction for array subscript or single variable. 5164 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5165 } 5166 } else { 5167 // Emit as a critical region. 5168 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5169 const Expr *, const Expr *) { 5170 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5171 std::string Name = RT.getName({"atomic_reduction"}); 5172 RT.emitCriticalRegion( 5173 CGF, Name, 5174 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5175 Action.Enter(CGF); 5176 emitReductionCombiner(CGF, E); 5177 }, 5178 Loc); 5179 }; 5180 if ((*IPriv)->getType()->isArrayType()) { 5181 const auto *LHSVar = 5182 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5183 const auto *RHSVar = 5184 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5185 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5186 CritRedGen); 5187 } else { 5188 CritRedGen(CGF, nullptr, nullptr, nullptr); 5189 } 5190 } 5191 ++ILHS; 5192 ++IRHS; 5193 ++IPriv; 5194 } 5195 }; 5196 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5197 if (!WithNowait) { 5198 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5199 llvm::Value *EndArgs[] = { 5200 IdentTLoc, // ident_t *<loc> 5201 ThreadId, // i32 <gtid> 5202 Lock // kmp_critical_name *&<lock> 5203 }; 5204 CommonActionTy Action(nullptr, std::nullopt, 5205 OMPBuilder.getOrCreateRuntimeFunction( 5206 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5207 EndArgs); 5208 AtomicRCG.setAction(Action); 5209 AtomicRCG(CGF); 5210 } else { 5211 AtomicRCG(CGF); 5212 } 5213 5214 CGF.EmitBranch(DefaultBB); 5215 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5216 } 5217 5218 /// Generates unique name for artificial threadprivate variables. 5219 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5220 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5221 const Expr *Ref) { 5222 SmallString<256> Buffer; 5223 llvm::raw_svector_ostream Out(Buffer); 5224 const clang::DeclRefExpr *DE; 5225 const VarDecl *D = ::getBaseDecl(Ref, DE); 5226 if (!D) 5227 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5228 D = D->getCanonicalDecl(); 5229 std::string Name = CGM.getOpenMPRuntime().getName( 5230 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5231 Out << Prefix << Name << "_" 5232 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5233 return std::string(Out.str()); 5234 } 5235 5236 /// Emits reduction initializer function: 5237 /// \code 5238 /// void @.red_init(void* %arg, void* %orig) { 5239 /// %0 = bitcast void* %arg to <type>* 5240 /// store <type> <init>, <type>* %0 5241 /// ret void 5242 /// } 5243 /// \endcode 5244 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5245 SourceLocation Loc, 5246 ReductionCodeGen &RCG, unsigned N) { 5247 ASTContext &C = CGM.getContext(); 5248 QualType VoidPtrTy = C.VoidPtrTy; 5249 VoidPtrTy.addRestrict(); 5250 FunctionArgList Args; 5251 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5252 ImplicitParamKind::Other); 5253 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5254 ImplicitParamKind::Other); 5255 Args.emplace_back(&Param); 5256 Args.emplace_back(&ParamOrig); 5257 const auto &FnInfo = 5258 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5259 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5260 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5261 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5262 Name, &CGM.getModule()); 5263 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5264 Fn->setDoesNotRecurse(); 5265 CodeGenFunction CGF(CGM); 5266 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5267 QualType PrivateType = RCG.getPrivateType(N); 5268 Address PrivateAddr = CGF.EmitLoadOfPointer( 5269 CGF.GetAddrOfLocalVar(&Param).withElementType( 5270 CGF.ConvertTypeForMem(PrivateType)->getPointerTo()), 5271 C.getPointerType(PrivateType)->castAs<PointerType>()); 5272 llvm::Value *Size = nullptr; 5273 // If the size of the reduction item is non-constant, load it from global 5274 // threadprivate variable. 
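// (that variable is written by emitTaskReductionFixups; see
// generateUniqueName above for the naming scheme).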
5275 if (RCG.getSizes(N).second) {
5276 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5277 CGF, CGM.getContext().getSizeType(),
5278 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5279 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5280 CGM.getContext().getSizeType(), Loc);
5281 }
5282 RCG.emitAggregateType(CGF, N, Size);
5283 Address OrigAddr = Address::invalid();
5284 // If the initializer uses the initializer from the declare reduction
5285 // construct, emit a pointer to the address of the original reduction item
5286 // (required by the reduction initializer).
5287 if (RCG.usesReductionInitializer(N)) {
5288 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5289 OrigAddr = CGF.EmitLoadOfPointer(
5290 SharedAddr,
5291 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5292 }
5293 // Emit the initializer:
5294 // %0 = bitcast void* %arg to <type>*
5295 // store <type> <init>, <type>* %0
5296 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5297 [](CodeGenFunction &) { return false; });
5298 CGF.FinishFunction();
5299 return Fn;
5300 }
5301
5302 /// Emits reduction combiner function:
5303 /// \code
5304 /// void @.red_comb(void* %arg0, void* %arg1) {
5305 /// %lhs = bitcast void* %arg0 to <type>*
5306 /// %rhs = bitcast void* %arg1 to <type>*
5307 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5308 /// store <type> %2, <type>* %lhs
5309 /// ret void
5310 /// }
5311 /// \endcode
5312 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5313 SourceLocation Loc,
5314 ReductionCodeGen &RCG, unsigned N,
5315 const Expr *ReductionOp,
5316 const Expr *LHS, const Expr *RHS,
5317 const Expr *PrivateRef) {
5318 ASTContext &C = CGM.getContext();
5319 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5320 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5321 FunctionArgList Args;
5322 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5323 C.VoidPtrTy, ImplicitParamKind::Other);
5324 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5325 ImplicitParamKind::Other);
5326 Args.emplace_back(&ParamInOut);
5327 Args.emplace_back(&ParamIn);
5328 const auto &FnInfo =
5329 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5330 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5331 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5332 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5333 Name, &CGM.getModule());
5334 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5335 Fn->setDoesNotRecurse();
5336 CodeGenFunction CGF(CGM);
5337 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5338 llvm::Value *Size = nullptr;
5339 // If the size of the reduction item is non-constant, load it from the
5340 // global threadprivate variable.
5341 if (RCG.getSizes(N).second) {
5342 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5343 CGF, CGM.getContext().getSizeType(),
5344 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5345 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5346 CGM.getContext().getSizeType(), Loc);
5347 }
5348 RCG.emitAggregateType(CGF, N, Size);
5349 // Remap lhs and rhs variables to the addresses of the function arguments.
5350 // %lhs = bitcast void* %arg0 to <type>* 5351 // %rhs = bitcast void* %arg1 to <type>* 5352 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5353 PrivateScope.addPrivate( 5354 LHSVD, 5355 // Pull out the pointer to the variable. 5356 CGF.EmitLoadOfPointer( 5357 CGF.GetAddrOfLocalVar(&ParamInOut) 5358 .withElementType( 5359 CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()), 5360 C.getPointerType(LHSVD->getType())->castAs<PointerType>())); 5361 PrivateScope.addPrivate( 5362 RHSVD, 5363 // Pull out the pointer to the variable. 5364 CGF.EmitLoadOfPointer( 5365 CGF.GetAddrOfLocalVar(&ParamIn).withElementType( 5366 CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()), 5367 C.getPointerType(RHSVD->getType())->castAs<PointerType>())); 5368 PrivateScope.Privatize(); 5369 // Emit the combiner body: 5370 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5371 // store <type> %2, <type>* %lhs 5372 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5373 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5374 cast<DeclRefExpr>(RHS)); 5375 CGF.FinishFunction(); 5376 return Fn; 5377 } 5378 5379 /// Emits reduction finalizer function: 5380 /// \code 5381 /// void @.red_fini(void* %arg) { 5382 /// %0 = bitcast void* %arg to <type>* 5383 /// <destroy>(<type>* %0) 5384 /// ret void 5385 /// } 5386 /// \endcode 5387 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5388 SourceLocation Loc, 5389 ReductionCodeGen &RCG, unsigned N) { 5390 if (!RCG.needCleanups(N)) 5391 return nullptr; 5392 ASTContext &C = CGM.getContext(); 5393 FunctionArgList Args; 5394 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5395 ImplicitParamKind::Other); 5396 Args.emplace_back(&Param); 5397 const auto &FnInfo = 5398 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5399 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5400 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 5401 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5402 Name, &CGM.getModule()); 5403 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5404 Fn->setDoesNotRecurse(); 5405 CodeGenFunction CGF(CGM); 5406 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5407 Address PrivateAddr = CGF.EmitLoadOfPointer( 5408 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>()); 5409 llvm::Value *Size = nullptr; 5410 // If the size of the reduction item is non-constant, load it from global 5411 // threadprivate variable. 
5412 if (RCG.getSizes(N).second) { 5413 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5414 CGF, CGM.getContext().getSizeType(), 5415 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5416 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5417 CGM.getContext().getSizeType(), Loc); 5418 } 5419 RCG.emitAggregateType(CGF, N, Size); 5420 // Emit the finalizer body: 5421 // <destroy>(<type>* %0) 5422 RCG.emitCleanups(CGF, N, PrivateAddr); 5423 CGF.FinishFunction(Loc); 5424 return Fn; 5425 } 5426 5427 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 5428 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 5429 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 5430 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 5431 return nullptr; 5432 5433 // Build typedef struct: 5434 // kmp_taskred_input { 5435 // void *reduce_shar; // shared reduction item 5436 // void *reduce_orig; // original reduction item used for initialization 5437 // size_t reduce_size; // size of data item 5438 // void *reduce_init; // data initialization routine 5439 // void *reduce_fini; // data finalization routine 5440 // void *reduce_comb; // data combiner routine 5441 // kmp_task_red_flags_t flags; // flags for additional info from compiler 5442 // } kmp_taskred_input_t; 5443 ASTContext &C = CGM.getContext(); 5444 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 5445 RD->startDefinition(); 5446 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5447 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5448 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 5449 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5450 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5451 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5452 const FieldDecl *FlagsFD = addFieldToRecordDecl( 5453 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 5454 RD->completeDefinition(); 5455 QualType RDType = C.getRecordType(RD); 5456 unsigned Size = Data.ReductionVars.size(); 5457 llvm::APInt ArraySize(/*numBits=*/64, Size); 5458 QualType ArrayRDType = 5459 C.getConstantArrayType(RDType, ArraySize, nullptr, 5460 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); 5461 // kmp_task_red_input_t .rd_input.[Size]; 5462 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 5463 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 5464 Data.ReductionCopies, Data.ReductionOps); 5465 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 5466 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 5467 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 5468 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 5469 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 5470 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs, 5471 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 5472 ".rd_input.gep."); 5473 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 5474 // ElemLVal.reduce_shar = &Shareds[Cnt]; 5475 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 5476 RCG.emitSharedOrigLValue(CGF, Cnt); 5477 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF); 5478 CGF.EmitStoreOfScalar(Shared, SharedLVal); 5479 // ElemLVal.reduce_orig = &Origs[Cnt]; 5480 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 5481 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF); 
5482 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5483 RCG.emitAggregateType(CGF, Cnt);
5484 llvm::Value *SizeValInChars;
5485 llvm::Value *SizeVal;
5486 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5487 // We use delayed creation/initialization for VLAs and array sections. It
5488 // is required because the runtime does not provide a way to pass the
5489 // sizes of VLAs/array sections to the initializer/combiner/finalizer
5490 // functions. Instead, threadprivate global variables are used to store
5491 // these values, and the functions read them from there.
5492 bool DelayedCreation = !!SizeVal;
5493 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5494 /*isSigned=*/false);
5495 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5496 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5497 // ElemLVal.reduce_init = init;
5498 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5499 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5500 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5501 // ElemLVal.reduce_fini = fini;
5502 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5503 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5504 llvm::Value *FiniAddr =
5505 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5506 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5507 // ElemLVal.reduce_comb = comb;
5508 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5509 llvm::Value *CombAddr = emitReduceCombFunction(
5510 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5511 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5512 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5513 // ElemLVal.flags = DelayedCreation ? 1 : 0;
5514 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5515 if (DelayedCreation) {
5516 CGF.EmitStoreOfScalar(
5517 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5518 FlagsLVal);
5519 } else
5520 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
5521 FlagsLVal.getType());
5522 }
5523 if (Data.IsReductionWithTaskMod) {
5524 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5525 // is_ws, int num, void *data);
5526 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5527 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5528 CGM.IntTy, /*isSigned=*/true);
5529 llvm::Value *Args[] = {
5530 IdentTLoc, GTid,
5531 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5532 /*isSigned=*/true),
5533 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5534 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5535 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5536 return CGF.EmitRuntimeCall(
5537 OMPBuilder.getOrCreateRuntimeFunction(
5538 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5539 Args);
5540 }
5541 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5542 llvm::Value *Args[] = {
5543 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5544 /*isSigned=*/true),
5545 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5546 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5547 CGM.VoidPtrTy)};
5548 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5549 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5550 Args);
5551 }
5552
5553 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5554 SourceLocation Loc,
5555 bool IsWorksharingReduction) {
5556 // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
5557 // gtid, int is_ws);
5558 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5559 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5560 CGM.IntTy, /*isSigned=*/true);
5561 llvm::Value *Args[] = {IdentTLoc, GTid,
5562 llvm::ConstantInt::get(CGM.IntTy,
5563 IsWorksharingReduction ? 1 : 0,
5564 /*isSigned=*/true)};
5565 (void)CGF.EmitRuntimeCall(
5566 OMPBuilder.getOrCreateRuntimeFunction(
5567 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5568 Args);
5569 }
5570
5571 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5572 SourceLocation Loc,
5573 ReductionCodeGen &RCG,
5574 unsigned N) {
5575 auto Sizes = RCG.getSizes(N);
5576 // Emit the threadprivate global variable if the size of the reduction item
5577 // is non-constant (Sizes.second != nullptr).
5578 if (Sizes.second) {
5579 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5580 /*isSigned=*/false);
5581 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5582 CGF, CGM.getContext().getSizeType(),
5583 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5584 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5585 }
5586 }
5587
5588 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5589 SourceLocation Loc,
5590 llvm::Value *ReductionsPtr,
5591 LValue SharedLVal) {
5592 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5593 // *d);
5594 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5595 CGM.IntTy,
5596 /*isSigned=*/true),
5597 ReductionsPtr,
5598 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5599 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5600 return Address(
5601 CGF.EmitRuntimeCall(
5602 OMPBuilder.getOrCreateRuntimeFunction(
5603 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5604 Args),
5605 CGF.Int8Ty, SharedLVal.getAlignment());
5606 }
5607
5608 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5609 const OMPTaskDataTy &Data) {
5610 if (!CGF.HaveInsertPoint())
5611 return;
5612
5613 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5614 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
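// createTaskwait lowers a plain taskwait to a single
// __kmpc_omp_taskwait(loc, gtid) call, matching the fallback path below.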
5615 OMPBuilder.createTaskwait(CGF.Builder); 5616 } else { 5617 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5618 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5619 auto &M = CGM.getModule(); 5620 Address DependenciesArray = Address::invalid(); 5621 llvm::Value *NumOfElements; 5622 std::tie(NumOfElements, DependenciesArray) = 5623 emitDependClause(CGF, Data.Dependences, Loc); 5624 if (!Data.Dependences.empty()) { 5625 llvm::Value *DepWaitTaskArgs[7]; 5626 DepWaitTaskArgs[0] = UpLoc; 5627 DepWaitTaskArgs[1] = ThreadID; 5628 DepWaitTaskArgs[2] = NumOfElements; 5629 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5630 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5631 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5632 DepWaitTaskArgs[6] = 5633 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause); 5634 5635 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5636 5637 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid, 5638 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5639 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list, 5640 // kmp_int32 has_no_wait); if dependence info is specified. 5641 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5642 M, OMPRTL___kmpc_omp_taskwait_deps_51), 5643 DepWaitTaskArgs); 5644 5645 } else { 5646 5647 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 5648 // global_tid); 5649 llvm::Value *Args[] = {UpLoc, ThreadID}; 5650 // Ignore return result until untied tasks are supported. 5651 CGF.EmitRuntimeCall( 5652 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), 5653 Args); 5654 } 5655 } 5656 5657 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5658 Region->emitUntiedSwitch(CGF); 5659 } 5660 5661 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 5662 OpenMPDirectiveKind InnerKind, 5663 const RegionCodeGenTy &CodeGen, 5664 bool HasCancel) { 5665 if (!CGF.HaveInsertPoint()) 5666 return; 5667 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 5668 InnerKind != OMPD_critical && 5669 InnerKind != OMPD_master && 5670 InnerKind != OMPD_masked); 5671 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 5672 } 5673 5674 namespace { 5675 enum RTCancelKind { 5676 CancelNoreq = 0, 5677 CancelParallel = 1, 5678 CancelLoop = 2, 5679 CancelSections = 3, 5680 CancelTaskgroup = 4 5681 }; 5682 } // anonymous namespace 5683 5684 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 5685 RTCancelKind CancelKind = CancelNoreq; 5686 if (CancelRegion == OMPD_parallel) 5687 CancelKind = CancelParallel; 5688 else if (CancelRegion == OMPD_for) 5689 CancelKind = CancelLoop; 5690 else if (CancelRegion == OMPD_sections) 5691 CancelKind = CancelSections; 5692 else { 5693 assert(CancelRegion == OMPD_taskgroup); 5694 CancelKind = CancelTaskgroup; 5695 } 5696 return CancelKind; 5697 } 5698 5699 void CGOpenMPRuntime::emitCancellationPointCall( 5700 CodeGenFunction &CGF, SourceLocation Loc, 5701 OpenMPDirectiveKind CancelRegion) { 5702 if (!CGF.HaveInsertPoint()) 5703 return; 5704 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 5705 // global_tid, kmp_int32 cncl_kind); 5706 if (auto *OMPRegionInfo = 5707 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 5708 // For 'cancellation point taskgroup', the task region info may not have a 5709 // cancel. This may instead happen in another adjacent task. 
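    // E.g. (illustrative source shape):
    //   #pragma omp task
    //   { #pragma omp cancellation point taskgroup }
    // The enclosing task region carries no cancel itself, yet the runtime call
    // must still be emitted because a sibling task in the same taskgroup may
    // trigger the cancellation.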
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The runtime returns a non-zero value if cancellation has been
      // activated; the result is branched on below.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The runtime returns a non-zero value if cancellation has been
      // activated; the result is branched on below.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
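/// For a directive such as (illustrative only):
///   #pragma omp target uses_allocators(my_alloc(my_traits))
/// Enter() emits the __kmpc_init_allocator call(s) on entry to the region and
/// Exit() emits the matching __kmpc_destroy_allocator call(s) on exit.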
5787 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 5788 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 5789 5790 public: 5791 OMPUsesAllocatorsActionTy( 5792 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 5793 : Allocators(Allocators) {} 5794 void Enter(CodeGenFunction &CGF) override { 5795 if (!CGF.HaveInsertPoint()) 5796 return; 5797 for (const auto &AllocatorData : Allocators) { 5798 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 5799 CGF, AllocatorData.first, AllocatorData.second); 5800 } 5801 } 5802 void Exit(CodeGenFunction &CGF) override { 5803 if (!CGF.HaveInsertPoint()) 5804 return; 5805 for (const auto &AllocatorData : Allocators) { 5806 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 5807 AllocatorData.first); 5808 } 5809 } 5810 }; 5811 } // namespace 5812 5813 void CGOpenMPRuntime::emitTargetOutlinedFunction( 5814 const OMPExecutableDirective &D, StringRef ParentName, 5815 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 5816 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 5817 assert(!ParentName.empty() && "Invalid target entry parent name!"); 5818 HasEmittedTargetRegion = true; 5819 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 5820 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 5821 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 5822 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 5823 if (!D.AllocatorTraits) 5824 continue; 5825 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 5826 } 5827 } 5828 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 5829 CodeGen.setAction(UsesAllocatorAction); 5830 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 5831 IsOffloadEntry, CodeGen); 5832 } 5833 5834 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 5835 const Expr *Allocator, 5836 const Expr *AllocatorTraits) { 5837 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 5838 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 5839 // Use default memspace handle. 5840 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5841 llvm::Value *NumTraits = llvm::ConstantInt::get( 5842 CGF.IntTy, cast<ConstantArrayType>( 5843 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 5844 ->getSize() 5845 .getLimitedValue()); 5846 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 5847 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5848 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy); 5849 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 5850 AllocatorTraitsLVal.getBaseInfo(), 5851 AllocatorTraitsLVal.getTBAAInfo()); 5852 llvm::Value *Traits = Addr.getPointer(); 5853 5854 llvm::Value *AllocatorVal = 5855 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5856 CGM.getModule(), OMPRTL___kmpc_init_allocator), 5857 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 5858 // Store to allocator. 
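  // The uses_allocators allocator behaves like a private variable of the
  // target region: emit its alloca first, then convert the handle returned by
  // __kmpc_init_allocator to the declared allocator type and store it.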
5859 CGF.EmitAutoVarAlloca(*cast<VarDecl>( 5860 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 5861 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 5862 AllocatorVal = 5863 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 5864 Allocator->getType(), Allocator->getExprLoc()); 5865 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 5866 } 5867 5868 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 5869 const Expr *Allocator) { 5870 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 5871 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 5872 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 5873 llvm::Value *AllocatorVal = 5874 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 5875 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 5876 CGF.getContext().VoidPtrTy, 5877 Allocator->getExprLoc()); 5878 (void)CGF.EmitRuntimeCall( 5879 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 5880 OMPRTL___kmpc_destroy_allocator), 5881 {ThreadId, AllocatorVal}); 5882 } 5883 5884 void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams( 5885 const OMPExecutableDirective &D, CodeGenFunction &CGF, 5886 int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal, 5887 int32_t &MaxTeamsVal) { 5888 5889 getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal); 5890 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal, 5891 /*UpperBoundOnly=*/true); 5892 5893 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) { 5894 for (auto *A : C->getAttrs()) { 5895 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1; 5896 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1; 5897 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A)) 5898 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal, 5899 &AttrMinBlocksVal, &AttrMaxBlocksVal); 5900 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A)) 5901 CGM.handleAMDGPUFlatWorkGroupSizeAttr( 5902 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal, 5903 &AttrMaxThreadsVal); 5904 else 5905 continue; 5906 5907 MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal); 5908 if (AttrMaxThreadsVal > 0) 5909 MaxThreadsVal = MaxThreadsVal > 0 5910 ? std::min(MaxThreadsVal, AttrMaxThreadsVal) 5911 : AttrMaxThreadsVal; 5912 MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal); 5913 if (AttrMaxBlocksVal > 0) 5914 MaxTeamsVal = MaxTeamsVal > 0 ? 
std::min(MaxTeamsVal, AttrMaxBlocksVal) 5915 : AttrMaxBlocksVal; 5916 } 5917 } 5918 } 5919 5920 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 5921 const OMPExecutableDirective &D, StringRef ParentName, 5922 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 5923 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 5924 5925 llvm::TargetRegionEntryInfo EntryInfo = 5926 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName); 5927 5928 CodeGenFunction CGF(CGM, true); 5929 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction = 5930 [&CGF, &D, &CodeGen](StringRef EntryFnName) { 5931 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 5932 5933 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 5934 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 5935 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 5936 }; 5937 5938 OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction, 5939 IsOffloadEntry, OutlinedFn, OutlinedFnID); 5940 5941 if (!OutlinedFn) 5942 return; 5943 5944 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); 5945 5946 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) { 5947 for (auto *A : C->getAttrs()) { 5948 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A)) 5949 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr); 5950 } 5951 } 5952 } 5953 5954 /// Checks if the expression is constant or does not have non-trivial function 5955 /// calls. 5956 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 5957 // We can skip constant expressions. 5958 // We can skip expressions with trivial calls or simple expressions. 5959 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 5960 !E->hasNonTrivialCall(Ctx)) && 5961 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 5962 } 5963 5964 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 5965 const Stmt *Body) { 5966 const Stmt *Child = Body->IgnoreContainers(); 5967 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 5968 Child = nullptr; 5969 for (const Stmt *S : C->body()) { 5970 if (const auto *E = dyn_cast<Expr>(S)) { 5971 if (isTrivial(Ctx, E)) 5972 continue; 5973 } 5974 // Some of the statements can be ignored. 5975 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 5976 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 5977 continue; 5978 // Analyze declarations. 5979 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 5980 if (llvm::all_of(DS->decls(), [](const Decl *D) { 5981 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 5982 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 5983 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 5984 isa<UsingDirectiveDecl>(D) || 5985 isa<OMPDeclareReductionDecl>(D) || 5986 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 5987 return true; 5988 const auto *VD = dyn_cast<VarDecl>(D); 5989 if (!VD) 5990 return false; 5991 return VD->hasGlobalStorage() || !VD->isUsed(); 5992 })) 5993 continue; 5994 } 5995 // Found multiple children - cannot get the one child only. 
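      // (E.g. two non-trivial statements inside the captured compound
      // statement; callers such as the num_teams/num_threads analysis below
      // then treat the region as opaque.)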
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        MinTeamsVal = MaxTeamsVal = 1;
        return nullptr;
      }
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to signal that no teams region was found and none
    // needs to be emitted.
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t MinNT = -1, MaxNT = -1;
  const Expr *NumTeams =
      getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  assert(MinNT == MaxNT && "Num teams ranges require handling here.");
  return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
}

/// Check for a num threads constant value (stored in \p UpperBound), or
/// expression (stored in \p E). If the value is conditional (via an if-clause),
/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
/// nullptr, no expression evaluation is performed.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                          const Expr **E, int32_t &UpperBound,
                          bool UpperBoundOnly, llvm::Value **CondVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
  if (!Dir)
    return;

  if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
    // Handle if clause. If an if clause is present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
    if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *CondExpr = IfClause->getCondition();
        bool Result;
        if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result) {
            UpperBound = 1;
            return;
          }
        } else {
          CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
            *CondVal = CGF.EvaluateExprAsBool(CondExpr);
          }
        }
      }
    }
    // Check the value of the num_threads clause iff the if clause was not
    // specified or did not evaluate to false.
    if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const auto *NumThreadsClause =
          Dir->getSingleClause<OMPNumThreadsClause>();
      const Expr *NTExpr = NumThreadsClause->getNumThreads();
      if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
          UpperBound =
              UpperBound == -1
                  ? static_cast<int32_t>(Constant->getZExtValue())
                  : std::min(UpperBound,
                             static_cast<int32_t>(Constant->getZExtValue()));
      // If we haven't found an upper bound, remember we saw a thread limiting
      // clause.
      if (UpperBound == -1)
        UpperBound = 0;
      if (!E)
        return;
      CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
      if (const auto *PreInit =
              cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
        for (const auto *I : PreInit->decls()) {
          if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          } else {
            CodeGenFunction::AutoVarEmission Emission =
                CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
            CGF.EmitAutoVarCleanups(Emission);
          }
        }
      }
      *E = NTExpr;
    }
    return;
  }
  if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
    UpperBound = 1;
}

const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
    bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
  assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  const Expr *NT = nullptr;
  const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;

  auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
    if (E->isIntegerConstantExpr(CGF.getContext())) {
      if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
        UpperBound = UpperBound == -1
                         ? int32_t(Constant->getZExtValue())
                         : std::min(UpperBound,
                                    int32_t(Constant->getZExtValue()));
    }
    // If we haven't found an upper bound, remember we saw a thread limiting
    // clause.
    if (UpperBound == -1)
      UpperBound = 0;
    if (EPtr)
      *EPtr = E;
  };

  auto ReturnSequential = [&]() {
    UpperBound = 1;
    return NT;
  };

  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear on how to resolve two thread limit
    // clauses; let's pick the teams one if it's present, otherwise the target
    // one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        if (ThreadLimitExpr) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          CodeGenFunction::LexicalScope Scope(
              CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
        }
      }
    }
    if (ThreadLimitClause)
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return ReturnSequential();
    }
    return NT;
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      }
    }
    return NT;
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
                  UpperBoundOnly, CondVal);
    return NT;
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return ReturnSequential();
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          *CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
      return NumThreadsClause->getNumThreads();
    }
    return NT;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return ReturnSequential();
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
      &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
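  // The final value is assembled below; roughly (sketch):
  //   threads = <cond> ? (<numthreads> ? <numthreads> : 0) : 1;
  //   threads = (<threadlimit> && <threadlimit> < threads) ? <threadlimit>
  //                                                        : threads;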
  if (UpperBound == 1) {
    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. We already handled the thread limit expression.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(0);
  }

  // Handle if clause. If an if clause is present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
                                             CGF.Builder.getInt32(1));
  }

  // If the thread limit and num threads expressions were both present, take
  // the minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
        ThreadLimitVal, NumThreadsVal);
  }

  return NumThreadsVal;
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain =
             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
         !(Remain & 1); Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
  using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
  using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
  using MapNonContiguousArrayTy =
      llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
    MapExprsArrayTy Exprs;
    MapValueDeclsArrayTy Mappers;
    MapValueDeclsArrayTy DevicePtrDecls;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
                            CurInfo.DevicePtrDecls.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Holds a mappable expression component list together with its map type,
  /// map/motion modifiers, and whether a device pointer has to be returned
  /// for it.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from which the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for an array shaping expression.
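    // E.g. for `([n][m])ptr` the size is n * m * sizeof(*ptr), computed below
    // with no-unsigned-wrap multiplies over the shaping dimensions.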
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, we are mapping the whole length of
      // the base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizeof(base) - lb * sizeof(elem), clamped to zero below.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
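  /// For example, `map(always, tofrom: x)` yields OMP_MAP_TO | OMP_MAP_FROM |
  /// OMP_MAP_ALWAYS, plus OMP_MAP_IMPLICIT when the map entry was
  /// compiler-generated.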
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
                   : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is a pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
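    // E.g. `a[i:1]` is provably unit-size and therefore not final, while
    // `a[i:n]` is conservatively treated as final because n is not a
    // compile-time constant.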
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have more than size 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo,
      MapCombinedInfoTy &StructBaseCombinedInfo,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit, bool GenerateAllInfoForClauses,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = std::nullopt) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    // int **a = &i;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map((*a)[0:3])
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(**a)
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
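      // (For declare target 'link' variables, or 'to'/'enter' variables under
      // unified shared memory, BP is redirected just below to the address
      // returned by getAddrOfDeclareTargetVar.)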
7044       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7045       if (const auto *VD =
7046               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7047         if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7048                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7049           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7050               ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7051                 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7052                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7053             RequiresReference = true;
7054             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7055           }
7056         }
7057       }
7058
7059       // If the variable is a pointer and is being dereferenced (i.e. is not
7060       // the last component), the base has to be the pointer itself, not its
7061       // reference. References are ignored for mapping purposes.
7062       QualType Ty =
7063           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7064       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7065         // No need to generate individual map information for the pointer, it
7066         // can be associated with the combined storage if shared memory mode is
7067         // active or the base declaration is not a global variable.
7068         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7069         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7070             !VD || VD->hasLocalStorage())
7071           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7072         else
7073           FirstPointerInComplexData = true;
7074         ++I;
7075       }
7076     }
7077
7078     // Track whether a component of the list should be marked as MEMBER_OF some
7079     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7080     // in a component list should be marked as MEMBER_OF, all subsequent entries
7081     // do not belong to the base struct. E.g.
7082     // struct S2 s;
7083     // s.ps->ps->ps->f[:]
7084     //   (1)  (2)  (3) (4)
7085     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7086     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7087     // is the pointee of ps(2), which is not a member of struct s, so it should
7088     // not be marked as such (it is still PTR_AND_OBJ).
7089     // The variable is initialized to false so that PTR_AND_OBJ entries which
7090     // are not struct members are not considered (e.g. array of pointers to
7091     // data).
7092     bool ShouldBeMemberOf = false;
7093
7094     // Variable keeping track of whether or not we have encountered a component
7095     // in the component list which is a member expression. Useful when we have a
7096     // pointer or a final array section, in which case it is the previous
7097     // component in the list which tells us whether we have a member expression.
7098     // E.g. X.f[:]
7099     // While processing the final array section "[:]" it is "f" which tells us
7100     // whether we are dealing with a member of a declared struct.
7101     const MemberExpr *EncounteredME = nullptr;
7102
7103     // Track the total number of dimensions. Start from one for the dummy
7104     // dimension.
7105     uint64_t DimSize = 1;
7106
7107     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7108     bool IsPrevMemberReference = false;
7109
7110     // We need to check if we will be encountering any member expressions (MEs).
7111     // If we do not encounter any ME it means we will be mapping the whole struct.
7112     // In that case we need to skip adding an entry for the struct to the
7113     // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7114     // list only when generating all info for clauses.
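    // As a rough illustration (the struct and clauses here are made up for
    // this comment, not taken from the code or its tests):
    //   struct S { int a, b; } s;
    //   map(tofrom: s)    - no MemberExpr among the components, so the whole
    //                       struct is mapped and its entry goes to the
    //                       StructBaseCombinedInfo list.
    //   map(tofrom: s.a)  - "a" is a MemberExpr, so the entries go to the
    //                       CombinedInfo list and a combined entry for the
    //                       partial struct is emitted later.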
7115     bool IsMappingWholeStruct = true;
7116     if (!GenerateAllInfoForClauses) {
7117       IsMappingWholeStruct = false;
7118     } else {
7119       for (auto TempI = I; TempI != CE; ++TempI) {
7120         const MemberExpr *PossibleME =
7121             dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7122         if (PossibleME) {
7123           IsMappingWholeStruct = false;
7124           break;
7125         }
7126       }
7127     }
7128
7129     for (; I != CE; ++I) {
7130       // If the current component is a member of a struct (parent struct), mark it.
7131       if (!EncounteredME) {
7132         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7133         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7134         // as MEMBER_OF the parent struct.
7135         if (EncounteredME) {
7136           ShouldBeMemberOf = true;
7137           // Do not emit as a complex pointer if this is actually not an
7138           // array-like expression.
7139           if (FirstPointerInComplexData) {
7140             QualType Ty = std::prev(I)
7141                               ->getAssociatedDeclaration()
7142                               ->getType()
7143                               .getNonReferenceType();
7144             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7145             FirstPointerInComplexData = false;
7146           }
7147         }
7148       }
7149
7150       auto Next = std::next(I);
7151
7152       // We need to generate the addresses and sizes if this is the last
7153       // component, if the component is a pointer or if it is an array section
7154       // whose length can't be proved to be one. If this is a pointer, it
7155       // becomes the base address for the following components.
7156
7157       // A final array section is one whose length can't be proved to be one.
7158       // If the map item is non-contiguous then we don't treat any array section
7159       // as a final array section.
7160       bool IsFinalArraySection =
7161           !IsNonContiguous &&
7162           isFinalArraySectionExpression(I->getAssociatedExpression());
7163
7164       // If we have a declaration for the mapping use that, otherwise use
7165       // the base declaration of the map clause.
7166       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7167                                      ? I->getAssociatedDeclaration()
7168                                      : BaseDecl;
7169       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7170                                                : MapExpr;
7171
7172       // Get information on whether the element is a pointer. We have to
7173       // apply special treatment to array sections given that they are
7174       // built-in types.
7175       const auto *OASE =
7176           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7177       const auto *OAShE =
7178           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7179       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7180       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7181       bool IsPointer =
7182           OAShE ||
7183           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7184                        .getCanonicalType()
7185                        ->isAnyPointerType()) ||
7186           I->getAssociatedExpression()->getType()->isAnyPointerType();
7187       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7188                                MapDecl &&
7189                                MapDecl->getType()->isLValueReferenceType();
7190       bool IsNonDerefPointer = IsPointer &&
7191                                !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7192                                !IsNonContiguous;
7193
7194       if (OASE)
7195         ++DimSize;
7196
7197       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7198           IsFinalArraySection) {
7199         // If this is not the last component, we expect the pointer to be
7200         // associated with an array expression or member expression.
7201 assert((Next == CE || 7202 isa<MemberExpr>(Next->getAssociatedExpression()) || 7203 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7204 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7205 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 7206 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7207 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7208 "Unexpected expression"); 7209 7210 Address LB = Address::invalid(); 7211 Address LowestElem = Address::invalid(); 7212 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 7213 const MemberExpr *E) { 7214 const Expr *BaseExpr = E->getBase(); 7215 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 7216 // scalar. 7217 LValue BaseLV; 7218 if (E->isArrow()) { 7219 LValueBaseInfo BaseInfo; 7220 TBAAAccessInfo TBAAInfo; 7221 Address Addr = 7222 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 7223 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 7224 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 7225 } else { 7226 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 7227 } 7228 return BaseLV; 7229 }; 7230 if (OAShE) { 7231 LowestElem = LB = 7232 Address(CGF.EmitScalarExpr(OAShE->getBase()), 7233 CGF.ConvertTypeForMem( 7234 OAShE->getBase()->getType()->getPointeeType()), 7235 CGF.getContext().getTypeAlignInChars( 7236 OAShE->getBase()->getType())); 7237 } else if (IsMemberReference) { 7238 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 7239 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7240 LowestElem = CGF.EmitLValueForFieldInitialization( 7241 BaseLVal, cast<FieldDecl>(MapDecl)) 7242 .getAddress(CGF); 7243 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 7244 .getAddress(CGF); 7245 } else { 7246 LowestElem = LB = 7247 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7248 .getAddress(CGF); 7249 } 7250 7251 // If this component is a pointer inside the base struct then we don't 7252 // need to create any entry for it - it will be combined with the object 7253 // it is pointing to into a single PTR_AND_OBJ entry. 7254 bool IsMemberPointerOrAddr = 7255 EncounteredME && 7256 (((IsPointer || ForDeviceAddr) && 7257 I->getAssociatedExpression() == EncounteredME) || 7258 (IsPrevMemberReference && !IsPointer) || 7259 (IsMemberReference && Next != CE && 7260 !Next->getAssociatedExpression()->getType()->isPointerType())); 7261 if (!OverlappedElements.empty() && Next == CE) { 7262 // Handle base element with the info for overlapped elements. 7263 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7264 assert(!IsPointer && 7265 "Unexpected base element with the pointer type."); 7266 // Mark the whole struct as the struct that requires allocation on the 7267 // device. 
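          // A hedged example (the clauses are illustrative only, not from a
          // test):
          //   map(tofrom: s) map(from: s.p[0:8])
          // Here s.p[0:8] overlaps the map of the whole struct, so the struct
          // is taken as the base element here and the non-overlapped pieces
          // of s are bit-copied around s.p by the loop below.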
7268 PartialStruct.LowestElem = {0, LowestElem}; 7269 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7270 I->getAssociatedExpression()->getType()); 7271 Address HB = CGF.Builder.CreateConstGEP( 7272 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 7273 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty), 7274 TypeSize.getQuantity() - 1); 7275 PartialStruct.HighestElem = { 7276 std::numeric_limits<decltype( 7277 PartialStruct.HighestElem.first)>::max(), 7278 HB}; 7279 PartialStruct.Base = BP; 7280 PartialStruct.LB = LB; 7281 assert( 7282 PartialStruct.PreliminaryMapData.BasePointers.empty() && 7283 "Overlapped elements must be used only once for the variable."); 7284 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 7285 // Emit data for non-overlapped data. 7286 OpenMPOffloadMappingFlags Flags = 7287 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | 7288 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7289 /*AddPtrFlag=*/false, 7290 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 7291 llvm::Value *Size = nullptr; 7292 // Do bitcopy of all non-overlapped structure elements. 7293 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7294 Component : OverlappedElements) { 7295 Address ComponentLB = Address::invalid(); 7296 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7297 Component) { 7298 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 7299 const auto *FD = dyn_cast<FieldDecl>(VD); 7300 if (FD && FD->getType()->isLValueReferenceType()) { 7301 const auto *ME = 7302 cast<MemberExpr>(MC.getAssociatedExpression()); 7303 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7304 ComponentLB = 7305 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 7306 .getAddress(CGF); 7307 } else { 7308 ComponentLB = 7309 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7310 .getAddress(CGF); 7311 } 7312 Size = CGF.Builder.CreatePtrDiff( 7313 CGF.Int8Ty, ComponentLB.getPointer(), LB.getPointer()); 7314 break; 7315 } 7316 } 7317 assert(Size && "Failed to determine structure size"); 7318 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7319 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7320 CombinedInfo.DevicePtrDecls.push_back(nullptr); 7321 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 7322 CombinedInfo.Pointers.push_back(LB.getPointer()); 7323 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7324 Size, CGF.Int64Ty, /*isSigned=*/true)); 7325 CombinedInfo.Types.push_back(Flags); 7326 CombinedInfo.Mappers.push_back(nullptr); 7327 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7328 : 1); 7329 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7330 } 7331 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7332 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7333 CombinedInfo.DevicePtrDecls.push_back(nullptr); 7334 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 7335 CombinedInfo.Pointers.push_back(LB.getPointer()); 7336 Size = CGF.Builder.CreatePtrDiff( 7337 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 7338 LB.getPointer()); 7339 CombinedInfo.Sizes.push_back( 7340 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7341 CombinedInfo.Types.push_back(Flags); 7342 CombinedInfo.Mappers.push_back(nullptr); 7343 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 7344 : 1); 7345 break; 7346 } 7347 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7348 // Skip adding an entry in the CurInfo of this combined entry if the 7349 // whole struct is currently being mapped. The struct needs to be added 7350 // in the first position before any data internal to the struct is being 7351 // mapped. 7352 if (!IsMemberPointerOrAddr || 7353 (Next == CE && MapType != OMPC_MAP_unknown)) { 7354 if (!IsMappingWholeStruct) { 7355 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7356 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7357 CombinedInfo.DevicePtrDecls.push_back(nullptr); 7358 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 7359 CombinedInfo.Pointers.push_back(LB.getPointer()); 7360 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7361 Size, CGF.Int64Ty, /*isSigned=*/true)); 7362 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7363 : 1); 7364 } else { 7365 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7366 StructBaseCombinedInfo.BasePointers.push_back(BP.getPointer()); 7367 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr); 7368 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 7369 StructBaseCombinedInfo.Pointers.push_back(LB.getPointer()); 7370 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7371 Size, CGF.Int64Ty, /*isSigned=*/true)); 7372 StructBaseCombinedInfo.NonContigInfo.Dims.push_back( 7373 IsNonContiguous ? DimSize : 1); 7374 } 7375 7376 // If Mapper is valid, the last component inherits the mapper. 7377 bool HasMapper = Mapper && Next == CE; 7378 if (!IsMappingWholeStruct) 7379 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 7380 else 7381 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper 7382 : nullptr); 7383 7384 // We need to add a pointer flag for each map that comes from the 7385 // same expression except for the first one. We also need to signal 7386 // this map is the first one that relates with the current capture 7387 // (there is a set of entries for each capture). 7388 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7389 MapType, MapModifiers, MotionModifiers, IsImplicit, 7390 !IsExpressionFirstInfo || RequiresReference || 7391 FirstPointerInComplexData || IsMemberReference, 7392 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 7393 7394 if (!IsExpressionFirstInfo || IsMemberReference) { 7395 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7396 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7397 if (IsPointer || (IsMemberReference && Next != CE)) 7398 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO | 7399 OpenMPOffloadMappingFlags::OMP_MAP_FROM | 7400 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS | 7401 OpenMPOffloadMappingFlags::OMP_MAP_DELETE | 7402 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE); 7403 7404 if (ShouldBeMemberOf) { 7405 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7406 // should be later updated with the correct value of MEMBER_OF. 7407 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF; 7408 // From now on, all subsequent PTR_AND_OBJ entries should not be 7409 // marked as MEMBER_OF. 7410 ShouldBeMemberOf = false; 7411 } 7412 } 7413 7414 if (!IsMappingWholeStruct) 7415 CombinedInfo.Types.push_back(Flags); 7416 else 7417 StructBaseCombinedInfo.Types.push_back(Flags); 7418 } 7419 7420 // If we have encountered a member expression so far, keep track of the 7421 // mapped member. 
If the parent is "*this", then the value declaration
7422       // is nullptr.
7423       if (EncounteredME) {
7424         const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7425         unsigned FieldIndex = FD->getFieldIndex();
7426
7427         // Update info about the lowest and highest elements for this struct
7428         if (!PartialStruct.Base.isValid()) {
7429           PartialStruct.LowestElem = {FieldIndex, LowestElem};
7430           if (IsFinalArraySection) {
7431             Address HB =
7432                 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7433                     .getAddress(CGF);
7434             PartialStruct.HighestElem = {FieldIndex, HB};
7435           } else {
7436             PartialStruct.HighestElem = {FieldIndex, LowestElem};
7437           }
7438           PartialStruct.Base = BP;
7439           PartialStruct.LB = BP;
7440         } else if (FieldIndex < PartialStruct.LowestElem.first) {
7441           PartialStruct.LowestElem = {FieldIndex, LowestElem};
7442         } else if (FieldIndex > PartialStruct.HighestElem.first) {
7443           if (IsFinalArraySection) {
7444             Address HB =
7445                 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7446                     .getAddress(CGF);
7447             PartialStruct.HighestElem = {FieldIndex, HB};
7448           } else {
7449             PartialStruct.HighestElem = {FieldIndex, LowestElem};
7450           }
7451         }
7452       }
7453
7454       // Need to emit combined struct for array sections.
7455       if (IsFinalArraySection || IsNonContiguous)
7456         PartialStruct.IsArraySection = true;
7457
7458       // If we have a final array section, we are done with this expression.
7459       if (IsFinalArraySection)
7460         break;
7461
7462       // The pointer becomes the base for the next element.
7463       if (Next != CE)
7464         BP = IsMemberReference ? LowestElem : LB;
7465
7466       IsExpressionFirstInfo = false;
7467       IsCaptureFirstInfo = false;
7468       FirstPointerInComplexData = false;
7469       IsPrevMemberReference = IsMemberReference;
7470     } else if (FirstPointerInComplexData) {
7471       QualType Ty = Components.rbegin()
7472                         ->getAssociatedDeclaration()
7473                         ->getType()
7474                         .getNonReferenceType();
7475       BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7476       FirstPointerInComplexData = false;
7477     }
7478   }
7479   // If we ran into the whole component, allocate the space for the whole
7480   // record.
7481   if (!EncounteredME)
7482     PartialStruct.HasCompleteRecord = true;
7483
7484   if (!IsNonContiguous)
7485     return;
7486
7487   const ASTContext &Context = CGF.getContext();
7488
7489   // For supporting strides in array sections, we need to initialize the first
7490   // dimension size as 1, the first offset as 0, and the first count as 1.
7491   MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7492   MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7493   MapValuesArrayTy CurStrides;
7494   MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7495   uint64_t ElementTypeSize;
7496
7497   // Collect Size information for each dimension and get the element size as
7498   // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7499   // should be [10, 10] and the first stride is 4 bytes.
7500   for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7501        Components) {
7502     const Expr *AssocExpr = Component.getAssociatedExpression();
7503     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7504
7505     if (!OASE)
7506       continue;
7507
7508     QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7509     auto *CAT = Context.getAsConstantArrayType(Ty);
7510     auto *VAT = Context.getAsVariableArrayType(Ty);
7511
7512     // We need all the dimension sizes except for the last dimension.
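    // Illustrative note: a base whose original type is neither a constant
    // nor a variable array type (e.g. a plain pointer being sectioned) is
    // only acceptable for the first component; the assert below enforces
    // this.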
7513     assert((VAT || CAT || &Component == &*Components.begin()) &&
7514            "Should be either ConstantArray or VariableArray if not the "
7515            "first Component");
7516
7517     // Get element size if CurStrides is empty.
7518     if (CurStrides.empty()) {
7519       const Type *ElementType = nullptr;
7520       if (CAT)
7521         ElementType = CAT->getElementType().getTypePtr();
7522       else if (VAT)
7523         ElementType = VAT->getElementType().getTypePtr();
7524       else
7525         assert(&Component == &*Components.begin() &&
7526                "Only expect pointer (non CAT or VAT) when this is the "
7527                "first Component");
7528       // If ElementType is null, then it means the base is a pointer
7529       // (neither CAT nor VAT) and we'll attempt to get ElementType again
7530       // for the next iteration.
7531       if (ElementType) {
7532         // For the case of having a pointer as the base, we need to remove one
7533         // level of indirection.
7534         if (&Component != &*Components.begin())
7535           ElementType = ElementType->getPointeeOrArrayElementType();
7536         ElementTypeSize =
7537             Context.getTypeSizeInChars(ElementType).getQuantity();
7538         CurStrides.push_back(
7539             llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7540       }
7541     }
7542     // Get the dimension value, except for the last dimension since we don't
7543     // need it.
7544     if (DimSizes.size() < Components.size() - 1) {
7545       if (CAT)
7546         DimSizes.push_back(llvm::ConstantInt::get(
7547             CGF.Int64Ty, CAT->getSize().getZExtValue()));
7548       else if (VAT)
7549         DimSizes.push_back(CGF.Builder.CreateIntCast(
7550             CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7551             /*IsSigned=*/false));
7552     }
7553   }
7554
7555   // Skip the dummy dimension since we already have its information.
7556   auto *DI = DimSizes.begin() + 1;
7557   // Product of dimensions.
7558   llvm::Value *DimProd =
7559       llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7560
7561   // Collect info for the non-contiguous case. Notice that offset, count, and
7562   // stride are only meaningful for array sections, so we insert a null for
7563   // anything other than an array section.
7564   // Also, the sizes of the offsets, counts, and strides are not the same as
7565   // those of pointers, base_pointers, sizes, or dims. Instead, the sizes of
7566   // offsets, counts, and strides equal the number of non-contiguous
7567   // declarations in the target update to/from clause.
7568   for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7569        Components) {
7570     const Expr *AssocExpr = Component.getAssociatedExpression();
7571
7572     if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7573       llvm::Value *Offset = CGF.Builder.CreateIntCast(
7574           CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7575           /*isSigned=*/false);
7576       CurOffsets.push_back(Offset);
7577       CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7578       CurStrides.push_back(CurStrides.back());
7579       continue;
7580     }
7581
7582     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7583
7584     if (!OASE)
7585       continue;
7586
7587     // Offset
7588     const Expr *OffsetExpr = OASE->getLowerBound();
7589     llvm::Value *Offset = nullptr;
7590     if (!OffsetExpr) {
7591       // If offset is absent, then we just set it to zero.
7592       Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7593     } else {
7594       Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7595                                          CGF.Int64Ty,
7596                                          /*isSigned=*/false);
7597     }
7598     CurOffsets.push_back(Offset);
7599
7600     // Count
7601     const Expr *CountExpr = OASE->getLength();
7602     llvm::Value *Count = nullptr;
7603     if (!CountExpr) {
7604       // In Clang, once a higher dimension is an array section, we construct all
7605       // the lower dimensions as array sections too. However, for a case like
7606       // arr[0:2][2], Clang constructs the inner dimension as an array section
7607       // even though it is not in array section form according to the spec.
7608       if (!OASE->getColonLocFirst().isValid() &&
7609           !OASE->getColonLocSecond().isValid()) {
7610         Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7611       } else {
7612         // OpenMP 5.0, 2.1.5 Array Sections, Description.
7613         // When the length is absent it defaults to ⌈(size −
7614         // lower-bound)/stride⌉, where size is the size of the array
7615         // dimension.
7616         const Expr *StrideExpr = OASE->getStride();
7617         llvm::Value *Stride =
7618             StrideExpr
7619                 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7620                                             CGF.Int64Ty, /*isSigned=*/false)
7621                 : nullptr;
7622         if (Stride)
7623           Count = CGF.Builder.CreateUDiv(
7624               CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7625         else
7626           Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7627       }
7628     } else {
7629       Count = CGF.EmitScalarExpr(CountExpr);
7630     }
7631     Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7632     CurCounts.push_back(Count);
7633
7634     // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7635     // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7636     //              Offset Count     Stride
7637     //    D0          0      1         4    (int)    <- dummy dimension
7638     //    D1          0      2         8    (2 * (1) * 4)
7639     //    D2          1      2         20   (1 * (1 * 5) * 4)
7640     //    D3          0      2         200  (2 * (1 * 5 * 5) * 4)
7641     const Expr *StrideExpr = OASE->getStride();
7642     llvm::Value *Stride =
7643         StrideExpr
7644             ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7645                                         CGF.Int64Ty, /*isSigned=*/false)
7646             : nullptr;
7647     DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7648     if (Stride)
7649       CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7650     else
7651       CurStrides.push_back(DimProd);
7652     if (DI != DimSizes.end())
7653       ++DI;
7654   }
7655
7656   CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7657   CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7658   CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7659 }
7660
7661 /// Return the adjusted map modifiers if the declaration a capture refers to
7662 /// appears in a first-private clause. This is expected to be used only with
7663 /// directives that start with 'target'.
7664 OpenMPOffloadMappingFlags
7665 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7666   assert(Cap.capturesVariable() && "Expected capture by reference only!");
7667
7668   // A first private variable captured by reference will use only the
7669   // 'private ptr' and 'map to' flags. Return the right flags if the captured
7670   // declaration is known as first-private in this handler.
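  // A hedged sketch of the resulting flags (variable names are made up):
  //   int *p; int x;
  //   firstprivate(p) on a target directive -> TO | PTR_AND_OBJ
  //   firstprivate(x) on a target directive -> PRIVATE | TO
  // assuming both captures are by reference and known as first-private to
  // this handler.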
7671 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 7672 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 7673 return OpenMPOffloadMappingFlags::OMP_MAP_TO | 7674 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ; 7675 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE | 7676 OpenMPOffloadMappingFlags::OMP_MAP_TO; 7677 } 7678 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl()); 7679 if (I != LambdasMap.end()) 7680 // for map(to: lambda): using user specified map type. 7681 return getMapTypeBits( 7682 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(), 7683 /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(), 7684 /*AddPtrFlag=*/false, 7685 /*AddIsTargetParamFlag=*/false, 7686 /*isNonContiguous=*/false); 7687 return OpenMPOffloadMappingFlags::OMP_MAP_TO | 7688 OpenMPOffloadMappingFlags::OMP_MAP_FROM; 7689 } 7690 7691 void getPlainLayout(const CXXRecordDecl *RD, 7692 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7693 bool AsBase) const { 7694 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7695 7696 llvm::StructType *St = 7697 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7698 7699 unsigned NumElements = St->getNumElements(); 7700 llvm::SmallVector< 7701 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7702 RecordLayout(NumElements); 7703 7704 // Fill bases. 7705 for (const auto &I : RD->bases()) { 7706 if (I.isVirtual()) 7707 continue; 7708 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7709 // Ignore empty bases. 7710 if (Base->isEmpty() || CGF.getContext() 7711 .getASTRecordLayout(Base) 7712 .getNonVirtualSize() 7713 .isZero()) 7714 continue; 7715 7716 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7717 RecordLayout[FieldIndex] = Base; 7718 } 7719 // Fill in virtual bases. 7720 for (const auto &I : RD->vbases()) { 7721 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7722 // Ignore empty bases. 7723 if (Base->isEmpty()) 7724 continue; 7725 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7726 if (RecordLayout[FieldIndex]) 7727 continue; 7728 RecordLayout[FieldIndex] = Base; 7729 } 7730 // Fill in all the fields. 7731 assert(!RD->isUnion() && "Unexpected union."); 7732 for (const auto *Field : RD->fields()) { 7733 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7734 // will fill in later.) 7735 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 7736 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7737 RecordLayout[FieldIndex] = Field; 7738 } 7739 } 7740 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7741 &Data : RecordLayout) { 7742 if (Data.isNull()) 7743 continue; 7744 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7745 getPlainLayout(Base, Layout, /*AsBase=*/true); 7746 else 7747 Layout.push_back(Data.get<const FieldDecl *>()); 7748 } 7749 } 7750 7751 /// Generate all the base pointers, section pointers, sizes, map types, and 7752 /// mappers for the extracted mappable expressions (all included in \a 7753 /// CombinedInfo). Also, for each item that relates with a device pointer, a 7754 /// pair of the relevant declaration and index where it occurs is appended to 7755 /// the device pointers info array. 
7756 void generateAllInfoForClauses( 7757 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 7758 llvm::OpenMPIRBuilder &OMPBuilder, 7759 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 7760 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 7761 // We have to process the component lists that relate with the same 7762 // declaration in a single chunk so that we can generate the map flags 7763 // correctly. Therefore, we organize all lists in a map. 7764 enum MapKind { Present, Allocs, Other, Total }; 7765 llvm::MapVector<CanonicalDeclPtr<const Decl>, 7766 SmallVector<SmallVector<MapInfo, 8>, 4>> 7767 Info; 7768 7769 // Helper function to fill the information map for the different supported 7770 // clauses. 7771 auto &&InfoGen = 7772 [&Info, &SkipVarSet]( 7773 const ValueDecl *D, MapKind Kind, 7774 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 7775 OpenMPMapClauseKind MapType, 7776 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7777 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7778 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 7779 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { 7780 if (SkipVarSet.contains(D)) 7781 return; 7782 auto It = Info.find(D); 7783 if (It == Info.end()) 7784 It = Info 7785 .insert(std::make_pair( 7786 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total))) 7787 .first; 7788 It->second[Kind].emplace_back( 7789 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, 7790 IsImplicit, Mapper, VarRef, ForDeviceAddr); 7791 }; 7792 7793 for (const auto *Cl : Clauses) { 7794 const auto *C = dyn_cast<OMPMapClause>(Cl); 7795 if (!C) 7796 continue; 7797 MapKind Kind = Other; 7798 if (llvm::is_contained(C->getMapTypeModifiers(), 7799 OMPC_MAP_MODIFIER_present)) 7800 Kind = Present; 7801 else if (C->getMapType() == OMPC_MAP_alloc) 7802 Kind = Allocs; 7803 const auto *EI = C->getVarRefs().begin(); 7804 for (const auto L : C->component_lists()) { 7805 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr; 7806 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), 7807 C->getMapTypeModifiers(), std::nullopt, 7808 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), 7809 E); 7810 ++EI; 7811 } 7812 } 7813 for (const auto *Cl : Clauses) { 7814 const auto *C = dyn_cast<OMPToClause>(Cl); 7815 if (!C) 7816 continue; 7817 MapKind Kind = Other; 7818 if (llvm::is_contained(C->getMotionModifiers(), 7819 OMPC_MOTION_MODIFIER_present)) 7820 Kind = Present; 7821 const auto *EI = C->getVarRefs().begin(); 7822 for (const auto L : C->component_lists()) { 7823 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt, 7824 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 7825 C->isImplicit(), std::get<2>(L), *EI); 7826 ++EI; 7827 } 7828 } 7829 for (const auto *Cl : Clauses) { 7830 const auto *C = dyn_cast<OMPFromClause>(Cl); 7831 if (!C) 7832 continue; 7833 MapKind Kind = Other; 7834 if (llvm::is_contained(C->getMotionModifiers(), 7835 OMPC_MOTION_MODIFIER_present)) 7836 Kind = Present; 7837 const auto *EI = C->getVarRefs().begin(); 7838 for (const auto L : C->component_lists()) { 7839 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, 7840 std::nullopt, C->getMotionModifiers(), 7841 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), 7842 *EI); 7843 ++EI; 7844 } 7845 } 7846 7847 // Look at the use_device_ptr and use_device_addr clauses information and 7848 // mark the existing map entries as such. 
If there is no map information for
7849   // an entry in the use_device_ptr and use_device_addr list, we create one
7850   // with map type 'alloc' and a zero-size section. It is the user's fault if
7851   // that was not mapped before. If there is no map information and the pointer
7852   // is a struct member, then we defer the emission of that entry until the
7853   // whole struct has been processed.
7854   llvm::MapVector<CanonicalDeclPtr<const Decl>,
7855                   SmallVector<DeferredDevicePtrEntryTy, 4>>
7856       DeferredInfo;
7857   MapCombinedInfoTy UseDeviceDataCombinedInfo;
7858
7859   auto &&UseDeviceDataCombinedInfoGen =
7860       [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
7861                                    CodeGenFunction &CGF, bool IsDevAddr) {
7862         UseDeviceDataCombinedInfo.Exprs.push_back(VD);
7863         UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
7864         UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
7865         UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
7866             IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
7867         UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
7868         UseDeviceDataCombinedInfo.Sizes.push_back(
7869             llvm::Constant::getNullValue(CGF.Int64Ty));
7870         UseDeviceDataCombinedInfo.Types.push_back(
7871             OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
7872         UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
7873       };
7874
7875   auto &&MapInfoGen =
7876       [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
7877        &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
7878                  OMPClauseMappableExprCommon::MappableExprComponentListRef
7879                      Components,
7880                  bool IsImplicit, bool IsDevAddr) {
7881         // We didn't find any match in our map information; generate a zero-size
7882         // array section. If the pointer is a struct member, we defer this
7883         // action until the whole struct has been processed.
7884         if (isa<MemberExpr>(IE)) {
7885           // Insert the pointer into Info to be processed by
7886           // generateInfoForComponentList. Because it is a member pointer
7887           // without a pointee, no entry will be generated for it, therefore
7888           // we need to generate one after the whole struct has been
7889           // processed. Nonetheless, generateInfoForComponentList must be
7890           // called to take the pointer into account for the calculation of
7891           // the range of the partial struct.
7892           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
7893                   std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
7894                   nullptr, nullptr, IsDevAddr);
7895           DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
7896         } else {
7897           llvm::Value *Ptr;
7898           if (IsDevAddr) {
7899             if (IE->isGLValue())
7900               Ptr = CGF.EmitLValue(IE).getPointer(CGF);
7901             else
7902               Ptr = CGF.EmitScalarExpr(IE);
7903           } else {
7904             Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7905           }
7906           UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
7907         }
7908       };
7909
7910   auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
7911                                   const Expr *IE, bool IsDevAddr) -> bool {
7912     // We potentially have map information for this declaration already.
7913     // Look for the first set of components that refer to it. If found,
7914     // return true.
7915     // If the first component is a member expression, we have to look into
7916     // 'this', which maps to null in the map of map information. Otherwise
7917     // look directly for the information.
7918     auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7919     if (It != Info.end()) {
7920       bool Found = false;
7921       for (auto &Data : It->second) {
7922         auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
7923           return MI.Components.back().getAssociatedDeclaration() == VD;
7924         });
7925         // If we found a map entry, signal that the pointer has to be
7926         // returned and move on to the next declaration. Exclude cases where
7927         // the base pointer is mapped as an array subscript, an array section,
7928         // or via array shaping. The base address is passed as a pointer to
7929         // the base in this case and cannot be used as the base for a
7930         // use_device_ptr list item.
7931         if (CI != Data.end()) {
7932           if (IsDevAddr) {
7933             CI->ForDeviceAddr = IsDevAddr;
7934             CI->ReturnDevicePointer = true;
7935             Found = true;
7936             break;
7937           } else {
7938             auto PrevCI = std::next(CI->Components.rbegin());
7939             const auto *VarD = dyn_cast<VarDecl>(VD);
7940             if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7941                 isa<MemberExpr>(IE) ||
7942                 !VD->getType().getNonReferenceType()->isPointerType() ||
7943                 PrevCI == CI->Components.rend() ||
7944                 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
7945                 VarD->hasLocalStorage()) {
7946               CI->ForDeviceAddr = IsDevAddr;
7947               CI->ReturnDevicePointer = true;
7948               Found = true;
7949               break;
7950             }
7951           }
7952         }
7953       }
7954       return Found;
7955     }
7956     return false;
7957   };
7958
7959   // Look at the use_device_ptr clause information and mark the existing map
7960   // entries as such. If there is no map information for an entry in the
7961   // use_device_ptr list, we create one with map type 'alloc' and a zero-size
7962   // section. It is the user's fault if that was not mapped before. If there
7963   // is no map information and the pointer is a struct member, then we defer
7964   // the emission of that entry until the whole struct has been processed.
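  // A hedged example (illustrative directives, not from a test):
  //   #pragma omp target data map(tofrom: p[0:n]) use_device_ptr(p)
  // marks the existing map entry for p as RETURN_PARAM, whereas
  //   #pragma omp target data use_device_ptr(q)
  // with no prior map of q gets the zero-size 'alloc'-style entry described
  // above.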
7965 for (const auto *Cl : Clauses) { 7966 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 7967 if (!C) 7968 continue; 7969 for (const auto L : C->component_lists()) { 7970 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 7971 std::get<1>(L); 7972 assert(!Components.empty() && 7973 "Not expecting empty list of components!"); 7974 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 7975 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 7976 const Expr *IE = Components.back().getAssociatedExpression(); 7977 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false)) 7978 continue; 7979 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(), 7980 /*IsDevAddr=*/false); 7981 } 7982 } 7983 7984 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 7985 for (const auto *Cl : Clauses) { 7986 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl); 7987 if (!C) 7988 continue; 7989 for (const auto L : C->component_lists()) { 7990 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 7991 std::get<1>(L); 7992 assert(!std::get<1>(L).empty() && 7993 "Not expecting empty list of components!"); 7994 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 7995 if (!Processed.insert(VD).second) 7996 continue; 7997 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 7998 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 7999 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true)) 8000 continue; 8001 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(), 8002 /*IsDevAddr=*/true); 8003 } 8004 } 8005 8006 for (const auto &Data : Info) { 8007 StructRangeInfoTy PartialStruct; 8008 // Current struct information: 8009 MapCombinedInfoTy CurInfo; 8010 // Current struct base information: 8011 MapCombinedInfoTy StructBaseCurInfo; 8012 const Decl *D = Data.first; 8013 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8014 for (const auto &M : Data.second) { 8015 for (const MapInfo &L : M) { 8016 assert(!L.Components.empty() && 8017 "Not expecting declaration with no component lists."); 8018 8019 // Remember the current base pointer index. 8020 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8021 unsigned StructBasePointersIdx = 8022 StructBaseCurInfo.BasePointers.size(); 8023 CurInfo.NonContigInfo.IsNonContiguous = 8024 L.Components.back().isNonContiguous(); 8025 generateInfoForComponentList( 8026 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8027 CurInfo, StructBaseCurInfo, PartialStruct, 8028 /*IsFirstComponentList=*/false, L.IsImplicit, 8029 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD, 8030 L.VarRef); 8031 8032 // If this entry relates to a device pointer, set the relevant 8033 // declaration and add the 'return pointer' flag. 8034 if (L.ReturnDevicePointer) { 8035 // Check whether a value was added to either CurInfo or 8036 // StructBaseCurInfo and error if no value was added to either of 8037 // them: 8038 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() || 8039 StructBasePointersIdx < 8040 StructBaseCurInfo.BasePointers.size()) && 8041 "Unexpected number of mapped base pointers."); 8042 8043 // Choose a base pointer index which is always valid: 8044 const ValueDecl *RelevantVD = 8045 L.Components.back().getAssociatedDeclaration(); 8046 assert(RelevantVD && 8047 "No relevant declaration related with device pointer??"); 8048 8049 // If StructBaseCurInfo has been updated this iteration then work on 8050 // the first new entry added to it i.e. 
make sure that when multiple 8051 // values are added to any of the lists, the first value added is 8052 // being modified by the assignments below (not the last value 8053 // added). 8054 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) { 8055 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] = 8056 RelevantVD; 8057 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] = 8058 L.ForDeviceAddr ? DeviceInfoTy::Address 8059 : DeviceInfoTy::Pointer; 8060 StructBaseCurInfo.Types[StructBasePointersIdx] |= 8061 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; 8062 } else { 8063 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD; 8064 CurInfo.DevicePointers[CurrentBasePointersIdx] = 8065 L.ForDeviceAddr ? DeviceInfoTy::Address 8066 : DeviceInfoTy::Pointer; 8067 CurInfo.Types[CurrentBasePointersIdx] |= 8068 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; 8069 } 8070 } 8071 } 8072 } 8073 8074 // Append any pending zero-length pointers which are struct members and 8075 // used with use_device_ptr or use_device_addr. 8076 auto CI = DeferredInfo.find(Data.first); 8077 if (CI != DeferredInfo.end()) { 8078 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8079 llvm::Value *BasePtr; 8080 llvm::Value *Ptr; 8081 if (L.ForDeviceAddr) { 8082 if (L.IE->isGLValue()) 8083 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8084 else 8085 Ptr = this->CGF.EmitScalarExpr(L.IE); 8086 BasePtr = Ptr; 8087 // Entry is RETURN_PARAM. Also, set the placeholder value 8088 // MEMBER_OF=FFFF so that the entry is later updated with the 8089 // correct value of MEMBER_OF. 8090 CurInfo.Types.push_back( 8091 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM | 8092 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); 8093 } else { 8094 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8095 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8096 L.IE->getExprLoc()); 8097 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8098 // placeholder value MEMBER_OF=FFFF so that the entry is later 8099 // updated with the correct value of MEMBER_OF. 8100 CurInfo.Types.push_back( 8101 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | 8102 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM | 8103 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); 8104 } 8105 CurInfo.Exprs.push_back(L.VD); 8106 CurInfo.BasePointers.emplace_back(BasePtr); 8107 CurInfo.DevicePtrDecls.emplace_back(L.VD); 8108 CurInfo.DevicePointers.emplace_back( 8109 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer); 8110 CurInfo.Pointers.push_back(Ptr); 8111 CurInfo.Sizes.push_back( 8112 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8113 CurInfo.Mappers.push_back(nullptr); 8114 } 8115 } 8116 8117 // Unify entries in one list making sure the struct mapping precedes the 8118 // individual fields: 8119 MapCombinedInfoTy UnionCurInfo; 8120 UnionCurInfo.append(StructBaseCurInfo); 8121 UnionCurInfo.append(CurInfo); 8122 8123 // If there is an entry in PartialStruct it means we have a struct with 8124 // individual members mapped. Emit an extra combined entry. 8125 if (PartialStruct.Base.isValid()) { 8126 UnionCurInfo.NonContigInfo.Dims.push_back(0); 8127 // Emit a combined entry: 8128 emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct, 8129 /*IsMapThis*/ !VD, OMPBuilder, VD); 8130 } 8131 8132 // We need to append the results of this capture to what we already have. 8133 CombinedInfo.append(UnionCurInfo); 8134 } 8135 // Append data for use_device_ptr clauses. 
8136 CombinedInfo.append(UseDeviceDataCombinedInfo); 8137 } 8138 8139 public: 8140 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8141 : CurDir(&Dir), CGF(CGF) { 8142 // Extract firstprivate clause information. 8143 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8144 for (const auto *D : C->varlists()) 8145 FirstPrivateDecls.try_emplace( 8146 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8147 // Extract implicit firstprivates from uses_allocators clauses. 8148 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8149 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8150 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8151 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8152 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8153 /*Implicit=*/true); 8154 else if (const auto *VD = dyn_cast<VarDecl>( 8155 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8156 ->getDecl())) 8157 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8158 } 8159 } 8160 // Extract device pointer clause information. 8161 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8162 for (auto L : C->component_lists()) 8163 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8164 // Extract device addr clause information. 8165 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>()) 8166 for (auto L : C->component_lists()) 8167 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L)); 8168 // Extract map information. 8169 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { 8170 if (C->getMapType() != OMPC_MAP_to) 8171 continue; 8172 for (auto L : C->component_lists()) { 8173 const ValueDecl *VD = std::get<0>(L); 8174 const auto *RD = VD ? VD->getType() 8175 .getCanonicalType() 8176 .getNonReferenceType() 8177 ->getAsCXXRecordDecl() 8178 : nullptr; 8179 if (RD && RD->isLambda()) 8180 LambdasMap.try_emplace(std::get<0>(L), C); 8181 } 8182 } 8183 } 8184 8185 /// Constructor for the declare mapper directive. 8186 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8187 : CurDir(&Dir), CGF(CGF) {} 8188 8189 /// Generate code for the combined entry if we have a partially mapped struct 8190 /// and take care of the mapping flags of the arguments corresponding to 8191 /// individual struct members. 8192 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, 8193 MapFlagsArrayTy &CurTypes, 8194 const StructRangeInfoTy &PartialStruct, bool IsMapThis, 8195 llvm::OpenMPIRBuilder &OMPBuilder, 8196 const ValueDecl *VD = nullptr, 8197 bool NotTargetParams = true) const { 8198 if (CurTypes.size() == 1 && 8199 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) != 8200 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) && 8201 !PartialStruct.IsArraySection) 8202 return; 8203 Address LBAddr = PartialStruct.LowestElem.second; 8204 Address HBAddr = PartialStruct.HighestElem.second; 8205 if (PartialStruct.HasCompleteRecord) { 8206 LBAddr = PartialStruct.LB; 8207 HBAddr = PartialStruct.LB; 8208 } 8209 CombinedInfo.Exprs.push_back(VD); 8210 // Base is the base of the struct 8211 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); 8212 CombinedInfo.DevicePtrDecls.push_back(nullptr); 8213 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 8214 // Pointer is the address of the lowest element 8215 llvm::Value *LB = LBAddr.getPointer(); 8216 const CXXMethodDecl *MD = 8217 CGF.CurFuncDecl ? 
dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8218   const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8219   bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8220   // There should not be a mapper for a combined entry.
8221   if (HasBaseClass) {
8222     // OpenMP 5.2 148:21:
8223     // If the target construct is within a class non-static member function,
8224     // and a variable is an accessible data member of the object for which the
8225     // non-static member function is invoked, the variable is treated as if
8226     // the this[:1] expression had appeared in a map clause with a map-type
8227     // of tofrom.
8228     // Emit this[:1]
8229     CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
8230     QualType Ty = MD->getFunctionObjectParameterType();
8231     llvm::Value *Size =
8232         CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8233                                   /*isSigned=*/true);
8234     CombinedInfo.Sizes.push_back(Size);
8235   } else {
8236     CombinedInfo.Pointers.push_back(LB);
8237     // Size is (addr of {highest+1} element) - (addr of lowest element)
8238     llvm::Value *HB = HBAddr.getPointer();
8239     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8240         HBAddr.getElementType(), HB, /*Idx0=*/1);
8241     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8242     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8243     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8244     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8245                                                   /*isSigned=*/false);
8246     CombinedInfo.Sizes.push_back(Size);
8247   }
8248   CombinedInfo.Mappers.push_back(nullptr);
8249   // The map type is always TARGET_PARAM if we are generating info for captures.
8250   CombinedInfo.Types.push_back(
8251       NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8252                       : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8253   // If any element has the present modifier, then make sure the runtime
8254   // doesn't attempt to allocate the struct.
8255   if (CurTypes.end() !=
8256       llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8257         return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8258             Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8259       }))
8260     CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8261   // Remove TARGET_PARAM flag from the first element
8262   (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8263   // If any element has the ompx_hold modifier, then make sure the runtime
8264   // uses the hold reference count for the struct as a whole so that it won't
8265   // be unmapped by an extra dynamic reference count decrement. Add it to all
8266   // elements as well so the runtime knows which reference count to check
8267   // when determining whether it's time for device-to-host transfers of
8268   // individual elements.
8269   if (CurTypes.end() !=
8270       llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8271         return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8272             Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8273       })) {
8274     CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8275     for (auto &M : CurTypes)
8276       M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8277   }
8278
8279   // All other current entries will be MEMBER_OF the combined entry
8280   // (except for PTR_AND_OBJ entries which do not have a placeholder value
8281   // 0xFFFF in the MEMBER_OF field).
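  // E.g. (illustrative): if the combined struct entry ends up as argument 1,
  // the 0xFFFF placeholder in each member entry is rewritten below to
  // MEMBER_OF(1), matching the MEMBER_OF(1) entries shown in the mapping
  // examples earlier in this file.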
8282   OpenMPOffloadMappingFlags MemberOfFlag =
8283       OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8284   for (auto &M : CurTypes)
8285     OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
8286 }
8287
8288 /// Generate all the base pointers, section pointers, sizes, map types, and
8289 /// mappers for the extracted mappable expressions (all included in \a
8290 /// CombinedInfo). Also, for each item that relates to a device pointer, a
8291 /// pair of the relevant declaration and index where it occurs is appended to
8292 /// the device pointers info array.
8293 void generateAllInfo(
8294     MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8295     const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8296         llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8297   assert(CurDir.is<const OMPExecutableDirective *>() &&
8298          "Expect an executable directive");
8299   const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8300   generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8301                             SkipVarSet);
8302 }
8303
8304 /// Generate all the base pointers, section pointers, sizes, map types, and
8305 /// mappers for the extracted map clauses of a user-defined mapper (all
8306 /// included in \a CombinedInfo).
8307 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8308                               llvm::OpenMPIRBuilder &OMPBuilder) const {
8309   assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8310          "Expect a declare mapper directive");
8311   const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8312   generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8313                             OMPBuilder);
8314 }
8315
8316 /// Emit capture info for lambdas for variables captured by reference.
8317 void generateInfoForLambdaCaptures(
8318     const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8319     llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8320   QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8321   const auto *RD = VDType->getAsCXXRecordDecl();
8322   if (!RD || !RD->isLambda())
8323     return;
8324   Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8325                  CGF.getContext().getDeclAlign(VD));
8326   LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8327   llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8328   FieldDecl *ThisCapture = nullptr;
8329   RD->getCaptureFields(Captures, ThisCapture);
8330   if (ThisCapture) {
8331     LValue ThisLVal =
8332         CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8333     LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8334     LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8335                                VDLVal.getPointer(CGF));
8336     CombinedInfo.Exprs.push_back(VD);
8337     CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8338     CombinedInfo.DevicePtrDecls.push_back(nullptr);
8339     CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8340     CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8341     CombinedInfo.Sizes.push_back(
8342         CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8343                                   CGF.Int64Ty, /*isSigned=*/true));
8344     CombinedInfo.Types.push_back(
8345         OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8346         OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8347         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8348         OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8349     CombinedInfo.Mappers.push_back(nullptr);
8350   }
8351   for (const LambdaCapture &LC : RD->captures()) {
8352     if (!LC.capturesVariable())
8353       continue;
8354     const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8355     if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8356       continue;
8357     auto It = Captures.find(VD);
8358     assert(It != Captures.end() && "Found lambda capture without field.");
8359     LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8360     if (LC.getCaptureKind() == LCK_ByRef) {
8361       LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8362       LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8363                                  VDLVal.getPointer(CGF));
8364       CombinedInfo.Exprs.push_back(VD);
8365       CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8366       CombinedInfo.DevicePtrDecls.push_back(nullptr);
8367       CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8368       CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8369       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8370           CGF.getTypeSize(
8371               VD->getType().getCanonicalType().getNonReferenceType()),
8372           CGF.Int64Ty, /*isSigned=*/true));
8373     } else {
8374       RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8375       LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8376                                  VDLVal.getPointer(CGF));
8377       CombinedInfo.Exprs.push_back(VD);
8378       CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8379       CombinedInfo.DevicePtrDecls.push_back(nullptr);
8380       CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8381       CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8382       CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8383     }
8384     CombinedInfo.Types.push_back(
8385         OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8386         OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8387         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8388         OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8389     CombinedInfo.Mappers.push_back(nullptr);
8390   }
8391 }
8392
8393 /// Set correct indices for lambda captures.
8394 void adjustMemberOfForLambdaCaptures(
8395     llvm::OpenMPIRBuilder &OMPBuilder,
8396     const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8397     MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8398     MapFlagsArrayTy &Types) const {
8399   for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8400     // Set correct member_of idx for all implicit lambda captures.
8401     if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8402                      OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8403                      OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8404                      OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8405       continue;
8406     llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8407     assert(BasePtr && "Unable to find base lambda address.");
8408     int TgtIdx = -1;
8409     for (unsigned J = I; J > 0; --J) {
8410       unsigned Idx = J - 1;
8411       if (Pointers[Idx] != BasePtr)
8412         continue;
8413       TgtIdx = Idx;
8414       break;
8415     }
8416     assert(TgtIdx != -1 && "Unable to find parent lambda.");
8417     // All other current entries will be MEMBER_OF the combined entry
8418     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8419     // 0xFFFF in the MEMBER_OF field).
8420     OpenMPOffloadMappingFlags MemberOfFlag =
8421         OMPBuilder.getMemberOfFlag(TgtIdx);
8422     OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8423   }
8424 }
8425
8426 /// Generate the base pointers, section pointers, sizes, map types, and
8427 /// mappers associated with a given capture (all included in \a CombinedInfo).
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // For map(to: lambda), skip it here; it is processed in
    // generateDefaultMapInfo.
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg);
      CombinedInfo.DevicePtrDecls.emplace_back(VD);
      CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    // For member fields listed in is_device_ptr, store them in
    // DeclComponentLists for generating components info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // We found an overlap if, at least for one component list, we reached
        // the head of the list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements is less than a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
          StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
          IsImplicit, /*GenerateAllInfoForClauses=*/false, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
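
    // Illustrative example (for exposition only, not from the original
    // source): for
    //
    //   struct S { int a; int b; } s;
    //   #pragma omp target map(tofrom : s) map(to : s.a)
    //
    // the component list of 's' is a prefix of the component list of 's.a',
    // so the entry for 's' becomes the base and 's.a' is recorded as one of
    // its overlapped elements; the loop above then emits each base entry
    // together with its overlapped components.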
    // Go through the other elements, i.e. those without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
            StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
            IsImplicit, /*GenerateAllInfoForClauses=*/false, Mapper,
            /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime those captures passed by value
        // that are not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than the first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
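      // e.g. (illustrative, per the comment above): a captured 'double d'
      // would get only OMP_MAP_TO here by default, while a captured
      // 'struct T t' would get OMP_MAP_TO | OMP_MAP_FROM.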
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target
    // parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add a flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

// Try to extract the base declaration from a `this->x` expression if possible.
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();
  return nullptr;
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
    OS.flush();
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                         PLoc.getLine(), PLoc.getColumn(),
                                         SrcLocStrSize);
}
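
// For example (illustrative; the exact layout is whatever
// getOrCreateSrcLocStr produces), a clause 'map(tofrom : a[0:N])' written at
// line 12, column 20 of foo.c is described by a location string roughly of
// the form ";foo.c;a[0:N];12;20;;".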
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());

  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
  };
  if (CGM.getCodeGenOpts().getDebugInfo() !=
      llvm::codegenoptions::NoDebugInfo) {
    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                    FillInfoMap);
  }

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };
  OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
                                  /*IsNonContiguous=*/true, DeviceAddrCB,
                                  CustomMapperCB);
}

/// Check for an inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, just treat 'target teams loop' as if it's distributed.
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
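
// For example (illustrative):
//
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute parallel for
//   for (int i = 0; i < N; ++i)
//     ...
//
// returns the 'distribute parallel for' directive: it is found either
// directly under 'target' or, as here, one level deeper under a nested
// 'teams' directive.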
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of the mapper to be the current array
  // element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info, OMPBuilder);
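
  // Note (illustrative): getFlagMemberOffset() is the bit position of the
  // MEMBER_OF field inside the 64-bit map-type flags, so the shift below
  // folds the count of pre-existing components into MEMBER_OF; members
  // pushed by this mapper are thereby numbered after any components the
  // runtime has already collected.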
  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() ==
         llvm::codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            Info.Types[I]));
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
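    // Illustrative decay case: if this member was declared with
    // 'map(to : ...)' in the mapper (MemberMapType has TO set) but the mapper
    // is invoked from a 'map(from : ...)' clause (MapType has only FROM), the
    // 'from' branch below clears the TO bit, so 'to' combined with 'from'
    // decays to 'alloc', matching the table above.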
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates not to delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e.,
  // \p Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
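  // e.g. (illustrative, assuming the usual encoding TO=0x1, FROM=0x2,
  // IMPLICIT=0x200): an incoming 'tofrom' type 0x3 becomes 0x200 after the
  // masking and OR below, i.e. an implicit allocation/deletion-only entry.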
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
      Kind != OMPD_target_teams_loop)
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return llvm::ConstantInt::get(CGF.Int64Ty, 0);

  const auto *LD = cast<OMPLoopDirective>(TD);
  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
    return NumIterations;
  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
}

static void
emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                       const OMPExecutableDirective &D,
                       llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                       bool RequiresOuterTask, const CapturedStmt &CS,
                       bool OffloadingMandatory, CodeGenFunction &CGF) {
  if (OffloadingMandatory) {
    CGF.Builder.CreateUnreachable();
  } else {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
                                         CapturedVars);
  }
}

static llvm::Value *emitDeviceID(
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    CodeGenFunction &CGF) {
  // Emit device ID if any.
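  // For example (illustrative): '#pragma omp target device(2)' emits the
  // clause expression and sign-extends it to i64, while a directive without a
  // device clause passes the OMP_DEVICEID_UNDEF sentinel instead.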
  llvm::Value *DeviceID;
  if (Device.getPointer()) {
    assert((Device.getInt() == OMPC_DEVICE_unknown ||
            Device.getInt() == OMPC_DEVICE_device_num) &&
           "Expected device_num modifier.");
    llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
    DeviceID =
        CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }
  return DeviceID;
}

static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
                                      CodeGenFunction &CGF) {
  llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);

  if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
    CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
    llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
        DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
    DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
                                             /*isSigned=*/false);
  }
  return DynCGroupMem;
}

static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

  // Get mappable expression information.
  MappableExprsHandler MEHandler(D, CGF);
  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;
    MappableExprsHandler::StructRangeInfoTy PartialStruct;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(nullptr);
      CurInfo.BasePointers.push_back(*CV);
      CurInfo.DevicePtrDecls.push_back(nullptr);
      CurInfo.DevicePointers.push_back(
          MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(*CV);
      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(nullptr);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
      if (!CI->capturesThis())
        MappedVarSet.insert(CI->getCapturedVar());
      else
        MappedVarSet.insert(nullptr);
      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      CombinedInfo.append(PartialStruct.PreliminaryMapData);
      MEHandler.emitCombinedEntry(
          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
          OMPBuilder, nullptr,
          !PartialStruct.PreliminaryMapData.BasePointers.empty());
    }

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambda captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
      CombinedInfo.Pointers, CombinedInfo.Types);
  // Map any list items in a map clause that were not captures because they
  // weren't referenced within the construct.
  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);

  CGOpenMPRuntime::TargetDataInfo Info;
  // Fill up the arrays and create the arguments.
  emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
  bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                   llvm::codegenoptions::NoDebugInfo;
  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                          EmitDebug,
                                          /*ForEndCall=*/false);

  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray = InputInfo.BasePointersArray.getPointer();
    llvm::Value *PointersArray = InputInfo.PointersArray.getPointer();
    llvm::Value *SizesArray = InputInfo.SizesArray.getPointer();
    llvm::Value *MappersArray = InputInfo.MappersArray.getPointer();

    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads =
        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        /*MapTypesArrayEnd=*/nullptr, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGGroupMem, HasNoWait);

    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
        DeviceID, RTLoc, AllocaIP));
  };

  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}

static void
emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                   const OMPExecutableDirective &D,
                   llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                   bool RequiresOuterTask, const CapturedStmt &CS,
                   bool OffloadingMandatory, CodeGenFunction &CGF) {

  // Notify that the host version must be executed.
  auto &&ElseGen =
      [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory, CGF);
      };

  if (RequiresOuterTask) {
    CodeGenFunction::OMPTargetDataInfo InputInfo;
    CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
  } else {
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
  }
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  const bool RequiresOuterTask =
      D.hasClausesOfKind<OMPDependClause>() ||
      D.hasClausesOfKind<OMPNowaitClause>() ||
      D.hasClausesOfKind<OMPInReductionClause>() ||
      (CGM.getLangOpts().OpenMP >= 51 &&
       needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
       D.hasClausesOfKind<OMPThreadLimitClause>());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
                          OutlinedFnID, &InputInfo, &MapTypesArray,
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
  };

  auto &&TargetElseGen =
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
      };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on the
  // host regardless of the conditional in the if clause if, e.g., the user
  // does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
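
// Illustrative shape of the code emitted for '#pragma omp target if(C)' when
// an outlined function ID is available:
//
//   if (C) { <launch the kernel, falling back to the host on failure> }
//   else   { <run the host fallback directly> }
//
// Without an outlined function ID there is nothing to offload, so only the
// host fallback is emitted.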
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);

    llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
        CGM, OMPBuilder, E.getBeginLoc(), ParentName);

    // Is this a target region that should not be emitted as an entry point?
    // If so just signal we are done with this target region.
    if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_target_teams_loop:
      CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
      break;
    case OMPD_target_parallel_loop:
      CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsTargetDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsTargetDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsTargetDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);

  // If this is an 'extern' declaration we defer to the canonical definition
  // and do not emit an offloading entry.
  if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
      VD->hasExternalStorage())
    return;

  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
9949 StringRef VarName = CGM.getMangledName(VD); 9950 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9951 } 9952 return; 9953 } 9954 9955 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); }; 9956 auto LinkageForVariable = [&VD, this]() { 9957 return CGM.getLLVMLinkageVarDefinition(VD); 9958 }; 9959 9960 std::vector<llvm::GlobalVariable *> GeneratedRefs; 9961 OMPBuilder.registerTargetGlobalVariable( 9962 convertCaptureClause(VD), convertDeviceClause(VD), 9963 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly, 9964 VD->isExternallyVisible(), 9965 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, 9966 VD->getCanonicalDecl()->getBeginLoc()), 9967 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd, 9968 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable, 9969 CGM.getTypes().ConvertTypeForMem( 9970 CGM.getContext().getPointerType(VD->getType())), 9971 Addr); 9972 9973 for (auto *ref : GeneratedRefs) 9974 CGM.addCompilerUsedGlobal(ref); 9975 } 9976 9977 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9978 if (isa<FunctionDecl>(GD.getDecl()) || 9979 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9980 return emitTargetFunctions(GD); 9981 9982 return emitTargetGlobalVariable(GD); 9983 } 9984 9985 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9986 for (const VarDecl *VD : DeferredGlobalVariables) { 9987 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9988 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9989 if (!Res) 9990 continue; 9991 if ((*Res == OMPDeclareTargetDeclAttr::MT_To || 9992 *Res == OMPDeclareTargetDeclAttr::MT_Enter) && 9993 !HasRequiresUnifiedSharedMemory) { 9994 CGM.EmitGlobal(VD); 9995 } else { 9996 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 9997 ((*Res == OMPDeclareTargetDeclAttr::MT_To || 9998 *Res == OMPDeclareTargetDeclAttr::MT_Enter) && 9999 HasRequiresUnifiedSharedMemory)) && 10000 "Expected link clause or to clause with unified memory."); 10001 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10002 } 10003 } 10004 } 10005 10006 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10007 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10008 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10009 " Expected target-based directive."); 10010 } 10011 10012 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10013 for (const OMPClause *Clause : D->clauselists()) { 10014 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10015 HasRequiresUnifiedSharedMemory = true; 10016 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true); 10017 } else if (const auto *AC = 10018 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10019 switch (AC->getAtomicDefaultMemOrderKind()) { 10020 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10021 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10022 break; 10023 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10024 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10025 break; 10026 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10027 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10028 break; 10029 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10030 break; 10031 } 10032 } 10033 } 10034 } 10035 10036 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10037 return RequiresAtomicOrdering; 10038 } 10039 10040 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10041 LangAS &AS) { 10042 if 
(!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target; it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsTargetDevice ||
      (OMPBuilder.OffloadInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from raising an
    // error for mismatching requires clauses across compilation units that
    // don't contain at least one target region.
    assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
            !OMPBuilder.OffloadInfoManager.empty()) &&
           "Target or declare target region expected.");
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(
                            CGM.Int64Ty, OMPBuilder.Config.getRequiresFlags()));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
                                            const Expr *ThreadLimit,
                                            SourceLocation Loc) {
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ?
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10202 CGF.CGM.Int32Ty, /* isSigned = */ true) 10203 : CGF.Builder.getInt32(0); 10204 10205 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit) 10206 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc), 10207 ThreadLimitVal}; 10208 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10209 CGM.getModule(), OMPRTL___kmpc_set_thread_limit), 10210 ThreadLimitArgs); 10211 } 10212 10213 void CGOpenMPRuntime::emitTargetDataCalls( 10214 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10215 const Expr *Device, const RegionCodeGenTy &CodeGen, 10216 CGOpenMPRuntime::TargetDataInfo &Info) { 10217 if (!CGF.HaveInsertPoint()) 10218 return; 10219 10220 // Action used to replace the default codegen action and turn privatization 10221 // off. 10222 PrePostActionTy NoPrivAction; 10223 10224 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 10225 10226 llvm::Value *IfCondVal = nullptr; 10227 if (IfCond) 10228 IfCondVal = CGF.EvaluateExprAsBool(IfCond); 10229 10230 // Emit device ID if any. 10231 llvm::Value *DeviceID = nullptr; 10232 if (Device) { 10233 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10234 CGF.Int64Ty, /*isSigned=*/true); 10235 } else { 10236 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10237 } 10238 10239 // Fill up the arrays with all the mapped variables. 10240 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10241 auto GenMapInfoCB = 10242 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { 10243 CGF.Builder.restoreIP(CodeGenIP); 10244 // Get map clause information. 10245 MappableExprsHandler MEHandler(D, CGF); 10246 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder); 10247 10248 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 10249 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 10250 }; 10251 if (CGM.getCodeGenOpts().getDebugInfo() != 10252 llvm::codegenoptions::NoDebugInfo) { 10253 CombinedInfo.Names.resize(CombinedInfo.Exprs.size()); 10254 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(), 10255 FillInfoMap); 10256 } 10257 10258 return CombinedInfo; 10259 }; 10260 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy; 10261 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) { 10262 CGF.Builder.restoreIP(CodeGenIP); 10263 switch (BodyGenType) { 10264 case BodyGenTy::Priv: 10265 if (!Info.CaptureDeviceAddrMap.empty()) 10266 CodeGen(CGF); 10267 break; 10268 case BodyGenTy::DupNoPriv: 10269 if (!Info.CaptureDeviceAddrMap.empty()) { 10270 CodeGen.setAction(NoPrivAction); 10271 CodeGen(CGF); 10272 } 10273 break; 10274 case BodyGenTy::NoPriv: 10275 if (Info.CaptureDeviceAddrMap.empty()) { 10276 CodeGen.setAction(NoPrivAction); 10277 CodeGen(CGF); 10278 } 10279 break; 10280 } 10281 return InsertPointTy(CGF.Builder.GetInsertBlock(), 10282 CGF.Builder.GetInsertPoint()); 10283 }; 10284 10285 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { 10286 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { 10287 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); 10288 } 10289 }; 10290 10291 auto CustomMapperCB = [&](unsigned int I) { 10292 llvm::Value *MFunc = nullptr; 10293 if (CombinedInfo.Mappers[I]) { 10294 Info.HasMapper = true; 10295 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 10296 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 10297 } 10298 return MFunc; 10299 }; 10300 10301 // Source location for the 
ident struct 10302 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10303 10304 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(), 10305 CGF.AllocaInsertPt->getIterator()); 10306 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(), 10307 CGF.Builder.GetInsertPoint()); 10308 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP); 10309 CGF.Builder.restoreIP(OMPBuilder.createTargetData( 10310 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB, 10311 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc)); 10312 } 10313 10314 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10315 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10316 const Expr *Device) { 10317 if (!CGF.HaveInsertPoint()) 10318 return; 10319 10320 assert((isa<OMPTargetEnterDataDirective>(D) || 10321 isa<OMPTargetExitDataDirective>(D) || 10322 isa<OMPTargetUpdateDirective>(D)) && 10323 "Expecting either target enter, exit data, or update directives."); 10324 10325 CodeGenFunction::OMPTargetDataInfo InputInfo; 10326 llvm::Value *MapTypesArray = nullptr; 10327 llvm::Value *MapNamesArray = nullptr; 10328 // Generate the code for the opening of the data environment. 10329 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 10330 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10331 // Emit device ID if any. 10332 llvm::Value *DeviceID = nullptr; 10333 if (Device) { 10334 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10335 CGF.Int64Ty, /*isSigned=*/true); 10336 } else { 10337 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10338 } 10339 10340 // Emit the number of elements in the offloading arrays. 10341 llvm::Constant *PointerNum = 10342 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10343 10344 // Source location for the ident struct 10345 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10346 10347 llvm::Value *OffloadingArgs[] = {RTLoc, 10348 DeviceID, 10349 PointerNum, 10350 InputInfo.BasePointersArray.getPointer(), 10351 InputInfo.PointersArray.getPointer(), 10352 InputInfo.SizesArray.getPointer(), 10353 MapTypesArray, 10354 MapNamesArray, 10355 InputInfo.MappersArray.getPointer()}; 10356 10357 // Select the right runtime function call for each standalone 10358 // directive. 10359 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10360 RuntimeFunction RTLFn; 10361 switch (D.getDirectiveKind()) { 10362 case OMPD_target_enter_data: 10363 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 10364 : OMPRTL___tgt_target_data_begin_mapper; 10365 break; 10366 case OMPD_target_exit_data: 10367 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 10368 : OMPRTL___tgt_target_data_end_mapper; 10369 break; 10370 case OMPD_target_update: 10371 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 10372 : OMPRTL___tgt_target_data_update_mapper; 10373 break; 10374 case OMPD_parallel: 10375 case OMPD_for: 10376 case OMPD_parallel_for: 10377 case OMPD_parallel_master: 10378 case OMPD_parallel_sections: 10379 case OMPD_for_simd: 10380 case OMPD_parallel_for_simd: 10381 case OMPD_cancel: 10382 case OMPD_cancellation_point: 10383 case OMPD_ordered: 10384 case OMPD_threadprivate: 10385 case OMPD_allocate: 10386 case OMPD_task: 10387 case OMPD_simd: 10388 case OMPD_tile: 10389 case OMPD_unroll: 10390 case OMPD_sections: 10391 case OMPD_section: 10392 case OMPD_single: 10393 case OMPD_master: 10394 case OMPD_critical: 10395 case OMPD_taskyield: 10396 case OMPD_barrier: 10397 case OMPD_taskwait: 10398 case OMPD_taskgroup: 10399 case OMPD_atomic: 10400 case OMPD_flush: 10401 case OMPD_depobj: 10402 case OMPD_scan: 10403 case OMPD_teams: 10404 case OMPD_target_data: 10405 case OMPD_distribute: 10406 case OMPD_distribute_simd: 10407 case OMPD_distribute_parallel_for: 10408 case OMPD_distribute_parallel_for_simd: 10409 case OMPD_teams_distribute: 10410 case OMPD_teams_distribute_simd: 10411 case OMPD_teams_distribute_parallel_for: 10412 case OMPD_teams_distribute_parallel_for_simd: 10413 case OMPD_declare_simd: 10414 case OMPD_declare_variant: 10415 case OMPD_begin_declare_variant: 10416 case OMPD_end_declare_variant: 10417 case OMPD_declare_target: 10418 case OMPD_end_declare_target: 10419 case OMPD_declare_reduction: 10420 case OMPD_declare_mapper: 10421 case OMPD_taskloop: 10422 case OMPD_taskloop_simd: 10423 case OMPD_master_taskloop: 10424 case OMPD_master_taskloop_simd: 10425 case OMPD_parallel_master_taskloop: 10426 case OMPD_parallel_master_taskloop_simd: 10427 case OMPD_target: 10428 case OMPD_target_simd: 10429 case OMPD_target_teams_distribute: 10430 case OMPD_target_teams_distribute_simd: 10431 case OMPD_target_teams_distribute_parallel_for: 10432 case OMPD_target_teams_distribute_parallel_for_simd: 10433 case OMPD_target_teams: 10434 case OMPD_target_parallel: 10435 case OMPD_target_parallel_for: 10436 case OMPD_target_parallel_for_simd: 10437 case OMPD_requires: 10438 case OMPD_metadirective: 10439 case OMPD_unknown: 10440 default: 10441 llvm_unreachable("Unexpected standalone target data directive."); 10442 break; 10443 } 10444 CGF.EmitRuntimeCall( 10445 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 10446 OffloadingArgs); 10447 }; 10448 10449 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10450 &MapNamesArray](CodeGenFunction &CGF, 10451 PrePostActionTy &) { 10452 // Fill up the arrays with all the mapped variables. 10453 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10454 10455 // Get map clause information. 10456 MappableExprsHandler MEHandler(D, CGF); 10457 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder); 10458 10459 CGOpenMPRuntime::TargetDataInfo Info; 10460 // Fill up the arrays and create the arguments. 
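    // (Illustrative note: the arrays built here are the base-pointer,
    // pointer, size, and mapper arrays, plus the map-type and map-name
    // information, that populate the OffloadingArgs of the
    // __tgt_target_data_* calls built in ThenGen above.)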
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                     llvm::codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
  bool HasVarStride = false;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or class
  //      type that is passed by value (except for the type that maps to the
  //      built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 "Registers
  // and the Stack Frame" of the original AMD64 ABI document.
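  // Worked example (illustrative): for a SIMD-enabled function returning
  // double and no simdlen clause, the CDT is double (64 bits); with a
  // 256-bit vector register (e.g. AVX) this gives VLEN = 256 / 64 = 4.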
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Mangle the parameters of the vector function name according to their
/// OpenMP classification. The mangling function is defined in section 4.5
/// of the AAVFABI (2021Q1).
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case Linear:
      Out << 'l';
      break;
    case LinearRef:
      Out << 'R';
      break;
    case LinearUVal:
      Out << 'U';
      break;
    case LinearVal:
      Out << 'L';
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }
    if (ParamAttr.HasVarStride)
      Out << "s" << ParamAttr.StrideOrArg;
    else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
             ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
      // Don't print the step value if it is not present or if it is
      // equal to 1.
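      // E.g. (illustrative): a linear parameter with step 4 mangles as "l4",
      // step -2 as "ln2", and the default step 1 as just "l".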
      if (ParamAttr.StrideOrArg < 0)
        Out << 'n' << -ParamAttr.StrideOrArg;
      else if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}

static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      Out << mangleVectorParameters(ParamAttrs);
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions needed to mangle the names of the vector functions
// generated by the compiler, according to the rules defined in the "Vector
// Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
    return false;

  if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
      !QT->isReferenceType())
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types at most 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10719 /// TODO: Add support for references, section 3.2.1, item 1. 10720 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10721 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10722 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10723 if (getAArch64PBV(PTy, C)) 10724 return C.getTypeSize(PTy); 10725 } 10726 if (getAArch64PBV(QT, C)) 10727 return C.getTypeSize(QT); 10728 10729 return C.getTypeSize(C.getUIntPtrType()); 10730 } 10731 10732 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10733 // signature of the scalar function, as defined in 3.2.2 of the 10734 // AAVFABI. 10735 static std::tuple<unsigned, unsigned, bool> 10736 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10737 QualType RetType = FD->getReturnType().getCanonicalType(); 10738 10739 ASTContext &C = FD->getASTContext(); 10740 10741 bool OutputBecomesInput = false; 10742 10743 llvm::SmallVector<unsigned, 8> Sizes; 10744 if (!RetType->isVoidType()) { 10745 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10746 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10747 OutputBecomesInput = true; 10748 } 10749 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10750 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10751 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10752 } 10753 10754 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10755 // The LS of a function parameter / return value can only be a power 10756 // of 2, starting from 8 bits, up to 128. 10757 assert(llvm::all_of(Sizes, 10758 [](unsigned Size) { 10759 return Size == 8 || Size == 16 || Size == 32 || 10760 Size == 64 || Size == 128; 10761 }) && 10762 "Invalid size"); 10763 10764 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10765 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10766 OutputBecomesInput); 10767 } 10768 10769 // Function used to add the attribute. The parameter `VLEN` is 10770 // templated to allow the use of "x" when targeting scalable functions 10771 // for SVE. 10772 template <typename T> 10773 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10774 char ISA, StringRef ParSeq, 10775 StringRef MangledName, bool OutputBecomesInput, 10776 llvm::Function *Fn) { 10777 SmallString<256> Buffer; 10778 llvm::raw_svector_ostream Out(Buffer); 10779 Out << Prefix << ISA << LMask << VLEN; 10780 if (OutputBecomesInput) 10781 Out << "v"; 10782 Out << ParSeq << "_" << MangledName; 10783 Fn->addFnAttr(Out.str()); 10784 } 10785 10786 // Helper function to generate the Advanced SIMD names depending on 10787 // the value of the NDS when simdlen is not present. 
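// E.g. (illustrative, assuming a single vector parameter mangled as "v"):
// NDS == 32 emits the VLEN 2 and VLEN 4 variants, so a not-inbranch
// function "foo" would get the attributes "_ZGVnN2v_foo" and "_ZGVnN4v_foo".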
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1: SVE fixed-length vectors must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
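      // E.g. (illustrative, for a single vector parameter): simdlen(2) on a
      // function "foo" yields the single SVE variant "_ZGVsM2v_foo".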
10876 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10877 OutputBecomesInput, Fn); 10878 } else { 10879 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10880 // Advanced SIMD generates one or two functions, depending on 10881 // the `[not]inbranch` clause. 10882 switch (State) { 10883 case OMPDeclareSimdDeclAttr::BS_Undefined: 10884 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10885 OutputBecomesInput, Fn); 10886 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10887 OutputBecomesInput, Fn); 10888 break; 10889 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10890 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10891 OutputBecomesInput, Fn); 10892 break; 10893 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10894 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10895 OutputBecomesInput, Fn); 10896 break; 10897 } 10898 } 10899 } else { 10900 // If no user simdlen is provided, follow the AAVFABI rules for 10901 // generating the vector length. 10902 if (ISA == 's') { 10903 // SVE, section 3.4.1, item 1. 10904 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10905 OutputBecomesInput, Fn); 10906 } else { 10907 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10908 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10909 // two vector names depending on the use of the clause 10910 // `[not]inbranch`. 10911 switch (State) { 10912 case OMPDeclareSimdDeclAttr::BS_Undefined: 10913 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10914 OutputBecomesInput, Fn); 10915 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10916 OutputBecomesInput, Fn); 10917 break; 10918 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10919 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10920 OutputBecomesInput, Fn); 10921 break; 10922 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10923 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10924 OutputBecomesInput, Fn); 10925 break; 10926 } 10927 } 10928 } 10929 } 10930 10931 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10932 llvm::Function *Fn) { 10933 ASTContext &C = CGM.getContext(); 10934 FD = FD->getMostRecentDecl(); 10935 while (FD) { 10936 // Map params to their positions in function decl. 10937 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10938 if (isa<CXXMethodDecl>(FD)) 10939 ParamPositions.try_emplace(FD, 0); 10940 unsigned ParamPos = ParamPositions.size(); 10941 for (const ParmVarDecl *P : FD->parameters()) { 10942 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10943 ++ParamPos; 10944 } 10945 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10946 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10947 // Mark uniform parameters. 10948 for (const Expr *E : Attr->uniforms()) { 10949 E = E->IgnoreParenImpCasts(); 10950 unsigned Pos; 10951 if (isa<CXXThisExpr>(E)) { 10952 Pos = ParamPositions[FD]; 10953 } else { 10954 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10955 ->getCanonicalDecl(); 10956 auto It = ParamPositions.find(PVD); 10957 assert(It != ParamPositions.end() && "Function parameter not found"); 10958 Pos = It->second; 10959 } 10960 ParamAttrs[Pos].Kind = Uniform; 10961 } 10962 // Get alignment info. 
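      // E.g. (illustrative): `aligned(a : 32)` records an alignment of 32 for
      // parameter `a`; when no alignment expression is given, the OpenMP
      // default SIMD alignment of the parameter type is used instead.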
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // Use dyn_cast here: a non-constant step that is not a plain
            // parameter reference simply keeps the default stride.
            if (const auto *DRE =
                    dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
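        // E.g. (illustrative): `linear(p : 2)` on a parameter `double *p` has
        // PtrRescalingFactor == sizeof(double) == 8, so the step becomes
        // 2 * 8 == 16 and the parameter mangles as "l16".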
11053 if (!ParamAttr.HasVarStride && 11054 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef)) 11055 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11056 ++SI; 11057 ++MI; 11058 } 11059 llvm::APSInt VLENVal; 11060 SourceLocation ExprLoc; 11061 const Expr *VLENExpr = Attr->getSimdlen(); 11062 if (VLENExpr) { 11063 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11064 ExprLoc = VLENExpr->getExprLoc(); 11065 } 11066 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11067 if (CGM.getTriple().isX86()) { 11068 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11069 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11070 unsigned VLEN = VLENVal.getExtValue(); 11071 StringRef MangledName = Fn->getName(); 11072 if (CGM.getTarget().hasFeature("sve")) 11073 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11074 MangledName, 's', 128, Fn, ExprLoc); 11075 else if (CGM.getTarget().hasFeature("neon")) 11076 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11077 MangledName, 'n', 128, Fn, ExprLoc); 11078 } 11079 } 11080 FD = FD->getPreviousDecl(); 11081 } 11082 } 11083 11084 namespace { 11085 /// Cleanup action for doacross support. 11086 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 11087 public: 11088 static const int DoacrossFinArgs = 2; 11089 11090 private: 11091 llvm::FunctionCallee RTLFn; 11092 llvm::Value *Args[DoacrossFinArgs]; 11093 11094 public: 11095 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 11096 ArrayRef<llvm::Value *> CallArgs) 11097 : RTLFn(RTLFn) { 11098 assert(CallArgs.size() == DoacrossFinArgs); 11099 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11100 } 11101 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11102 if (!CGF.HaveInsertPoint()) 11103 return; 11104 CGF.EmitRuntimeCall(RTLFn, Args); 11105 } 11106 }; 11107 } // namespace 11108 11109 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11110 const OMPLoopDirective &D, 11111 ArrayRef<Expr *> NumIterations) { 11112 if (!CGF.HaveInsertPoint()) 11113 return; 11114 11115 ASTContext &C = CGM.getContext(); 11116 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 11117 RecordDecl *RD; 11118 if (KmpDimTy.isNull()) { 11119 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 11120 // kmp_int64 lo; // lower 11121 // kmp_int64 up; // upper 11122 // kmp_int64 st; // stride 11123 // }; 11124 RD = C.buildImplicitRecord("kmp_dim"); 11125 RD->startDefinition(); 11126 addFieldToRecordDecl(C, RD, Int64Ty); 11127 addFieldToRecordDecl(C, RD, Int64Ty); 11128 addFieldToRecordDecl(C, RD, Int64Ty); 11129 RD->completeDefinition(); 11130 KmpDimTy = C.getRecordType(RD); 11131 } else { 11132 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 11133 } 11134 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 11135 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr, 11136 ArraySizeModifier::Normal, 0); 11137 11138 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 11139 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 11140 enum { LowerFD = 0, UpperFD, StrideFD }; 11141 // Fill dims with data. 
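  // E.g. (illustrative): `#pragma omp for ordered(2)` produces two kmp_dim
  // entries, each {lo = 0, up = <trip count>, st = 1}; lo stays 0 thanks to
  // the null initialization above.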
11142 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11143 LValue DimsLVal = CGF.MakeAddrLValue( 11144 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11145 // dims.upper = num_iterations; 11146 LValue UpperLVal = CGF.EmitLValueForField( 11147 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11148 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 11149 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 11150 Int64Ty, NumIterations[I]->getExprLoc()); 11151 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11152 // dims.stride = 1; 11153 LValue StrideLVal = CGF.EmitLValueForField( 11154 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11155 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11156 StrideLVal); 11157 } 11158 11159 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11160 // kmp_int32 num_dims, struct kmp_dim * dims); 11161 llvm::Value *Args[] = { 11162 emitUpdateLocation(CGF, D.getBeginLoc()), 11163 getThreadID(CGF, D.getBeginLoc()), 11164 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11165 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11166 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 11167 CGM.VoidPtrTy)}; 11168 11169 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11170 CGM.getModule(), OMPRTL___kmpc_doacross_init); 11171 CGF.EmitRuntimeCall(RTLFn, Args); 11172 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11173 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11174 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11175 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 11176 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11177 llvm::ArrayRef(FiniArgs)); 11178 } 11179 11180 template <typename T> 11181 static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, 11182 const T *C, llvm::Value *ULoc, 11183 llvm::Value *ThreadID) { 11184 QualType Int64Ty = 11185 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11186 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11187 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11188 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0); 11189 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11190 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11191 const Expr *CounterVal = C->getLoopData(I); 11192 assert(CounterVal); 11193 llvm::Value *CntVal = CGF.EmitScalarConversion( 11194 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11195 CounterVal->getExprLoc()); 11196 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11197 /*Volatile=*/false, Int64Ty); 11198 } 11199 llvm::Value *Args[] = { 11200 ULoc, ThreadID, CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11201 llvm::FunctionCallee RTLFn; 11202 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 11203 OMPDoacrossKind<T> ODK; 11204 if (ODK.isSource(C)) { 11205 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11206 OMPRTL___kmpc_doacross_post); 11207 } else { 11208 assert(ODK.isSink(C) && "Expect sink modifier."); 11209 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11210 OMPRTL___kmpc_doacross_wait); 11211 } 11212 CGF.EmitRuntimeCall(RTLFn, Args); 11213 } 11214 11215 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11216 const OMPDependClause *C) { 11217 return 
EmitDoacrossOrdered<OMPDependClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDoacrossClause *C) {
  return EmitDoacrossOrdered<OMPDoacrossClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return the allocator value from the expression, or return a null allocator
/// (the default when no allocator is specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator is specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

/// Return the alignment from an allocate directive if present.
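/// E.g. (illustrative): `#pragma omp allocate(x) align(16)` yields the
/// constant 16 here; without an align clause this returns nullptr and the
/// caller omits the alignment argument.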
11283 static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) { 11284 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD); 11285 11286 if (!AllocateAlignment) 11287 return nullptr; 11288 11289 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity()); 11290 } 11291 11292 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11293 const VarDecl *VD) { 11294 if (!VD) 11295 return Address::invalid(); 11296 Address UntiedAddr = Address::invalid(); 11297 Address UntiedRealAddr = Address::invalid(); 11298 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 11299 if (It != FunctionToUntiedTaskStackMap.end()) { 11300 const UntiedLocalVarsAddressesMap &UntiedData = 11301 UntiedLocalVarsStack[It->second]; 11302 auto I = UntiedData.find(VD); 11303 if (I != UntiedData.end()) { 11304 UntiedAddr = I->second.first; 11305 UntiedRealAddr = I->second.second; 11306 } 11307 } 11308 const VarDecl *CVD = VD->getCanonicalDecl(); 11309 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 11310 // Use the default allocation. 11311 if (!isAllocatableDecl(VD)) 11312 return UntiedAddr; 11313 llvm::Value *Size; 11314 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11315 if (CVD->getType()->isVariablyModifiedType()) { 11316 Size = CGF.getTypeSize(CVD->getType()); 11317 // Align the size: ((size + align - 1) / align) * align 11318 Size = CGF.Builder.CreateNUWAdd( 11319 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11320 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11321 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11322 } else { 11323 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11324 Size = CGM.getSize(Sz.alignTo(Align)); 11325 } 11326 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11327 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11328 const Expr *Allocator = AA->getAllocator(); 11329 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator); 11330 llvm::Value *Alignment = getAlignmentValue(CGM, CVD); 11331 SmallVector<llvm::Value *, 4> Args; 11332 Args.push_back(ThreadID); 11333 if (Alignment) 11334 Args.push_back(Alignment); 11335 Args.push_back(Size); 11336 Args.push_back(AllocVal); 11337 llvm::omp::RuntimeFunction FnID = 11338 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc; 11339 llvm::Value *Addr = CGF.EmitRuntimeCall( 11340 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args, 11341 getName({CVD->getName(), ".void.addr"})); 11342 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11343 CGM.getModule(), OMPRTL___kmpc_free); 11344 QualType Ty = CGM.getContext().getPointerType(CVD->getType()); 11345 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11346 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); 11347 if (UntiedAddr.isValid()) 11348 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); 11349 11350 // Cleanup action for allocate support. 
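    // (Illustrative note: the cleanup below pairs the __kmpc_alloc /
    // __kmpc_aligned_alloc emitted above with a matching
    // __kmpc_free(tid, ptr, allocator) at scope exit.)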
    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}
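// Illustrative usage (a sketch, not a call site in this file): callers that
// emit the body of an untied task are expected to bracket the emission with
// this RAII so getAddressOfLocalVariable() above can resolve task-local
// storage through UntiedLocalVarsStack:
//   {
//     CGOpenMPRuntime::UntiedTaskLocalDeclsRAII UntiedScope(CGF, LocalVars);
//     // ... emit the task body ...
//   } // the destructor below pops the stack entry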
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}

void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}
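// Source-level example of the construct handled below (illustrative):
//   #pragma omp parallel for lastprivate(conditional: a)
//   for (int i = 0; i < n; ++i)
//     if (p[i] > 0)
//       a = p[i];
// After the loop, 'a' must hold the value from the sequentially last
// iteration that actually assigned it; the tracking pushed by this RAII and
// the helpers below implement that bookkeeping.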
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr =
        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
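// The update helper below maintains, per unique variable name, a pair of
// internal globals (shown schematically for a variable 'a'; the exact mangled
// names come from generateUniqueName and getName above):
//   @pl_cond.a.<suffix>.iv - last iteration index that wrote 'a' (last_iv)
//   @pl_cond.a.<suffix>    - the value written in that iteration (last_a)
// The compare-and-update over them runs under a critical section keyed by the
// same unique name, except in simd-only mode (see the end of the function).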
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region: no parallel region can be emitted in
    // simd-only mode, so the update needs no mutual exclusion.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
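  // Note: the named global is created lazily by
  // emitLastprivateConditionalUpdate(). When the region contained at least
  // one conditional assignment, the global exists and the code emitted below
  // is morally 'a = last_a;', copying the tracked value back into the
  // original list item.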
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { return false; }

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}