//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
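        // An untied task may be rescheduled at task scheduling points: the
        // body is split into parts, the current part id is stored in the
        // task descriptor, and on (re-)entry the switch created below
        // dispatches on that id to the matching resume label (case 0,
        // added right after the switch, is the initial entry).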
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  /// Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
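    // Chain a new inlined-region info in front of the current
    // CapturedStmtInfo; lookups that cannot be resolved locally are
    // delegated to the enclosing region, and the destructor below restores
    // the previous state once emission of this construct is done.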
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**<  source[4] in Fortran, do not use for
///                                  C++  */
///    char const *psource;    /**<  String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for the reduction.
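/// The reduction op of a user-defined reduction is modeled as a call whose
/// callee is an OpaqueValueExpr wrapping a DeclRefExpr to the
/// OMPDeclareReductionDecl; any other shape yields nullptr.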
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
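  // (For a multi-dimensional array, emitArrayLength below returns the total
  // number of base elements, so a single flat loop over the elements
  // suffices.)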
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size =
        CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress(CGF).withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV,
                                      BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
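  // E.g., given
  //   #pragma omp declare reduction(myplus : T : omp_out += omp_in)
  // the combiner emitted below behaves roughly like
  //   void .omp_combiner.(T *restrict omp_out_parm, T *restrict omp_in_parm) {
  //     *omp_out_parm += *omp_in_parm;
  //   }
  // with omp_out/omp_in privatized to the parameters' pointees.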
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize, we split the block at
    // IP, use the new block (=BB) as the destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)), which is then fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed, but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ?
          OMPD_taskloop : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

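// Note: the location strings built above have the form
// ";<file>;<function>;<line>;<column>;;", e.g. ";t.c;foo;4;1;;" for a
// construct at line 4, column 1 of t.c inside function 'foo'.
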
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
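  // The emitted call is, roughly:
  //   %tid = call i32 @__kmpc_global_thread_num(ptr @<ident>)
  // placed at the service insert point near the function entry so the
  // result can be reused throughout the function.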
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
    break;
  }
}

llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return
        llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
    break;
  }
}

static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    llvm::sys::fs::UniqueID ID;
    if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
    }

    return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
}

Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      CGM.getContext().getPointerType(VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
      LinkageForVariable);

  if (!addr)
    return Address::invalid();
  return Address(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to initialize the OpenMP
  // runtime library.
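  // For a threadprivate variable "x", the emitted sequence is conceptually
  // (sketch; IR names assumed for illustration):
  //   call i32 @__kmpc_global_thread_num(ptr @loc)
  //   call void @__kmpc_threadprivate_register(ptr @loc, ptr @x,
  //             ptr @ctor, ptr null /*cctor*/, ptr @dtor)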
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register the constructor/destructor for the variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits a destructor call for the threadprivate
      // copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(),
                            CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - it is reserved by the runtime, which currently requires
    // that this parameter always be NULL; otherwise it fires an assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
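  // For instance (hypothetical names, for illustration only): for an indirect
  // declare-target function "foo", the device compile emits roughly
  //   @__omp_offloading_<...>_foo = protected constant ptr @foo
  // so the runtime can look up the device address of "foo" by name without
  // changing the linkage or visibility of @foo itself.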
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.VoidPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      llvm::GlobalValue::WeakODRLinkage);
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      VarLVType, Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy,
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call, but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do
    // the handling there. Much cleaner code.
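    // Sketch of the full serialized lowering produced by this lambda
    // (assuming a single captured variable %cap; names are illustrative):
    //   call void @__kmpc_serialized_parallel(ptr @loc, i32 %gtid)
    //   call void @outlined(ptr %.threadid_temp., ptr %.bound.zero.addr,
    //                       ptr %cap)
    //   call void @__kmpc_end_serialized_parallel(ptr @loc, i32 %gtid)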
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function, as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region but in a regular serial code region, get the thread ID by calling
// kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
// temporary and return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ?
              OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(
      CGF.Builder.CreateBitCast(
          Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
      ElemTy, CGF.getContext().getDeclAlign(Var));
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build a function that copies private values from the single region to
    // all other threads in the corresponding parallel region.
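    // The generated helper typically has this shape (sketch; T0..Tn stand for
    // the copyprivate variables' types):
    //   void .omp.copyprivate.copy_func(void *LHS, void *RHS) {
    //     *(T0 *)((void **)LHS)[0] = *(T0 *)((void **)RHS)[0];
    //     ...
    //     *(Tn *)((void **)LHS)[n] = *(Tn *)((void **)RHS)[n];
    //   }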
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
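    // e.g. '#pragma omp for ordered(2)' carries cross-iteration (doacross)
    // dependences, so it is lowered as if 'schedule(static, 1)' had been
    // written (a sketch of the intended mapping, not an extra runtime call).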
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build a call to __kmpc_cancel_barrier(loc, thread_id) or
  // __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
                                    Expr *ME, bool IsFatal) {
  llvm::Value *MVL =
      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Build call void __kmpc_error(ident_t *loc, int severity, const char
  // *message)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*EmitLoc=*/true),
      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_error),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ?
                     OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // Only static is allowed for dist_schedule.
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect
  // is as if the monotonic modifier is specified.
  // Otherwise, unless the monotonic modifier is specified, the effect is as
  // if the nonmonotonic modifier is specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //    ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //    kmp_int[32|64] lower, kmp_int[32|64] upper,
  //    kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause, use the default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}

static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //    ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //    kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //    kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //    kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause, use the default value 1.
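    // The final call then looks like (sketch for a signed 32-bit IV; value
    // names are assumed for illustration):
    //   call void @__kmpc_for_static_init_4(ptr @loc, i32 %gtid,
    //        i32 <schedtype>, ptr %isLast, ptr %lb, ptr %ub, ptr %stride,
    //        i32 1 /*incr*/, i32 1 /*chunk*/)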
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc,
                         isOpenMPLoopDirective(DKind)
                             ? OMP_IDENT_WORK_LOOP
                             : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
                                             false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = OMPBuilder.createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ?
                                     OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) &&
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
                      Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //    ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //    kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //    kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call = CGF.EmitRuntimeCall(
      OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(),
                            OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
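    // i.e. the task entry receives the global thread id and the task payload:
    //   typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32 gtid, void *task);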

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}

static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //   /* private vars */
    // };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //   void *              shareds;
  //   kmp_routine_entry_t routine;
  //   kmp_int32           part_id;
  //   kmp_cmplrdata_t     data1;
  //   kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //   kmp_uint64          lb;
  //   kmp_uint64          ub;
  //   kmp_int64           st;
  //   kmp_int32           liter;
  //   void *              reductions;
  // };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //   kmp_task_t       task_data;
  //   .kmp_privates_t. privates;
  // };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}
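
// Example (a sketch): for "#pragma omp task firstprivate(a)" with "double a",
// the records built above amount to
//
//   struct kmp_task_t_with_privates {
//     kmp_task_t task_data;          // fixed part, shared with the runtime
//     struct { double a; } privates; // compiler-private tail
//   };
//
// The runtime only knows kmp_task_t; the privates tail travels behind it and
// is reached through the field-index-1 offsets used throughout this file.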

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
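
// The generated function is roughly (a sketch, for privates "T1 t1; T2 t2;"
// with non-trivial destructors):
//
//   kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
//                                   kmp_task_t_with_privates *tt) {
//     tt->privates.t2.~T2(); // destruction runs via pushed cleanups, so it
//     tt->privates.t1.~T1(); // is emitted in reverse field order
//   }
//
// The runtime reaches it through the data1.destructors field that emitTaskInit
// fills in when checkDestructorsRequired returns true.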

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamKind::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamKind::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}
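
// Note: .omp_task_privates_map. is not called by the runtime itself. The
// proxy .omp_task_entry. above forwards it to the outlined task body as the
// task_privates_map argument, and the body invokes it to translate the opaque
// privates block into one typed pointer per private copy.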

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if a task duplication function is required for taskloops, i.e.
/// whether any private copy has a non-trivial initializer.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
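
// Note: the function built above is never called from generated code; its
// address is returned from emitTaskInit as TaskDupFn and handed to the
// runtime (e.g. as the task_dup argument of __kmpc_taskloop), which invokes
// it once for every task instance it clones.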

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
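
// Usage sketch: for a clause such as
//   depend(iterator(i = 0 : n), in : a[i])
// the dependence records for a[i] are emitted inside an
// OMPIteratorGeneratorScope, so the stores land in the iter.body block and
// run once per value of i; the destructor then closes the loop (counter
// increment, back-branch to iter.cont, fall-through to iter.exit).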

static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
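
// Worked example (a sketch): given "double *p", the shaping expression
// ([n][m])p yields
//   Addr    = p
//   SizeVal = sizeof(double) * n * m   // NUW multiplies, widened to size_t
// while an array section a[l:len] yields the byte distance from &a[l] to one
// past the section's upper element, and a plain lvalue falls back to the
// size of its type.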

/// Builds kmp_task_affinity_info_t type, if it is not built yet, and builds
/// flags type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
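
// The record built above corresponds roughly to the runtime's
// kmp_task_affinity_info_t (cf. kmp.h; field names here are illustrative):
//
//   struct kmp_task_affinity_info_t {
//     intptr_t base_addr; // start of the affinity range
//     size_t   len;       // length in bytes
//     uint32_t flags;
//   };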

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Build the privates mapping function (if any privates are present).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
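
  // Worked example (a sketch): an untied task without clauses yields 0x0; a
  // tied task with a priority clause yields TiedFlag | PriorityFlag == 0x21;
  // adding privates with non-trivial destructors ORs in DestructorsFlag,
  // giving 0x29. The FinalFlag bit is folded in separately below because
  // final(expr) may only be known at run time.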
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
          KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
          /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamKind::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = RTLDependenceKindTy::DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = RTLDependenceKindTy::DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = RTLDependenceKindTy::DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = RTLDependenceKindTy::DepInOutSet;
    break;
  case OMPC_DEPEND_outallmemory:
    DepKind = RTLDependenceKindTy::DepOmpAllMem;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_inoutallmemory:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}
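
// C-level equivalent of the record built above (cf. kmp_depend_info in the
// runtime's kmp.h; the flags width matches the target's bool size, and the
// field names here are illustrative):
//
//   struct kmp_depend_info {
//     intptr_t base_addr;
//     size_t   len;
//     uint8_t  flags; // dependence kind, see RTLDependenceKindTy
//   };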

std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF).withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
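
// Depobj layout note: a depobj handle points at element 1 of a
// kmp_depend_info array; element 0 (reached via the GEP with index -1 above)
// is a header whose base_addr field holds the number of real entries. The
// extra slot is reserved in emitDepobjDependClause below precisely so that
// "#pragma omp depobj(x) update(...)" can recover the count later.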
Data.IteratorExpr->IgnoreParenImpCasts() 4136 : nullptr)); 4137 for (const Expr *E : Data.DepExprs) { 4138 llvm::Value *NumDeps; 4139 LValue Base; 4140 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4141 std::tie(NumDeps, Base) = 4142 getDepobjElements(CGF, DepobjLVal, E->getExprLoc()); 4143 LValue NumLVal = CGF.MakeAddrLValue( 4144 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 4145 C.getUIntPtrType()); 4146 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0), 4147 NumLVal.getAddress(CGF)); 4148 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 4149 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 4150 CGF.EmitStoreOfScalar(Add, NumLVal); 4151 SizeLVals.push_back(NumLVal); 4152 } 4153 } 4154 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 4155 llvm::Value *Size = 4156 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 4157 Sizes.push_back(Size); 4158 } 4159 return Sizes; 4160 } 4161 4162 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF, 4163 QualType &KmpDependInfoTy, 4164 LValue PosLVal, 4165 const OMPTaskDataTy::DependData &Data, 4166 Address DependenciesArray) { 4167 assert(Data.DepKind == OMPC_DEPEND_depobj && 4168 "Expected depobj dependency kind."); 4169 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 4170 { 4171 OMPIteratorGeneratorScope IteratorScope( 4172 CGF, cast_or_null<OMPIteratorExpr>( 4173 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4174 : nullptr)); 4175 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4176 const Expr *E = Data.DepExprs[I]; 4177 llvm::Value *NumDeps; 4178 LValue Base; 4179 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4180 std::tie(NumDeps, Base) = 4181 getDepobjElements(CGF, DepobjLVal, E->getExprLoc()); 4182 4183 // memcopy dependency data. 4184 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4185 ElSize, 4186 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4187 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4188 Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos); 4189 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4190 4191 // Increase pos. 4192 // pos += size; 4193 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4194 CGF.EmitStoreOfScalar(Add, PosLVal); 4195 } 4196 } 4197 } 4198 4199 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4200 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4201 SourceLocation Loc) { 4202 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4203 return D.DepExprs.empty(); 4204 })) 4205 return std::make_pair(nullptr, Address::invalid()); 4206 // Process list of dependencies. 4207 ASTContext &C = CGM.getContext(); 4208 Address DependenciesArray = Address::invalid(); 4209 llvm::Value *NumOfElements = nullptr; 4210 unsigned NumDependencies = std::accumulate( 4211 Dependencies.begin(), Dependencies.end(), 0, 4212 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4213 return D.DepKind == OMPC_DEPEND_depobj 4214 ? V 4215 : (V + (D.IteratorExpr ? 
0 : D.DepExprs.size())); 4216 }); 4217 QualType FlagsTy; 4218 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4219 bool HasDepobjDeps = false; 4220 bool HasRegularWithIterators = false; 4221 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4222 llvm::Value *NumOfRegularWithIterators = 4223 llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4224 // Calculate number of depobj dependencies and regular deps with the 4225 // iterators. 4226 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4227 if (D.DepKind == OMPC_DEPEND_depobj) { 4228 SmallVector<llvm::Value *, 4> Sizes = 4229 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4230 for (llvm::Value *Size : Sizes) { 4231 NumOfDepobjElements = 4232 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4233 } 4234 HasDepobjDeps = true; 4235 continue; 4236 } 4237 // Include number of iterations, if any. 4238 4239 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4240 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4241 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4242 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4243 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul( 4244 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size())); 4245 NumOfRegularWithIterators = 4246 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps); 4247 } 4248 HasRegularWithIterators = true; 4249 continue; 4250 } 4251 } 4252 4253 QualType KmpDependInfoArrayTy; 4254 if (HasDepobjDeps || HasRegularWithIterators) { 4255 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4256 /*isSigned=*/false); 4257 if (HasDepobjDeps) { 4258 NumOfElements = 4259 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4260 } 4261 if (HasRegularWithIterators) { 4262 NumOfElements = 4263 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4264 } 4265 auto *OVE = new (C) OpaqueValueExpr( 4266 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4267 VK_PRValue); 4268 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4269 RValue::get(NumOfElements)); 4270 KmpDependInfoArrayTy = 4271 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal, 4272 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4273 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4274 // Properly emit variable-sized array. 4275 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4276 ImplicitParamKind::Other); 4277 CGF.EmitVarDecl(*PD); 4278 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4279 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4280 /*isSigned=*/false); 4281 } else { 4282 KmpDependInfoArrayTy = C.getConstantArrayType( 4283 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4284 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); 4285 DependenciesArray = 4286 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4287 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4288 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4289 /*isSigned=*/false); 4290 } 4291 unsigned Pos = 0; 4292 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4293 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4294 Dependencies[I].IteratorExpr) 4295 continue; 4296 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4297 DependenciesArray); 4298 } 4299 // Copy regular dependencies with iterators. 
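// Unlike the constant-position case above, a dependency list produced by an
// iterator modifier, e.g. (illustrative source)
//   #pragma omp task depend(iterator(i = 0:n), in : a[i])
// has a trip count that is only known at run time, so the insertion position
// below is tracked in a memory temporary rather than in the compile-time
// counter Pos.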
4300 LValue PosLVal = CGF.MakeAddrLValue(
4301 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4302 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4303 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4304 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4305 !Dependencies[I].IteratorExpr)
4306 continue;
4307 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4308 DependenciesArray);
4309 }
4310 // Copy final depobj arrays without iterators.
4311 if (HasDepobjDeps) {
4312 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4313 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4314 continue;
4315 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4316 DependenciesArray);
4317 }
4318 }
4319 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4320 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4321 return std::make_pair(NumOfElements, DependenciesArray);
4322 }
4323
4324 Address CGOpenMPRuntime::emitDepobjDependClause(
4325 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4326 SourceLocation Loc) {
4327 if (Dependencies.DepExprs.empty())
4328 return Address::invalid();
4329 // Process list of dependencies.
4330 ASTContext &C = CGM.getContext();
4331 Address DependenciesArray = Address::invalid();
4332 unsigned NumDependencies = Dependencies.DepExprs.size();
4333 QualType FlagsTy;
4334 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4335 RecordDecl *KmpDependInfoRD =
4336 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4337
4338 llvm::Value *Size;
4339 // Define type kmp_depend_info[<Dependencies.size()>];
4340 // For depobj reserve one extra element to store the number of elements.
4341 // This is required to handle the depobj(x) update(in) construct.
4342 // kmp_depend_info[<Dependencies.size()>] deps;
4343 llvm::Value *NumDepsVal;
4344 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4345 if (const auto *IE =
4346 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4347 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4348 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4349 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4350 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4351 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4352 }
4353 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4354 NumDepsVal);
4355 CharUnits SizeInBytes =
4356 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4357 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4358 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4359 NumDepsVal =
4360 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4361 } else {
4362 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4363 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4364 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4365 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4366 Size = CGM.getSize(Sz.alignTo(Align));
4367 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4368 }
4369 // The array has to be allocated in dynamic memory.
4370 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4371 // Use default allocator.
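// Passing a null allocator handle to __kmpc_alloc asks the runtime to fall
// back to its default allocator (conceptually similar to allocating with
// omp_default_mem_alloc).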
4372 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4373 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 4374 4375 llvm::Value *Addr = 4376 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4377 CGM.getModule(), OMPRTL___kmpc_alloc), 4378 Args, ".dep.arr.addr"); 4379 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy); 4380 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4381 Addr, KmpDependInfoLlvmTy->getPointerTo()); 4382 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align); 4383 // Write number of elements in the first element of array for depobj. 4384 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 4385 // deps[i].base_addr = NumDependencies; 4386 LValue BaseAddrLVal = CGF.EmitLValueForField( 4387 Base, 4388 *std::next(KmpDependInfoRD->field_begin(), 4389 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr))); 4390 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 4391 llvm::PointerUnion<unsigned *, LValue *> Pos; 4392 unsigned Idx = 1; 4393 LValue PosLVal; 4394 if (Dependencies.IteratorExpr) { 4395 PosLVal = CGF.MakeAddrLValue( 4396 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 4397 C.getSizeType()); 4398 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 4399 /*IsInit=*/true); 4400 Pos = &PosLVal; 4401 } else { 4402 Pos = &Idx; 4403 } 4404 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 4405 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4406 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy, 4407 CGF.Int8Ty); 4408 return DependenciesArray; 4409 } 4410 4411 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 4412 SourceLocation Loc) { 4413 ASTContext &C = CGM.getContext(); 4414 QualType FlagsTy; 4415 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4416 LValue Base = CGF.EmitLoadOfPointerLValue( 4417 DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>()); 4418 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4419 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4420 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy), 4421 CGF.ConvertTypeForMem(KmpDependInfoTy)); 4422 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4423 Addr.getElementType(), Addr.getPointer(), 4424 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4425 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 4426 CGF.VoidPtrTy); 4427 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4428 // Use default allocator. 
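// DepObjAddr computed above steps back one element from the user-visible
// depobj pointer: emitDepobjDependClause returns the address of element 1
// and keeps the element count in element 0, so the runtime must be handed
// the real allocation base for the free.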
4429 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4430 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4431
4432 // __kmpc_free(gtid, addr, nullptr);
4433 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4434 CGM.getModule(), OMPRTL___kmpc_free),
4435 Args);
4436 }
4437
4438 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4439 OpenMPDependClauseKind NewDepKind,
4440 SourceLocation Loc) {
4441 ASTContext &C = CGM.getContext();
4442 QualType FlagsTy;
4443 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4444 RecordDecl *KmpDependInfoRD =
4445 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4446 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4447 llvm::Value *NumDeps;
4448 LValue Base;
4449 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4450
4451 Address Begin = Base.getAddress(CGF);
4452 // Cast from pointer to array type to pointer to single element.
4453 llvm::Value *End = CGF.Builder.CreateGEP(
4454 Begin.getElementType(), Begin.getPointer(), NumDeps);
4455 // The basic structure here is a do-while loop: the body runs at least once
4456 // and the exit condition is tested at the bottom.
4457 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4458 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4458 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4459 CGF.EmitBlock(BodyBB);
4460 llvm::PHINode *ElementPHI =
4461 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4462 ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
4463 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4464 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4465 Base.getTBAAInfo());
4466 // deps[i].flags = NewDepKind;
4467 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4468 LValue FlagsLVal = CGF.EmitLValueForField(
4469 Base, *std::next(KmpDependInfoRD->field_begin(),
4470 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4471 CGF.EmitStoreOfScalar(
4472 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4473 FlagsLVal);
4474
4475 // Shift the address forward by one element.
4476 Address ElementNext =
4477 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
4478 ElementPHI->addIncoming(ElementNext.getPointer(),
4479 CGF.Builder.GetInsertBlock());
4480 llvm::Value *IsEmpty =
4481 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
4482 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4483 // Done.
4484 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4485 }
4486
4487 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4488 const OMPExecutableDirective &D,
4489 llvm::Function *TaskFunction,
4490 QualType SharedsTy, Address Shareds,
4491 const Expr *IfCond,
4492 const OMPTaskDataTy &Data) {
4493 if (!CGF.HaveInsertPoint())
4494 return;
4495
4496 TaskResultTy Result =
4497 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4498 llvm::Value *NewTask = Result.NewTask;
4499 llvm::Function *TaskEntry = Result.TaskEntry;
4500 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4501 LValue TDBase = Result.TDBase;
4502 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4503 // Process list of dependences.
4504 Address DependenciesArray = Address::invalid();
4505 llvm::Value *NumOfElements;
4506 std::tie(NumOfElements, DependenciesArray) =
4507 emitDependClause(CGF, Data.Dependences, Loc);
4508
4509 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4510 // libcall.
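// For example (illustrative), a directive such as
//   #pragma omp task depend(in : a) depend(out : b) if (cond)
// reaches this point with Data.Dependences describing 'a' and 'b'. The
// then-branch below enqueues the task via __kmpc_omp_task_with_deps, while
// the else-branch (cond evaluated to false) waits on the dependences and
// runs the task body serially between the task_begin_if0/task_complete_if0
// calls.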
4511 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4512 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4513 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if the
4514 // dependence list is not empty.
4515 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4516 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4517 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4518 llvm::Value *DepTaskArgs[7];
4519 if (!Data.Dependences.empty()) {
4520 DepTaskArgs[0] = UpLoc;
4521 DepTaskArgs[1] = ThreadID;
4522 DepTaskArgs[2] = NewTask;
4523 DepTaskArgs[3] = NumOfElements;
4524 DepTaskArgs[4] = DependenciesArray.getPointer();
4525 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4526 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4527 }
4528 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4529 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4530 if (!Data.Tied) {
4531 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4532 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4533 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4534 }
4535 if (!Data.Dependences.empty()) {
4536 CGF.EmitRuntimeCall(
4537 OMPBuilder.getOrCreateRuntimeFunction(
4538 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4539 DepTaskArgs);
4540 } else {
4541 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4542 CGM.getModule(), OMPRTL___kmpc_omp_task),
4543 TaskArgs);
4544 }
4545 // Check if the parent region is untied and build a return for the untied
4546 // task.
4546 if (auto *Region =
4547 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4548 Region->emitUntiedSwitch(CGF);
4549 };
4550
4551 llvm::Value *DepWaitTaskArgs[7];
4552 if (!Data.Dependences.empty()) {
4553 DepWaitTaskArgs[0] = UpLoc;
4554 DepWaitTaskArgs[1] = ThreadID;
4555 DepWaitTaskArgs[2] = NumOfElements;
4556 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4557 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4558 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4559 DepWaitTaskArgs[6] =
4560 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4561 }
4562 auto &M = CGM.getModule();
4563 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4564 TaskEntry, &Data, &DepWaitTaskArgs,
4565 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4566 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4567 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
4568 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
4569 // kmp_depend_info_t *noalias_dep_list, kmp_int32 has_no_wait); if
4570 // dependence info is specified.
4571 if (!Data.Dependences.empty()) 4572 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4573 M, OMPRTL___kmpc_omp_taskwait_deps_51), 4574 DepWaitTaskArgs); 4575 // Call proxy_task_entry(gtid, new_task); 4576 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 4577 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 4578 Action.Enter(CGF); 4579 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 4580 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 4581 OutlinedFnArgs); 4582 }; 4583 4584 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 4585 // kmp_task_t *new_task); 4586 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 4587 // kmp_task_t *new_task); 4588 RegionCodeGenTy RCG(CodeGen); 4589 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 4590 M, OMPRTL___kmpc_omp_task_begin_if0), 4591 TaskArgs, 4592 OMPBuilder.getOrCreateRuntimeFunction( 4593 M, OMPRTL___kmpc_omp_task_complete_if0), 4594 TaskArgs); 4595 RCG.setAction(Action); 4596 RCG(CGF); 4597 }; 4598 4599 if (IfCond) { 4600 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 4601 } else { 4602 RegionCodeGenTy ThenRCG(ThenCodeGen); 4603 ThenRCG(CGF); 4604 } 4605 } 4606 4607 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 4608 const OMPLoopDirective &D, 4609 llvm::Function *TaskFunction, 4610 QualType SharedsTy, Address Shareds, 4611 const Expr *IfCond, 4612 const OMPTaskDataTy &Data) { 4613 if (!CGF.HaveInsertPoint()) 4614 return; 4615 TaskResultTy Result = 4616 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 4617 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 4618 // libcall. 4619 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 4620 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 4621 // sched, kmp_uint64 grainsize, void *task_dup); 4622 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4623 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 4624 llvm::Value *IfVal; 4625 if (IfCond) { 4626 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 4627 /*isSigned=*/true); 4628 } else { 4629 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 4630 } 4631 4632 LValue LBLVal = CGF.EmitLValueForField( 4633 Result.TDBase, 4634 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 4635 const auto *LBVar = 4636 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 4637 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 4638 LBLVal.getQuals(), 4639 /*IsInitializer=*/true); 4640 LValue UBLVal = CGF.EmitLValueForField( 4641 Result.TDBase, 4642 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 4643 const auto *UBVar = 4644 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 4645 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 4646 UBLVal.getQuals(), 4647 /*IsInitializer=*/true); 4648 LValue StLVal = CGF.EmitLValueForField( 4649 Result.TDBase, 4650 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 4651 const auto *StVar = 4652 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 4653 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 4654 StLVal.getQuals(), 4655 /*IsInitializer=*/true); 4656 // Store reductions address. 
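// (The reductions pointer in Data.Reductions is produced earlier by
// emitTaskReductionInit when the directive has reduction clauses; storing it
// into the kmp_task_t lets the generated tasks look up their private copies
// via __kmpc_task_reduction_get_th_data, see getTaskReductionItem below.)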
4657 LValue RedLVal = CGF.EmitLValueForField( 4658 Result.TDBase, 4659 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 4660 if (Data.Reductions) { 4661 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 4662 } else { 4663 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 4664 CGF.getContext().VoidPtrTy); 4665 } 4666 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 4667 llvm::Value *TaskArgs[] = { 4668 UpLoc, 4669 ThreadID, 4670 Result.NewTask, 4671 IfVal, 4672 LBLVal.getPointer(CGF), 4673 UBLVal.getPointer(CGF), 4674 CGF.EmitLoadOfScalar(StLVal, Loc), 4675 llvm::ConstantInt::getSigned( 4676 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 4677 llvm::ConstantInt::getSigned( 4678 CGF.IntTy, Data.Schedule.getPointer() 4679 ? Data.Schedule.getInt() ? NumTasks : Grainsize 4680 : NoSchedule), 4681 Data.Schedule.getPointer() 4682 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 4683 /*isSigned=*/false) 4684 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 4685 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4686 Result.TaskDupFn, CGF.VoidPtrTy) 4687 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 4688 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4689 CGM.getModule(), OMPRTL___kmpc_taskloop), 4690 TaskArgs); 4691 } 4692 4693 /// Emit reduction operation for each element of array (required for 4694 /// array sections) LHS op = RHS. 4695 /// \param Type Type of array. 4696 /// \param LHSVar Variable on the left side of the reduction operation 4697 /// (references element of array in original variable). 4698 /// \param RHSVar Variable on the right side of the reduction operation 4699 /// (references element of array in original variable). 4700 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 4701 /// RHSVar. 4702 static void EmitOMPAggregateReduction( 4703 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 4704 const VarDecl *RHSVar, 4705 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 4706 const Expr *, const Expr *)> &RedOpGen, 4707 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 4708 const Expr *UpExpr = nullptr) { 4709 // Perform element-by-element initialization. 4710 QualType ElementTy; 4711 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 4712 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 4713 4714 // Drill down to the base element type on both arrays. 4715 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 4716 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 4717 4718 llvm::Value *RHSBegin = RHSAddr.getPointer(); 4719 llvm::Value *LHSBegin = LHSAddr.getPointer(); 4720 // Cast from pointer to array type to pointer to single element. 4721 llvm::Value *LHSEnd = 4722 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements); 4723 // The basic structure here is a while-do loop. 4724 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 4725 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 4726 llvm::Value *IsEmpty = 4727 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 4728 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 4729 4730 // Enter the loop body, making that address the current address. 
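// The loop emitted below has roughly this shape in IR (illustrative):
//   omp.arraycpy.body:
//     %pastSrc = phi [ %rhs.begin, %entry ], [ %src.next, %omp.arraycpy.body ]
//     %pastDst = phi [ %lhs.begin, %entry ], [ %dest.next, %omp.arraycpy.body ]
//     ; RedOpGen applied to the current LHS/RHS elements
//     %dest.next = getelementptr <elt>, ptr %pastDst, i32 1
//     %done = icmp eq ptr %dest.next, %lhs.end
//     br i1 %done, label %omp.arraycpy.done, label %omp.arraycpy.body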
4731 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4732 CGF.EmitBlock(BodyBB);
4733
4734 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4735
4736 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4737 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4738 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4739 Address RHSElementCurrent(
4740 RHSElementPHI, RHSAddr.getElementType(),
4741 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4742
4743 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4744 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4745 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4746 Address LHSElementCurrent(
4747 LHSElementPHI, LHSAddr.getElementType(),
4748 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4749
4750 // Emit copy.
4751 CodeGenFunction::OMPPrivateScope Scope(CGF);
4752 Scope.addPrivate(LHSVar, LHSElementCurrent);
4753 Scope.addPrivate(RHSVar, RHSElementCurrent);
4754 Scope.Privatize();
4755 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4756 Scope.ForceCleanup();
4757
4758 // Shift the address forward by one element.
4759 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4760 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4761 "omp.arraycpy.dest.element");
4762 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4763 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4764 "omp.arraycpy.src.element");
4765 // Check whether we've reached the end.
4766 llvm::Value *Done =
4767 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4768 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4769 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4770 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4771
4772 // Done.
4773 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4774 }
4775
4776 /// Emit the reduction combiner. If the combiner is a simple expression, emit
4777 /// it as is; otherwise treat it as the combiner of a UDR declaration and emit
4778 /// it as a call to the UDR combiner function.
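/// For example (illustrative), given
///   #pragma omp declare reduction(merge : T : omp_out = combine(omp_out, omp_in))
/// a 'reduction(merge : x)' clause makes the reduction op refer to the UDR,
/// and the combiner is emitted as a call to the previously generated UDR
/// combiner function rather than as a plain expression.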
4779 static void emitReductionCombiner(CodeGenFunction &CGF, 4780 const Expr *ReductionOp) { 4781 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 4782 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 4783 if (const auto *DRE = 4784 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 4785 if (const auto *DRD = 4786 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 4787 std::pair<llvm::Function *, llvm::Function *> Reduction = 4788 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 4789 RValue Func = RValue::get(Reduction.first); 4790 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 4791 CGF.EmitIgnoredExpr(ReductionOp); 4792 return; 4793 } 4794 CGF.EmitIgnoredExpr(ReductionOp); 4795 } 4796 4797 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 4798 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, 4799 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, 4800 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) { 4801 ASTContext &C = CGM.getContext(); 4802 4803 // void reduction_func(void *LHSArg, void *RHSArg); 4804 FunctionArgList Args; 4805 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 4806 ImplicitParamKind::Other); 4807 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 4808 ImplicitParamKind::Other); 4809 Args.push_back(&LHSArg); 4810 Args.push_back(&RHSArg); 4811 const auto &CGFI = 4812 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4813 std::string Name = getReductionFuncName(ReducerName); 4814 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 4815 llvm::GlobalValue::InternalLinkage, Name, 4816 &CGM.getModule()); 4817 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 4818 Fn->setDoesNotRecurse(); 4819 CodeGenFunction CGF(CGM); 4820 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 4821 4822 // Dst = (void*[n])(LHSArg); 4823 // Src = (void*[n])(RHSArg); 4824 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4825 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 4826 ArgsElemType->getPointerTo()), 4827 ArgsElemType, CGF.getPointerAlign()); 4828 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4829 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 4830 ArgsElemType->getPointerTo()), 4831 ArgsElemType, CGF.getPointerAlign()); 4832 4833 // ... 4834 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 4835 // ... 4836 CodeGenFunction::OMPPrivateScope Scope(CGF); 4837 const auto *IPriv = Privates.begin(); 4838 unsigned Idx = 0; 4839 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 4840 const auto *RHSVar = 4841 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 4842 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar)); 4843 const auto *LHSVar = 4844 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 4845 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar)); 4846 QualType PrivTy = (*IPriv)->getType(); 4847 if (PrivTy->isVariablyModifiedType()) { 4848 // Get array size and emit VLA type. 
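// By convention (see the RedList construction in emitReduction below), a
// variably modified private is followed by one extra array slot holding its
// dynamic size (stored as a pointer-sized integer), so Idx is advanced here
// and the size read from that slot.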
4849 ++Idx; 4850 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 4851 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 4852 const VariableArrayType *VLA = 4853 CGF.getContext().getAsVariableArrayType(PrivTy); 4854 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 4855 CodeGenFunction::OpaqueValueMapping OpaqueMap( 4856 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 4857 CGF.EmitVariablyModifiedType(PrivTy); 4858 } 4859 } 4860 Scope.Privatize(); 4861 IPriv = Privates.begin(); 4862 const auto *ILHS = LHSExprs.begin(); 4863 const auto *IRHS = RHSExprs.begin(); 4864 for (const Expr *E : ReductionOps) { 4865 if ((*IPriv)->getType()->isArrayType()) { 4866 // Emit reduction for array section. 4867 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 4868 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 4869 EmitOMPAggregateReduction( 4870 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 4871 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4872 emitReductionCombiner(CGF, E); 4873 }); 4874 } else { 4875 // Emit reduction for array subscript or single variable. 4876 emitReductionCombiner(CGF, E); 4877 } 4878 ++IPriv; 4879 ++ILHS; 4880 ++IRHS; 4881 } 4882 Scope.ForceCleanup(); 4883 CGF.FinishFunction(); 4884 return Fn; 4885 } 4886 4887 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 4888 const Expr *ReductionOp, 4889 const Expr *PrivateRef, 4890 const DeclRefExpr *LHS, 4891 const DeclRefExpr *RHS) { 4892 if (PrivateRef->getType()->isArrayType()) { 4893 // Emit reduction for array section. 4894 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 4895 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 4896 EmitOMPAggregateReduction( 4897 CGF, PrivateRef->getType(), LHSVar, RHSVar, 4898 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4899 emitReductionCombiner(CGF, ReductionOp); 4900 }); 4901 } else { 4902 // Emit reduction for array subscript or single variable. 4903 emitReductionCombiner(CGF, ReductionOp); 4904 } 4905 } 4906 4907 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 4908 ArrayRef<const Expr *> Privates, 4909 ArrayRef<const Expr *> LHSExprs, 4910 ArrayRef<const Expr *> RHSExprs, 4911 ArrayRef<const Expr *> ReductionOps, 4912 ReductionOptionsTy Options) { 4913 if (!CGF.HaveInsertPoint()) 4914 return; 4915 4916 bool WithNowait = Options.WithNowait; 4917 bool SimpleReduction = Options.SimpleReduction; 4918 4919 // Next code should be emitted for reduction: 4920 // 4921 // static kmp_critical_name lock = { 0 }; 4922 // 4923 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 4924 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 4925 // ... 4926 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 4927 // *(Type<n>-1*)rhs[<n>-1]); 4928 // } 4929 // 4930 // ... 4931 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 4932 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 4933 // RedList, reduce_func, &<lock>)) { 4934 // case 1: 4935 // ... 4936 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4937 // ... 4938 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 4939 // break; 4940 // case 2: 4941 // ... 4942 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 4943 // ... 
4944 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 4945 // break; 4946 // default:; 4947 // } 4948 // 4949 // if SimpleReduction is true, only the next code is generated: 4950 // ... 4951 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4952 // ... 4953 4954 ASTContext &C = CGM.getContext(); 4955 4956 if (SimpleReduction) { 4957 CodeGenFunction::RunCleanupsScope Scope(CGF); 4958 const auto *IPriv = Privates.begin(); 4959 const auto *ILHS = LHSExprs.begin(); 4960 const auto *IRHS = RHSExprs.begin(); 4961 for (const Expr *E : ReductionOps) { 4962 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 4963 cast<DeclRefExpr>(*IRHS)); 4964 ++IPriv; 4965 ++ILHS; 4966 ++IRHS; 4967 } 4968 return; 4969 } 4970 4971 // 1. Build a list of reduction variables. 4972 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 4973 auto Size = RHSExprs.size(); 4974 for (const Expr *E : Privates) { 4975 if (E->getType()->isVariablyModifiedType()) 4976 // Reserve place for array size. 4977 ++Size; 4978 } 4979 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 4980 QualType ReductionArrayTy = C.getConstantArrayType( 4981 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal, 4982 /*IndexTypeQuals=*/0); 4983 Address ReductionList = 4984 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 4985 const auto *IPriv = Privates.begin(); 4986 unsigned Idx = 0; 4987 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 4988 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 4989 CGF.Builder.CreateStore( 4990 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4991 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 4992 Elem); 4993 if ((*IPriv)->getType()->isVariablyModifiedType()) { 4994 // Store array size. 4995 ++Idx; 4996 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 4997 llvm::Value *Size = CGF.Builder.CreateIntCast( 4998 CGF.getVLASize( 4999 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5000 .NumElts, 5001 CGF.SizeTy, /*isSigned=*/false); 5002 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5003 Elem); 5004 } 5005 } 5006 5007 // 2. Emit reduce_func(). 5008 llvm::Function *ReductionFn = emitReductionFunction( 5009 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy), 5010 Privates, LHSExprs, RHSExprs, ReductionOps); 5011 5012 // 3. Create static kmp_critical_name lock = { 0 }; 5013 std::string Name = getName({"reduction"}); 5014 llvm::Value *Lock = getCriticalRegionLock(Name); 5015 5016 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5017 // RedList, reduce_func, &<lock>); 5018 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5019 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5020 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5021 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5022 ReductionList.getPointer(), CGF.VoidPtrTy); 5023 llvm::Value *Args[] = { 5024 IdentTLoc, // ident_t *<loc> 5025 ThreadId, // i32 <gtid> 5026 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5027 ReductionArrayTySize, // size_type sizeof(RedList) 5028 RL, // void *RedList 5029 ReductionFn, // void (*) (void *, void *) <reduce_func> 5030 Lock // kmp_critical_name *&<lock> 5031 }; 5032 llvm::Value *Res = CGF.EmitRuntimeCall( 5033 OMPBuilder.getOrCreateRuntimeFunction( 5034 CGM.getModule(), 5035 WithNowait ? 
OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5036 Args); 5037 5038 // 5. Build switch(res) 5039 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5040 llvm::SwitchInst *SwInst = 5041 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5042 5043 // 6. Build case 1: 5044 // ... 5045 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5046 // ... 5047 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5048 // break; 5049 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5050 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5051 CGF.EmitBlock(Case1BB); 5052 5053 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5054 llvm::Value *EndArgs[] = { 5055 IdentTLoc, // ident_t *<loc> 5056 ThreadId, // i32 <gtid> 5057 Lock // kmp_critical_name *&<lock> 5058 }; 5059 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5060 CodeGenFunction &CGF, PrePostActionTy &Action) { 5061 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5062 const auto *IPriv = Privates.begin(); 5063 const auto *ILHS = LHSExprs.begin(); 5064 const auto *IRHS = RHSExprs.begin(); 5065 for (const Expr *E : ReductionOps) { 5066 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5067 cast<DeclRefExpr>(*IRHS)); 5068 ++IPriv; 5069 ++ILHS; 5070 ++IRHS; 5071 } 5072 }; 5073 RegionCodeGenTy RCG(CodeGen); 5074 CommonActionTy Action( 5075 nullptr, std::nullopt, 5076 OMPBuilder.getOrCreateRuntimeFunction( 5077 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5078 : OMPRTL___kmpc_end_reduce), 5079 EndArgs); 5080 RCG.setAction(Action); 5081 RCG(CGF); 5082 5083 CGF.EmitBranch(DefaultBB); 5084 5085 // 7. Build case 2: 5086 // ... 5087 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5088 // ... 5089 // break; 5090 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5091 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5092 CGF.EmitBlock(Case2BB); 5093 5094 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5095 CodeGenFunction &CGF, PrePostActionTy &Action) { 5096 const auto *ILHS = LHSExprs.begin(); 5097 const auto *IRHS = RHSExprs.begin(); 5098 const auto *IPriv = Privates.begin(); 5099 for (const Expr *E : ReductionOps) { 5100 const Expr *XExpr = nullptr; 5101 const Expr *EExpr = nullptr; 5102 const Expr *UpExpr = nullptr; 5103 BinaryOperatorKind BO = BO_Comma; 5104 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5105 if (BO->getOpcode() == BO_Assign) { 5106 XExpr = BO->getLHS(); 5107 UpExpr = BO->getRHS(); 5108 } 5109 } 5110 // Try to emit update expression as a simple atomic. 5111 const Expr *RHSExpr = UpExpr; 5112 if (RHSExpr) { 5113 // Analyze RHS part of the whole expression. 5114 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5115 RHSExpr->IgnoreParenImpCasts())) { 5116 // If this is a conditional operator, analyze its condition for 5117 // min/max reduction operator. 
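// E.g. (illustrative) 'reduction(min : x)' produces an update of the form
//   x = x < e ? x : e;
// so the comparison carrying the min/max semantics sits in the condition of
// the conditional operator, not in its arms.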
5118 RHSExpr = ACO->getCond(); 5119 } 5120 if (const auto *BORHS = 5121 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5122 EExpr = BORHS->getRHS(); 5123 BO = BORHS->getOpcode(); 5124 } 5125 } 5126 if (XExpr) { 5127 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5128 auto &&AtomicRedGen = [BO, VD, 5129 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5130 const Expr *EExpr, const Expr *UpExpr) { 5131 LValue X = CGF.EmitLValue(XExpr); 5132 RValue E; 5133 if (EExpr) 5134 E = CGF.EmitAnyExpr(EExpr); 5135 CGF.EmitOMPAtomicSimpleUpdateExpr( 5136 X, E, BO, /*IsXLHSInRHSPart=*/true, 5137 llvm::AtomicOrdering::Monotonic, Loc, 5138 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5139 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5140 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5141 CGF.emitOMPSimpleStore( 5142 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5143 VD->getType().getNonReferenceType(), Loc); 5144 PrivateScope.addPrivate(VD, LHSTemp); 5145 (void)PrivateScope.Privatize(); 5146 return CGF.EmitAnyExpr(UpExpr); 5147 }); 5148 }; 5149 if ((*IPriv)->getType()->isArrayType()) { 5150 // Emit atomic reduction for array section. 5151 const auto *RHSVar = 5152 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5153 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5154 AtomicRedGen, XExpr, EExpr, UpExpr); 5155 } else { 5156 // Emit atomic reduction for array subscript or single variable. 5157 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5158 } 5159 } else { 5160 // Emit as a critical region. 5161 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5162 const Expr *, const Expr *) { 5163 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5164 std::string Name = RT.getName({"atomic_reduction"}); 5165 RT.emitCriticalRegion( 5166 CGF, Name, 5167 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5168 Action.Enter(CGF); 5169 emitReductionCombiner(CGF, E); 5170 }, 5171 Loc); 5172 }; 5173 if ((*IPriv)->getType()->isArrayType()) { 5174 const auto *LHSVar = 5175 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5176 const auto *RHSVar = 5177 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5178 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5179 CritRedGen); 5180 } else { 5181 CritRedGen(CGF, nullptr, nullptr, nullptr); 5182 } 5183 } 5184 ++ILHS; 5185 ++IRHS; 5186 ++IPriv; 5187 } 5188 }; 5189 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5190 if (!WithNowait) { 5191 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5192 llvm::Value *EndArgs[] = { 5193 IdentTLoc, // ident_t *<loc> 5194 ThreadId, // i32 <gtid> 5195 Lock // kmp_critical_name *&<lock> 5196 }; 5197 CommonActionTy Action(nullptr, std::nullopt, 5198 OMPBuilder.getOrCreateRuntimeFunction( 5199 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5200 EndArgs); 5201 AtomicRCG.setAction(Action); 5202 AtomicRCG(CGF); 5203 } else { 5204 AtomicRCG(CGF); 5205 } 5206 5207 CGF.EmitBranch(DefaultBB); 5208 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5209 } 5210 5211 /// Generates unique name for artificial threadprivate variables. 5212 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5213 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5214 const Expr *Ref) { 5215 SmallString<256> Buffer; 5216 llvm::raw_svector_ostream Out(Buffer); 5217 const clang::DeclRefExpr *DE; 5218 const VarDecl *D = ::getBaseDecl(Ref, DE); 5219 if (!D) 5220 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5221 D = D->getCanonicalDecl(); 5222 std::string Name = CGM.getOpenMPRuntime().getName( 5223 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5224 Out << Prefix << Name << "_" 5225 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5226 return std::string(Out.str()); 5227 } 5228 5229 /// Emits reduction initializer function: 5230 /// \code 5231 /// void @.red_init(void* %arg, void* %orig) { 5232 /// %0 = bitcast void* %arg to <type>* 5233 /// store <type> <init>, <type>* %0 5234 /// ret void 5235 /// } 5236 /// \endcode 5237 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5238 SourceLocation Loc, 5239 ReductionCodeGen &RCG, unsigned N) { 5240 ASTContext &C = CGM.getContext(); 5241 QualType VoidPtrTy = C.VoidPtrTy; 5242 VoidPtrTy.addRestrict(); 5243 FunctionArgList Args; 5244 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5245 ImplicitParamKind::Other); 5246 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5247 ImplicitParamKind::Other); 5248 Args.emplace_back(&Param); 5249 Args.emplace_back(&ParamOrig); 5250 const auto &FnInfo = 5251 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5252 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5253 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5254 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5255 Name, &CGM.getModule()); 5256 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5257 Fn->setDoesNotRecurse(); 5258 CodeGenFunction CGF(CGM); 5259 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5260 QualType PrivateType = RCG.getPrivateType(N); 5261 Address PrivateAddr = CGF.EmitLoadOfPointer( 5262 CGF.GetAddrOfLocalVar(&Param).withElementType( 5263 CGF.ConvertTypeForMem(PrivateType)->getPointerTo()), 5264 C.getPointerType(PrivateType)->castAs<PointerType>()); 5265 llvm::Value *Size = nullptr; 5266 // If the size of the reduction item is non-constant, load it from global 5267 // threadprivate variable. 
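// (The matching store is emitted by emitTaskReductionFixups below, which
// writes the dynamic size into the same uniquely named threadprivate
// variable before the runtime invokes this initializer.)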
5268 if (RCG.getSizes(N).second) {
5269 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5270 CGF, CGM.getContext().getSizeType(),
5271 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5272 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5273 CGM.getContext().getSizeType(), Loc);
5274 }
5275 RCG.emitAggregateType(CGF, N, Size);
5276 Address OrigAddr = Address::invalid();
5277 // If the initializer uses the initializer from the declare reduction
5278 // construct, emit a pointer to the address of the original reduction item
5279 // (required by the reduction initializer).
5280 if (RCG.usesReductionInitializer(N)) {
5281 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5282 OrigAddr = CGF.EmitLoadOfPointer(
5283 SharedAddr,
5284 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5285 }
5286 // Emit the initializer:
5287 // %0 = bitcast void* %arg to <type>*
5288 // store <type> <init>, <type>* %0
5289 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5290 [](CodeGenFunction &) { return false; });
5291 CGF.FinishFunction();
5292 return Fn;
5293 }
5294
5295 /// Emits reduction combiner function:
5296 /// \code
5297 /// void @.red_comb(void* %arg0, void* %arg1) {
5298 /// %lhs = bitcast void* %arg0 to <type>*
5299 /// %rhs = bitcast void* %arg1 to <type>*
5300 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5301 /// store <type> %2, <type>* %lhs
5302 /// ret void
5303 /// }
5304 /// \endcode
5305 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5306 SourceLocation Loc,
5307 ReductionCodeGen &RCG, unsigned N,
5308 const Expr *ReductionOp,
5309 const Expr *LHS, const Expr *RHS,
5310 const Expr *PrivateRef) {
5311 ASTContext &C = CGM.getContext();
5312 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5313 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5314 FunctionArgList Args;
5315 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5316 C.VoidPtrTy, ImplicitParamKind::Other);
5317 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5318 ImplicitParamKind::Other);
5319 Args.emplace_back(&ParamInOut);
5320 Args.emplace_back(&ParamIn);
5321 const auto &FnInfo =
5322 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5323 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5324 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5325 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5326 Name, &CGM.getModule());
5327 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5328 Fn->setDoesNotRecurse();
5329 CodeGenFunction CGF(CGM);
5330 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5331 llvm::Value *Size = nullptr;
5332 // If the size of the reduction item is non-constant, load it from global
5333 // threadprivate variable.
5334 if (RCG.getSizes(N).second) {
5335 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5336 CGF, CGM.getContext().getSizeType(),
5337 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5338 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5339 CGM.getContext().getSizeType(), Loc);
5340 }
5341 RCG.emitAggregateType(CGF, N, Size);
5342 // Remap lhs and rhs variables to the addresses of the function arguments.
5343 // %lhs = bitcast void* %arg0 to <type>* 5344 // %rhs = bitcast void* %arg1 to <type>* 5345 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5346 PrivateScope.addPrivate( 5347 LHSVD, 5348 // Pull out the pointer to the variable. 5349 CGF.EmitLoadOfPointer( 5350 CGF.GetAddrOfLocalVar(&ParamInOut) 5351 .withElementType( 5352 CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()), 5353 C.getPointerType(LHSVD->getType())->castAs<PointerType>())); 5354 PrivateScope.addPrivate( 5355 RHSVD, 5356 // Pull out the pointer to the variable. 5357 CGF.EmitLoadOfPointer( 5358 CGF.GetAddrOfLocalVar(&ParamIn).withElementType( 5359 CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()), 5360 C.getPointerType(RHSVD->getType())->castAs<PointerType>())); 5361 PrivateScope.Privatize(); 5362 // Emit the combiner body: 5363 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5364 // store <type> %2, <type>* %lhs 5365 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5366 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5367 cast<DeclRefExpr>(RHS)); 5368 CGF.FinishFunction(); 5369 return Fn; 5370 } 5371 5372 /// Emits reduction finalizer function: 5373 /// \code 5374 /// void @.red_fini(void* %arg) { 5375 /// %0 = bitcast void* %arg to <type>* 5376 /// <destroy>(<type>* %0) 5377 /// ret void 5378 /// } 5379 /// \endcode 5380 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5381 SourceLocation Loc, 5382 ReductionCodeGen &RCG, unsigned N) { 5383 if (!RCG.needCleanups(N)) 5384 return nullptr; 5385 ASTContext &C = CGM.getContext(); 5386 FunctionArgList Args; 5387 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5388 ImplicitParamKind::Other); 5389 Args.emplace_back(&Param); 5390 const auto &FnInfo = 5391 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5392 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5393 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 5394 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5395 Name, &CGM.getModule()); 5396 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5397 Fn->setDoesNotRecurse(); 5398 CodeGenFunction CGF(CGM); 5399 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5400 Address PrivateAddr = CGF.EmitLoadOfPointer( 5401 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>()); 5402 llvm::Value *Size = nullptr; 5403 // If the size of the reduction item is non-constant, load it from global 5404 // threadprivate variable. 
5405 if (RCG.getSizes(N).second) { 5406 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5407 CGF, CGM.getContext().getSizeType(), 5408 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5409 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5410 CGM.getContext().getSizeType(), Loc); 5411 } 5412 RCG.emitAggregateType(CGF, N, Size); 5413 // Emit the finalizer body: 5414 // <destroy>(<type>* %0) 5415 RCG.emitCleanups(CGF, N, PrivateAddr); 5416 CGF.FinishFunction(Loc); 5417 return Fn; 5418 } 5419 5420 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 5421 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 5422 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 5423 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 5424 return nullptr; 5425 5426 // Build typedef struct: 5427 // kmp_taskred_input { 5428 // void *reduce_shar; // shared reduction item 5429 // void *reduce_orig; // original reduction item used for initialization 5430 // size_t reduce_size; // size of data item 5431 // void *reduce_init; // data initialization routine 5432 // void *reduce_fini; // data finalization routine 5433 // void *reduce_comb; // data combiner routine 5434 // kmp_task_red_flags_t flags; // flags for additional info from compiler 5435 // } kmp_taskred_input_t; 5436 ASTContext &C = CGM.getContext(); 5437 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 5438 RD->startDefinition(); 5439 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5440 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5441 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 5442 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5443 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5444 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5445 const FieldDecl *FlagsFD = addFieldToRecordDecl( 5446 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 5447 RD->completeDefinition(); 5448 QualType RDType = C.getRecordType(RD); 5449 unsigned Size = Data.ReductionVars.size(); 5450 llvm::APInt ArraySize(/*numBits=*/64, Size); 5451 QualType ArrayRDType = 5452 C.getConstantArrayType(RDType, ArraySize, nullptr, 5453 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); 5454 // kmp_task_red_input_t .rd_input.[Size]; 5455 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 5456 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 5457 Data.ReductionCopies, Data.ReductionOps); 5458 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 5459 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 5460 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 5461 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 5462 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 5463 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs, 5464 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 5465 ".rd_input.gep."); 5466 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 5467 // ElemLVal.reduce_shar = &Shareds[Cnt]; 5468 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 5469 RCG.emitSharedOrigLValue(CGF, Cnt); 5470 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF); 5471 CGF.EmitStoreOfScalar(Shared, SharedLVal); 5472 // ElemLVal.reduce_orig = &Origs[Cnt]; 5473 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 5474 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF); 
5475 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5476 RCG.emitAggregateType(CGF, Cnt);
5477 llvm::Value *SizeValInChars;
5478 llvm::Value *SizeVal;
5479 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5480 // We use delayed creation/initialization for VLAs and array sections. This
5481 // is required because the runtime does not provide a way to pass the sizes
5482 // of VLAs/array sections to the initializer/combiner/finalizer functions;
5483 // instead, threadprivate global variables are used to store these values
5484 // for use inside those functions.
5485 bool DelayedCreation = !!SizeVal;
5486 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5487 /*isSigned=*/false);
5488 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5489 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5490 // ElemLVal.reduce_init = init;
5491 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5492 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5493 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5494 // ElemLVal.reduce_fini = fini;
5495 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5496 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5497 llvm::Value *FiniAddr =
5498 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5499 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5500 // ElemLVal.reduce_comb = comb;
5501 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5502 llvm::Value *CombAddr = emitReduceCombFunction(
5503 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5504 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5505 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5506 // ElemLVal.flags = DelayedCreation ? 1 : 0;
5507 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5508 if (DelayedCreation) {
5509 CGF.EmitStoreOfScalar(
5510 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5511 FlagsLVal);
5512 } else
5513 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
5514 FlagsLVal.getType());
5515 }
5516 if (Data.IsReductionWithTaskMod) {
5517 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5518 // is_ws, int num, void *data);
5519 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5520 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5521 CGM.IntTy, /*isSigned=*/true);
5522 llvm::Value *Args[] = {
5523 IdentTLoc, GTid,
5524 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ?
1 : 0,
5525 /*isSigned=*/true),
5526 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5527 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5528 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5529 return CGF.EmitRuntimeCall(
5530 OMPBuilder.getOrCreateRuntimeFunction(
5531 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5532 Args);
5533 }
5534 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5535 llvm::Value *Args[] = {
5536 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5537 /*isSigned=*/true),
5538 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5539 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5540 CGM.VoidPtrTy)};
5541 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5542 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5543 Args);
5544 }
5545
5546 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5547 SourceLocation Loc,
5548 bool IsWorksharingReduction) {
5549 // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
5550 // gtid, int is_ws);
5551 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5552 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5553 CGM.IntTy, /*isSigned=*/true);
5554 llvm::Value *Args[] = {IdentTLoc, GTid,
5555 llvm::ConstantInt::get(CGM.IntTy,
5556 IsWorksharingReduction ? 1 : 0,
5557 /*isSigned=*/true)};
5558 (void)CGF.EmitRuntimeCall(
5559 OMPBuilder.getOrCreateRuntimeFunction(
5560 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5561 Args);
5562 }
5563
5564 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5565 SourceLocation Loc,
5566 ReductionCodeGen &RCG,
5567 unsigned N) {
5568 auto Sizes = RCG.getSizes(N);
5569 // Emit threadprivate global variable if the type is non-constant
5570 // (Sizes.second != nullptr).
5571 if (Sizes.second) {
5572 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5573 /*isSigned=*/false);
5574 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5575 CGF, CGM.getContext().getSizeType(),
5576 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5577 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5578 }
5579 }
5580
5581 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5582 SourceLocation Loc,
5583 llvm::Value *ReductionsPtr,
5584 LValue SharedLVal) {
5585 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5586 // *d);
5587 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5588 CGM.IntTy,
5589 /*isSigned=*/true),
5590 ReductionsPtr,
5591 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5592 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5593 return Address(
5594 CGF.EmitRuntimeCall(
5595 OMPBuilder.getOrCreateRuntimeFunction(
5596 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5597 Args),
5598 CGF.Int8Ty, SharedLVal.getAlignment());
5599 }
5600
5601 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5602 const OMPTaskDataTy &Data) {
5603 if (!CGF.HaveInsertPoint())
5604 return;
5605
5606 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5607 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
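// The OpenMPIRBuilder path currently handles only the plain form; a
// 'taskwait' with depend clauses falls through to the classic runtime-call
// emission below.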

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait); if dependence info is specified.
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    } else {
      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
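
// Illustrative lowering (simplified):
//   #pragma omp taskwait
//     -> call i32 @__kmpc_omp_taskwait(ptr @loc, i32 %gtid)
//   #pragma omp taskwait depend(in: a)
//     -> call void @__kmpc_omp_taskwait_deps_51(ptr @loc, i32 %gtid, i32 1,
//                                               ptr %dep.arr, i32 0, ptr null,
//                                               i32 0)
// When the OpenMPIRBuilder is enabled and there are no dependences, the
// builder emits the plain taskwait form itself.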

void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // A non-zero result means cancellation was activated; branch out of the
      // construct in that case.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // A non-zero result means cancellation was activated; branch out of the
      // construct in that case.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
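
// Illustrative control flow for '#pragma omp cancel for' (simplified):
//   %res = call i32 @__kmpc_cancel(ptr @loc, i32 %gtid, i32 2) ; CancelLoop
//   %cmp = icmp ne i32 %res, 0
//   br i1 %cmp, label %.cancel.exit, label %.cancel.continue
// .cancel.exit:     ; branch through cleanups to the cancellation destination
// .cancel.continue: ; normal path
// For parallel cancellation, a cancel barrier is emitted on the exit path.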

namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target entry parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
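
// Illustrative effect (simplified; 'my_alloc'/'my_traits' are example names):
// for
//   #pragma omp target uses_allocators(my_alloc(my_traits))
// the action above brackets the emitted target region with
//   Enter: my_alloc = __kmpc_init_allocator(gtid, /*memspace=*/null,
//                                           num_traits, &my_traits)
//   Exit:  __kmpc_destroy_allocator(gtid, my_alloc)
// Allocators listed without a traits array are skipped when collecting the
// Allocators vector above.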

void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.getPointer();

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitAutoVarAlloca(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
    const OMPExecutableDirective &D, CodeGenFunction &CGF,
    int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
  getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
                                      /*UpperBoundOnly=*/true);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
      int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
        CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
                                       &AttrMinBlocksVal, &AttrMaxBlocksVal);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(
            nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
            &AttrMaxThreadsVal);
      else
        continue;

      MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
      if (AttrMaxThreadsVal > 0)
        MaxThreadsVal = MaxThreadsVal > 0
                            ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
                            : AttrMaxThreadsVal;
      MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
      if (AttrMaxBlocksVal > 0)
        MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
                                      : AttrMaxBlocksVal;
    }
  }
}
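
// Worked example (illustrative): if an ompx_attribute clause carries
// __attribute__((launch_bounds(128, 4))), then AttrMaxThreadsVal = 128 and
// AttrMinBlocksVal = 4, so an unknown MaxThreadsVal (-1) becomes 128, an
// existing MaxThreadsVal of 256 is clamped to min(256, 128) = 128, and
// MinTeamsVal is raised to max(MinTeamsVal, 4).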

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);

  CodeGenFunction CGF(CGM, true);
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
      };

  OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
                                      IsOffloadEntry, OutlinedFn, OutlinedFnID);

  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
        CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
    }
  }
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot return a single child.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
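
// Illustrative behavior (simplified): for a body like
//   {
//     int unused;       // ignored: local variable that is never used
//     ;                 // ignored: null statement
//     #pragma omp parallel
//     { ... }
//   }
// getSingleCompoundChild returns the parallel directive, since every other
// statement is trivial by the rules above. Two non-trivial children make it
// return nullptr.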

const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        MinTeamsVal = MaxTeamsVal = 1;
        return nullptr;
      }
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region.
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t MinNT = -1, MaxNT = -1;
  const Expr *NumTeams =
      getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams, /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams, /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  assert(MinNT == MaxNT && "Num teams ranges require handling here.");
  return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
}
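
// Illustrative results (simplified):
//   #pragma omp target teams num_teams(8) -> i32 8 (MinNT = MaxNT = 8)
//   #pragma omp target teams num_teams(n) -> emitted value of 'n'
//   #pragma omp target parallel           -> i32 1 (single team)
//   #pragma omp target (opaque body)      -> i32 -1 (no teams region needed;
//                                            see the comment above)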

/// Check for a num threads constant value (stored in \p UpperBound), or
/// expression (stored in \p E). If the value is conditional (via an
/// if-clause), store the condition in \p CondVal. If \p E and \p CondVal
/// respectively are nullptr, no expression evaluation is performed.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                          const Expr **E, int32_t &UpperBound,
                          bool UpperBoundOnly, llvm::Value **CondVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
  if (!Dir)
    return;

  if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
    // Handle the if clause. If the if clause is present, the number of threads
    // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
    if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *CondExpr = IfClause->getCondition();
        bool Result;
        if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result) {
            UpperBound = 1;
            return;
          }
        } else {
          CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
            *CondVal = CGF.EvaluateExprAsBool(CondExpr);
          }
        }
      }
    }
    // Check the value of the num_threads clause iff the if clause was not
    // specified or does not evaluate to false.
    if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const auto *NumThreadsClause =
          Dir->getSingleClause<OMPNumThreadsClause>();
      const Expr *NTExpr = NumThreadsClause->getNumThreads();
      if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
          UpperBound =
              UpperBound
                  ? Constant->getZExtValue()
                  : std::min(UpperBound,
                             static_cast<int32_t>(Constant->getZExtValue()));
      // If we haven't found an upper bound, remember we saw a thread limiting
      // clause.
      if (UpperBound == -1)
        UpperBound = 0;
      if (!E)
        return;
      CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
      if (const auto *PreInit =
              cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
        for (const auto *I : PreInit->decls()) {
          if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          } else {
            CodeGenFunction::AutoVarEmission Emission =
                CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
            CGF.EmitAutoVarCleanups(Emission);
          }
        }
      }
      *E = NTExpr;
    }
    return;
  }
  if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
    UpperBound = 1;
}
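
// Illustrative evaluation (simplified): for a target region whose single
// child is
//   #pragma omp parallel if(c) num_threads(4)
// this helper reports UpperBound = 4, *E = the num_threads expression and,
// when 'c' does not fold to a constant, *CondVal = the emitted value of 'c',
// so the caller can materialize 'threads = c ? 4 : 1'. A constant-false if
// clause short-circuits to UpperBound = 1.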

const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
    bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
  assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  const Expr *NT = nullptr;
  const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;

  auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
    if (E->isIntegerConstantExpr(CGF.getContext())) {
      if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
        UpperBound = UpperBound ? Constant->getZExtValue()
                                : std::min(UpperBound,
                                           int32_t(Constant->getZExtValue()));
    }
    // If we haven't found an upper bound, remember we saw a thread limiting
    // clause.
    if (UpperBound == -1)
      UpperBound = 0;
    if (EPtr)
      *EPtr = E;
  };

  auto ReturnSequential = [&]() {
    UpperBound = 1;
    return NT;
  };

  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear how to resolve two thread limit clauses,
    // let's pick the teams one if it's present, otherwise the target one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        if (ThreadLimitExpr) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          CodeGenFunction::LexicalScope Scope(
              CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
        }
      }
    }
    if (ThreadLimitClause)
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return ReturnSequential();
    }
    return NT;
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      }
    }
    return NT;
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
                  UpperBoundOnly, CondVal);
    return NT;
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return ReturnSequential();
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          *CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
      return NumThreadsClause->getNumThreads();
    }
    return NT;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return ReturnSequential();
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
      &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. We have already handled the thread limit
    // expression.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(0);
  }

  // Handle the if clause. If the if clause is present, the number of threads
  // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
                                             CGF.Builder.getInt32(1));
  }

  // If the thread limit and num threads expressions were both present, take
  // the minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
        ThreadLimitVal, NumThreadsVal);
  }

  return NumThreadsVal;
}
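
// Illustrative combination (simplified): for
//   #pragma omp target parallel if(c) num_threads(n) thread_limit(t)
// the selects above compute roughly
//   nt = c ? n : 1
//   nt = (t <u nt) ? t : nt   ; unsigned minimum with the thread limit
// and nt = 0 (runtime default) when neither num_threads nor thread_limit is
// present.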

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain =
             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
         !(Remain & 1); Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that holds debugging information for a data mapping to be passed
  /// to the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
  using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
  using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
  using MapNonContiguousArrayTy =
      llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types,
  /// user-defined mappers, and non-contiguous information.
  struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
    MapExprsArrayTy Exprs;
    MapValueDeclsArrayTy Mappers;
    MapValueDeclsArrayTy DevicePtrDecls;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
                            CurInfo.DevicePtrDecls.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };
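
  // Worked example (illustrative): OMP_MAP_MEMBER_OF occupies the top 16 bits
  // of the 64-bit flag word (mask 0xffff000000000000), so getFlagMemberOffset()
  // above counts 48 trailing zero bits and returns 48; a MEMBER_OF(n)
  // annotation is then encoded as (uint64_t)n << 48.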

private:
  /// Information extracted from a map-like clause for one component list:
  /// the expression components themselves, the map type and modifiers, and
  /// flags describing how the entry must be emitted.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all firstprivate variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizeof(base) - lb * sizeof(elem), clamped to zero below.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
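
  // Illustrative sizes computed above (simplified; 'p' is a float*, 'arr' a
  // declared array):
  //   map(p[1:24])   -> 24 * sizeof(float)          ; explicit length
  //   map(arr[:])    -> sizeof(arr)                 ; whole base, no bounds
  //   map(arr[lb:])  -> sizeof(arr) - lb * sizeof(elem), clamped to >= 0
  //   map(([3][4])p) -> 3 * 4 * sizeof(float)       ; array shaping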

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
                   : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these
      // two type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
    return Bits;
  }
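
  // Illustrative flag composition (simplified):
  //   map(always, close, tofrom: x)
  //     -> OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE
  // with OMP_MAP_TARGET_PARAM added when the entry is a kernel argument of
  // the region and OMP_MAP_IMPLICIT when the map was synthesized by the
  // compiler rather than written in source.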

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is a pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unit size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size other than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo,
      MapCombinedInfoTy &StructBaseCombinedInfo,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit, bool GenerateAllInfoForClauses,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = std::nullopt) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    // int **a = &i;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map((*a)[0:3])
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(**a)
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //   compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //   compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
7037 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7038 if (const auto *VD = 7039 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7040 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7041 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7042 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7043 ((*Res == OMPDeclareTargetDeclAttr::MT_To || 7044 *Res == OMPDeclareTargetDeclAttr::MT_Enter) && 7045 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7046 RequiresReference = true; 7047 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7048 } 7049 } 7050 } 7051 7052 // If the variable is a pointer and is being dereferenced (i.e. is not 7053 // the last component), the base has to be the pointer itself, not its 7054 // reference. References are ignored for mapping purposes. 7055 QualType Ty = 7056 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7057 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7058 // No need to generate individual map information for the pointer, it 7059 // can be associated with the combined storage if shared memory mode is 7060 // active or the base declaration is not a global variable. 7061 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7062 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7063 !VD || VD->hasLocalStorage()) 7064 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7065 else 7066 FirstPointerInComplexData = true; 7067 ++I; 7068 } 7069 } 7070 7071 // Track whether a component of the list should be marked as MEMBER_OF some 7072 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7073 // in a component list should be marked as MEMBER_OF; all subsequent entries 7074 // do not belong to the base struct. E.g. 7075 // struct S2 s; 7076 // s.ps->ps->ps->f[:] 7077 // (1) (2) (3) (4) 7078 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7079 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7080 // is the pointee of ps(2) which is not a member of struct s, so it should not 7081 // be marked as such (it is still PTR_AND_OBJ). 7082 // The variable is initialized to false so that PTR_AND_OBJ entries which 7083 // are not struct members are not considered (e.g. array of pointers to 7084 // data). 7085 bool ShouldBeMemberOf = false; 7086 7087 // Variable keeping track of whether or not we have encountered a component 7088 // in the component list which is a member expression. Useful when we have a 7089 // pointer or a final array section, in which case it is the previous 7090 // component in the list which tells us whether we have a member expression. 7091 // E.g. X.f[:] 7092 // While processing the final array section "[:]" it is "f" which tells us 7093 // whether we are dealing with a member of a declared struct. 7094 const MemberExpr *EncounteredME = nullptr; 7095 7096 // Track the total number of dimensions. Start from one for the dummy 7097 // dimension. 7098 uint64_t DimSize = 1; 7099 7100 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7101 bool IsPrevMemberReference = false; 7102 7103 // We need to check if we will be encountering any member expressions (MEs). 7104 // If we do not encounter any, it means we will be mapping the whole struct. 7105 // In that case we need to skip adding an entry for the struct to the 7106 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo 7107 // list only when generating all info for clauses.
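// Illustrative sketch (assumed declarations, not from the original source):
// given 'struct S { int a, b; } s;', the clause 'map(tofrom: s)' has no
// member expression among its components, so the struct entry is routed to
// the StructBaseCombinedInfo list, whereas 'map(tofrom: s.a)' contains a
// member expression and IsMappingWholeStruct remains false.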
7108 bool IsMappingWholeStruct = true; 7109 if (!GenerateAllInfoForClauses) { 7110 IsMappingWholeStruct = false; 7111 } else { 7112 for (auto TempI = I; TempI != CE; ++TempI) { 7113 const MemberExpr *PossibleME = 7114 dyn_cast<MemberExpr>(TempI->getAssociatedExpression()); 7115 if (PossibleME) { 7116 IsMappingWholeStruct = false; 7117 break; 7118 } 7119 } 7120 } 7121 7122 for (; I != CE; ++I) { 7123 // If the current component is a member of a struct (parent struct), mark it. 7124 if (!EncounteredME) { 7125 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7126 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7127 // as MEMBER_OF the parent struct. 7128 if (EncounteredME) { 7129 ShouldBeMemberOf = true; 7130 // Do not emit as a complex pointer if this is actually not an array-like 7131 // expression. 7132 if (FirstPointerInComplexData) { 7133 QualType Ty = std::prev(I) 7134 ->getAssociatedDeclaration() 7135 ->getType() 7136 .getNonReferenceType(); 7137 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7138 FirstPointerInComplexData = false; 7139 } 7140 } 7141 } 7142 7143 auto Next = std::next(I); 7144 7145 // We need to generate the addresses and sizes if this is the last 7146 // component, if the component is a pointer or if it is an array section 7147 // whose length can't be proved to be one. If this is a pointer, it 7148 // becomes the base address for the following components. 7149 7150 // A final array section is one whose length can't be proved to be one. 7151 // If the map item is non-contiguous then we don't treat any array section 7152 // as a final array section. 7153 bool IsFinalArraySection = 7154 !IsNonContiguous && 7155 isFinalArraySectionExpression(I->getAssociatedExpression()); 7156 7157 // If we have a declaration for the mapping, use that; otherwise use 7158 // the base declaration of the map clause. 7159 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7160 ? I->getAssociatedDeclaration() 7161 : BaseDecl; 7162 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() 7163 : MapExpr; 7164 7165 // Get information on whether the element is a pointer. Array sections 7166 // need special treatment given that they are built-in 7167 // types. 7168 const auto *OASE = 7169 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7170 const auto *OAShE = 7171 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7172 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7173 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7174 bool IsPointer = 7175 OAShE || 7176 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7177 .getCanonicalType() 7178 ->isAnyPointerType()) || 7179 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7180 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 7181 MapDecl && 7182 MapDecl->getType()->isLValueReferenceType(); 7183 bool IsNonDerefPointer = IsPointer && 7184 !(UO && UO->getOpcode() != UO_Deref) && !BO && 7185 !IsNonContiguous; 7186 7187 if (OASE) 7188 ++DimSize; 7189 7190 if (Next == CE || IsMemberReference || IsNonDerefPointer || 7191 IsFinalArraySection) { 7192 // If this is not the last component, we expect the pointer to be 7193 // associated with an array expression or member expression.
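// (For instance, in an assumed expression 'ps->x' the pointer component
// 'ps' is followed by the member expression 'x'; in 'p[0:n]' it is followed
// by an array-section expression.)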
7194 assert((Next == CE || 7195 isa<MemberExpr>(Next->getAssociatedExpression()) || 7196 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7197 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7198 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 7199 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7200 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7201 "Unexpected expression"); 7202 7203 Address LB = Address::invalid(); 7204 Address LowestElem = Address::invalid(); 7205 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 7206 const MemberExpr *E) { 7207 const Expr *BaseExpr = E->getBase(); 7208 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 7209 // scalar. 7210 LValue BaseLV; 7211 if (E->isArrow()) { 7212 LValueBaseInfo BaseInfo; 7213 TBAAAccessInfo TBAAInfo; 7214 Address Addr = 7215 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 7216 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 7217 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 7218 } else { 7219 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 7220 } 7221 return BaseLV; 7222 }; 7223 if (OAShE) { 7224 LowestElem = LB = 7225 Address(CGF.EmitScalarExpr(OAShE->getBase()), 7226 CGF.ConvertTypeForMem( 7227 OAShE->getBase()->getType()->getPointeeType()), 7228 CGF.getContext().getTypeAlignInChars( 7229 OAShE->getBase()->getType())); 7230 } else if (IsMemberReference) { 7231 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 7232 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7233 LowestElem = CGF.EmitLValueForFieldInitialization( 7234 BaseLVal, cast<FieldDecl>(MapDecl)) 7235 .getAddress(CGF); 7236 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 7237 .getAddress(CGF); 7238 } else { 7239 LowestElem = LB = 7240 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7241 .getAddress(CGF); 7242 } 7243 7244 // If this component is a pointer inside the base struct then we don't 7245 // need to create any entry for it - it will be combined with the object 7246 // it is pointing to into a single PTR_AND_OBJ entry. 7247 bool IsMemberPointerOrAddr = 7248 EncounteredME && 7249 (((IsPointer || ForDeviceAddr) && 7250 I->getAssociatedExpression() == EncounteredME) || 7251 (IsPrevMemberReference && !IsPointer) || 7252 (IsMemberReference && Next != CE && 7253 !Next->getAssociatedExpression()->getType()->isPointerType())); 7254 if (!OverlappedElements.empty() && Next == CE) { 7255 // Handle base element with the info for overlapped elements. 7256 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7257 assert(!IsPointer && 7258 "Unexpected base element with the pointer type."); 7259 // Mark the whole struct as the struct that requires allocation on the 7260 // device. 
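// The lowest element is recorded with field index 0 and the highest with
// the maximum representable index, so this whole-struct entry always spans
// any member entries recorded later for the same struct.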
7261 PartialStruct.LowestElem = {0, LowestElem}; 7262 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7263 I->getAssociatedExpression()->getType()); 7264 Address HB = CGF.Builder.CreateConstGEP( 7265 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 7266 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty), 7267 TypeSize.getQuantity() - 1); 7268 PartialStruct.HighestElem = { 7269 std::numeric_limits<decltype( 7270 PartialStruct.HighestElem.first)>::max(), 7271 HB}; 7272 PartialStruct.Base = BP; 7273 PartialStruct.LB = LB; 7274 assert( 7275 PartialStruct.PreliminaryMapData.BasePointers.empty() && 7276 "Overlapped elements must be used only once for the variable."); 7277 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 7278 // Emit data for non-overlapped data. 7279 OpenMPOffloadMappingFlags Flags = 7280 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | 7281 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7282 /*AddPtrFlag=*/false, 7283 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 7284 llvm::Value *Size = nullptr; 7285 // Do bitcopy of all non-overlapped structure elements. 7286 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7287 Component : OverlappedElements) { 7288 Address ComponentLB = Address::invalid(); 7289 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7290 Component) { 7291 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 7292 const auto *FD = dyn_cast<FieldDecl>(VD); 7293 if (FD && FD->getType()->isLValueReferenceType()) { 7294 const auto *ME = 7295 cast<MemberExpr>(MC.getAssociatedExpression()); 7296 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7297 ComponentLB = 7298 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 7299 .getAddress(CGF); 7300 } else { 7301 ComponentLB = 7302 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7303 .getAddress(CGF); 7304 } 7305 Size = CGF.Builder.CreatePtrDiff( 7306 CGF.Int8Ty, ComponentLB.getPointer(), LB.getPointer()); 7307 break; 7308 } 7309 } 7310 assert(Size && "Failed to determine structure size"); 7311 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7312 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7313 CombinedInfo.DevicePtrDecls.push_back(nullptr); 7314 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 7315 CombinedInfo.Pointers.push_back(LB.getPointer()); 7316 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7317 Size, CGF.Int64Ty, /*isSigned=*/true)); 7318 CombinedInfo.Types.push_back(Flags); 7319 CombinedInfo.Mappers.push_back(nullptr); 7320 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7321 : 1); 7322 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7323 } 7324 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7325 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7326 CombinedInfo.DevicePtrDecls.push_back(nullptr); 7327 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 7328 CombinedInfo.Pointers.push_back(LB.getPointer()); 7329 Size = CGF.Builder.CreatePtrDiff( 7330 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 7331 LB.getPointer()); 7332 CombinedInfo.Sizes.push_back( 7333 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7334 CombinedInfo.Types.push_back(Flags); 7335 CombinedInfo.Mappers.push_back(nullptr); 7336 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 7337 : 1); 7338 break; 7339 } 7340 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7341 // Skip adding an entry in the CurInfo of this combined entry if the 7342 // whole struct is currently being mapped. The struct needs to be added 7343 // in the first position before any data internal to the struct is being 7344 // mapped. 7345 if (!IsMemberPointerOrAddr || 7346 (Next == CE && MapType != OMPC_MAP_unknown)) { 7347 if (!IsMappingWholeStruct) { 7348 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7349 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7350 CombinedInfo.DevicePtrDecls.push_back(nullptr); 7351 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 7352 CombinedInfo.Pointers.push_back(LB.getPointer()); 7353 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7354 Size, CGF.Int64Ty, /*isSigned=*/true)); 7355 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7356 : 1); 7357 } else { 7358 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7359 StructBaseCombinedInfo.BasePointers.push_back(BP.getPointer()); 7360 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr); 7361 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 7362 StructBaseCombinedInfo.Pointers.push_back(LB.getPointer()); 7363 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7364 Size, CGF.Int64Ty, /*isSigned=*/true)); 7365 StructBaseCombinedInfo.NonContigInfo.Dims.push_back( 7366 IsNonContiguous ? DimSize : 1); 7367 } 7368 7369 // If Mapper is valid, the last component inherits the mapper. 7370 bool HasMapper = Mapper && Next == CE; 7371 if (!IsMappingWholeStruct) 7372 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 7373 else 7374 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper 7375 : nullptr); 7376 7377 // We need to add a pointer flag for each map that comes from the 7378 // same expression except for the first one. We also need to signal 7379 // this map is the first one that relates with the current capture 7380 // (there is a set of entries for each capture). 7381 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7382 MapType, MapModifiers, MotionModifiers, IsImplicit, 7383 !IsExpressionFirstInfo || RequiresReference || 7384 FirstPointerInComplexData || IsMemberReference, 7385 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 7386 7387 if (!IsExpressionFirstInfo || IsMemberReference) { 7388 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7389 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7390 if (IsPointer || (IsMemberReference && Next != CE)) 7391 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO | 7392 OpenMPOffloadMappingFlags::OMP_MAP_FROM | 7393 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS | 7394 OpenMPOffloadMappingFlags::OMP_MAP_DELETE | 7395 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE); 7396 7397 if (ShouldBeMemberOf) { 7398 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7399 // should be later updated with the correct value of MEMBER_OF. 7400 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF; 7401 // From now on, all subsequent PTR_AND_OBJ entries should not be 7402 // marked as MEMBER_OF. 7403 ShouldBeMemberOf = false; 7404 } 7405 } 7406 7407 if (!IsMappingWholeStruct) 7408 CombinedInfo.Types.push_back(Flags); 7409 else 7410 StructBaseCombinedInfo.Types.push_back(Flags); 7411 } 7412 7413 // If we have encountered a member expression so far, keep track of the 7414 // mapped member. 
If the parent is "*this", then the value declaration 7415 // is nullptr. 7416 if (EncounteredME) { 7417 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 7418 unsigned FieldIndex = FD->getFieldIndex(); 7419 7420 // Update info about the lowest and highest elements for this struct. 7421 if (!PartialStruct.Base.isValid()) { 7422 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 7423 if (IsFinalArraySection) { 7424 Address HB = 7425 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 7426 .getAddress(CGF); 7427 PartialStruct.HighestElem = {FieldIndex, HB}; 7428 } else { 7429 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 7430 } 7431 PartialStruct.Base = BP; 7432 PartialStruct.LB = BP; 7433 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7434 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 7435 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7436 if (IsFinalArraySection) { 7437 Address HB = 7438 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 7439 .getAddress(CGF); 7440 PartialStruct.HighestElem = {FieldIndex, HB}; 7441 } else { 7442 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 7443 } 7444 } 7445 } 7446 7447 // Need to emit a combined struct for array sections. 7448 if (IsFinalArraySection || IsNonContiguous) 7449 PartialStruct.IsArraySection = true; 7450 7451 // If we have a final array section, we are done with this expression. 7452 if (IsFinalArraySection) 7453 break; 7454 7455 // The pointer becomes the base for the next element. 7456 if (Next != CE) 7457 BP = IsMemberReference ? LowestElem : LB; 7458 7459 IsExpressionFirstInfo = false; 7460 IsCaptureFirstInfo = false; 7461 FirstPointerInComplexData = false; 7462 IsPrevMemberReference = IsMemberReference; 7463 } else if (FirstPointerInComplexData) { 7464 QualType Ty = Components.rbegin() 7465 ->getAssociatedDeclaration() 7466 ->getType() 7467 .getNonReferenceType(); 7468 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7469 FirstPointerInComplexData = false; 7470 } 7471 } 7472 // If we ran into the whole component, allocate the space for the whole 7473 // record. 7474 if (!EncounteredME) 7475 PartialStruct.HasCompleteRecord = true; 7476 7477 if (!IsNonContiguous) 7478 return; 7479 7480 const ASTContext &Context = CGF.getContext(); 7481 7482 // To support strides in array sections, we need to initialize the first 7483 // dimension size as 1, the first offset as 0, and the first count as 1. 7484 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 7485 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 7486 MapValuesArrayTy CurStrides; 7487 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 7488 uint64_t ElementTypeSize; 7489 7490 // Collect size information for each dimension and get the element size as 7491 // the first stride. For example, for `int arr[10][10]`, the DimSizes 7492 // should be [10, 10] and the first stride is 4 bytes. 7493 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 7494 Components) { 7495 const Expr *AssocExpr = Component.getAssociatedExpression(); 7496 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7497 7498 if (!OASE) 7499 continue; 7500 7501 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 7502 auto *CAT = Context.getAsConstantArrayType(Ty); 7503 auto *VAT = Context.getAsVariableArrayType(Ty); 7504 7505 // We need all the dimension sizes except for the last dimension.
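// (The last dimension's size is unnecessary because each stride is a
// product of the sizes of the dimensions preceding it; see the worked
// stride table further below.)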
7506 assert((VAT || CAT || &Component == &*Components.begin()) && 7507 "Should be either ConstantArray or VariableArray if not the " 7508 "first Component"); 7509 7510 // Get element size if CurStrides is empty. 7511 if (CurStrides.empty()) { 7512 const Type *ElementType = nullptr; 7513 if (CAT) 7514 ElementType = CAT->getElementType().getTypePtr(); 7515 else if (VAT) 7516 ElementType = VAT->getElementType().getTypePtr(); 7517 else 7518 assert(&Component == &*Components.begin() && 7519 "Only expect pointer (non CAT or VAT) when this is the " 7520 "first Component"); 7521 // If ElementType is null, then it means the base is a pointer 7522 // (neither CAT nor VAT) and we'll attempt to get ElementType again 7523 // in the next iteration. 7524 if (ElementType) { 7525 // When the base is a pointer, we need to remove one level of 7526 // indirection. 7527 if (&Component != &*Components.begin()) 7528 ElementType = ElementType->getPointeeOrArrayElementType(); 7529 ElementTypeSize = 7530 Context.getTypeSizeInChars(ElementType).getQuantity(); 7531 CurStrides.push_back( 7532 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 7533 } 7534 } 7535 // Get the dimension size, except for the last dimension, since we don't 7536 // need it. 7537 if (DimSizes.size() < Components.size() - 1) { 7538 if (CAT) 7539 DimSizes.push_back(llvm::ConstantInt::get( 7540 CGF.Int64Ty, CAT->getSize().getZExtValue())); 7541 else if (VAT) 7542 DimSizes.push_back(CGF.Builder.CreateIntCast( 7543 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 7544 /*IsSigned=*/false)); 7545 } 7546 } 7547 7548 // Skip the dummy dimension since we already have its information. 7549 auto *DI = DimSizes.begin() + 1; 7550 // Product of dimensions. 7551 llvm::Value *DimProd = 7552 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 7553 7554 // Collect info for non-contiguous maps. Note that offset, count, and stride 7555 // are only meaningful for array sections, so we insert a null for anything 7556 // other than an array section. 7557 // Also, the sizes of the offset, count, and stride lists are not the same 7558 // as those of pointers, base_pointers, sizes, or dims. Instead, they match 7559 // the number of non-contiguous 7560 // declarations in the target update to/from clause. 7561 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 7562 Components) { 7563 const Expr *AssocExpr = Component.getAssociatedExpression(); 7564 7565 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 7566 llvm::Value *Offset = CGF.Builder.CreateIntCast( 7567 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 7568 /*isSigned=*/false); 7569 CurOffsets.push_back(Offset); 7570 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 7571 CurStrides.push_back(CurStrides.back()); 7572 continue; 7573 } 7574 7575 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7576 7577 if (!OASE) 7578 continue; 7579 7580 // Offset 7581 const Expr *OffsetExpr = OASE->getLowerBound(); 7582 llvm::Value *Offset = nullptr; 7583 if (!OffsetExpr) { 7584 // If the offset is absent, then we just set it to zero.
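// (Per OpenMP 5.0 2.1.5 an omitted lower bound defaults to 0, so an
// assumed section 'arr[:n]' behaves like 'arr[0:n]'.)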
7585 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 7586 } else { 7587 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 7588 CGF.Int64Ty, 7589 /*isSigned=*/false); 7590 } 7591 CurOffsets.push_back(Offset); 7592 7593 // Count 7594 const Expr *CountExpr = OASE->getLength(); 7595 llvm::Value *Count = nullptr; 7596 if (!CountExpr) { 7597 // In Clang, once a higher dimension is an array section, all lower 7598 // dimensions are constructed as array sections too; however, for a case 7599 // like arr[0:2][2], Clang constructs the inner dimension as an array 7600 // section even though it is not in array-section form according to the spec. 7601 if (!OASE->getColonLocFirst().isValid() && 7602 !OASE->getColonLocSecond().isValid()) { 7603 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 7604 } else { 7605 // OpenMP 5.0, 2.1.5 Array Sections, Description. 7606 // When the length is absent it defaults to ⌈(size − 7607 // lower-bound)/stride⌉, where size is the size of the array 7608 // dimension. 7609 const Expr *StrideExpr = OASE->getStride(); 7610 llvm::Value *Stride = 7611 StrideExpr 7612 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 7613 CGF.Int64Ty, /*isSigned=*/false) 7614 : nullptr; 7615 if (Stride) 7616 Count = CGF.Builder.CreateUDiv( 7617 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 7618 else 7619 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 7620 } 7621 } else { 7622 Count = CGF.EmitScalarExpr(CountExpr); 7623 } 7624 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 7625 CurCounts.push_back(Count); 7626 7627 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 7628 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 7629 // Offset Count Stride 7630 // D0 0 1 4 (int) <- dummy dimension 7631 // D1 0 2 8 (2 * (1) * 4) 7632 // D2 1 2 20 (1 * (1 * 5) * 4) 7633 // D3 0 2 200 (2 * (1 * 5 * 5) * 4) 7634 const Expr *StrideExpr = OASE->getStride(); 7635 llvm::Value *Stride = 7636 StrideExpr 7637 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 7638 CGF.Int64Ty, /*isSigned=*/false) 7639 : nullptr; 7640 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 7641 if (Stride) 7642 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 7643 else 7644 CurStrides.push_back(DimProd); 7645 if (DI != DimSizes.end()) 7646 ++DI; 7647 } 7648 7649 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 7650 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 7651 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 7652 } 7653 7654 /// Return the adjusted map modifiers if the declaration a capture refers to 7655 /// appears in a first-private clause. This is expected to be used only with 7656 /// directives that start with 'target'. 7657 OpenMPOffloadMappingFlags 7658 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7659 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7660 7661 // A firstprivate variable captured by reference will use only the 7662 // 'private ptr' and 'map to' flags. Return the right flags if the captured 7663 // declaration is known as first-private in this handler.
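// Illustrative (assumed directive): for '#pragma omp target firstprivate(p)'
// with 'int *p', this returns OMP_MAP_TO | OMP_MAP_PTR_AND_OBJ; for a
// non-pointer 'firstprivate(x)' it returns OMP_MAP_PRIVATE | OMP_MAP_TO.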
7664 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 7665 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 7666 return OpenMPOffloadMappingFlags::OMP_MAP_TO | 7667 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ; 7668 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE | 7669 OpenMPOffloadMappingFlags::OMP_MAP_TO; 7670 } 7671 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl()); 7672 if (I != LambdasMap.end()) 7673 // for map(to: lambda): using user specified map type. 7674 return getMapTypeBits( 7675 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(), 7676 /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(), 7677 /*AddPtrFlag=*/false, 7678 /*AddIsTargetParamFlag=*/false, 7679 /*isNonContiguous=*/false); 7680 return OpenMPOffloadMappingFlags::OMP_MAP_TO | 7681 OpenMPOffloadMappingFlags::OMP_MAP_FROM; 7682 } 7683 7684 void getPlainLayout(const CXXRecordDecl *RD, 7685 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7686 bool AsBase) const { 7687 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7688 7689 llvm::StructType *St = 7690 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7691 7692 unsigned NumElements = St->getNumElements(); 7693 llvm::SmallVector< 7694 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7695 RecordLayout(NumElements); 7696 7697 // Fill bases. 7698 for (const auto &I : RD->bases()) { 7699 if (I.isVirtual()) 7700 continue; 7701 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7702 // Ignore empty bases. 7703 if (Base->isEmpty() || CGF.getContext() 7704 .getASTRecordLayout(Base) 7705 .getNonVirtualSize() 7706 .isZero()) 7707 continue; 7708 7709 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7710 RecordLayout[FieldIndex] = Base; 7711 } 7712 // Fill in virtual bases. 7713 for (const auto &I : RD->vbases()) { 7714 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7715 // Ignore empty bases. 7716 if (Base->isEmpty()) 7717 continue; 7718 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7719 if (RecordLayout[FieldIndex]) 7720 continue; 7721 RecordLayout[FieldIndex] = Base; 7722 } 7723 // Fill in all the fields. 7724 assert(!RD->isUnion() && "Unexpected union."); 7725 for (const auto *Field : RD->fields()) { 7726 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7727 // will fill in later.) 7728 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 7729 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7730 RecordLayout[FieldIndex] = Field; 7731 } 7732 } 7733 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7734 &Data : RecordLayout) { 7735 if (Data.isNull()) 7736 continue; 7737 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7738 getPlainLayout(Base, Layout, /*AsBase=*/true); 7739 else 7740 Layout.push_back(Data.get<const FieldDecl *>()); 7741 } 7742 } 7743 7744 /// Generate all the base pointers, section pointers, sizes, map types, and 7745 /// mappers for the extracted mappable expressions (all included in \a 7746 /// CombinedInfo). Also, for each item that relates with a device pointer, a 7747 /// pair of the relevant declaration and index where it occurs is appended to 7748 /// the device pointers info array. 
7749 void generateAllInfoForClauses( 7750 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 7751 llvm::OpenMPIRBuilder &OMPBuilder, 7752 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 7753 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 7754 // We have to process the component lists that relate with the same 7755 // declaration in a single chunk so that we can generate the map flags 7756 // correctly. Therefore, we organize all lists in a map. 7757 enum MapKind { Present, Allocs, Other, Total }; 7758 llvm::MapVector<CanonicalDeclPtr<const Decl>, 7759 SmallVector<SmallVector<MapInfo, 8>, 4>> 7760 Info; 7761 7762 // Helper function to fill the information map for the different supported 7763 // clauses. 7764 auto &&InfoGen = 7765 [&Info, &SkipVarSet]( 7766 const ValueDecl *D, MapKind Kind, 7767 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 7768 OpenMPMapClauseKind MapType, 7769 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7770 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7771 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 7772 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { 7773 if (SkipVarSet.contains(D)) 7774 return; 7775 auto It = Info.find(D); 7776 if (It == Info.end()) 7777 It = Info 7778 .insert(std::make_pair( 7779 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total))) 7780 .first; 7781 It->second[Kind].emplace_back( 7782 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, 7783 IsImplicit, Mapper, VarRef, ForDeviceAddr); 7784 }; 7785 7786 for (const auto *Cl : Clauses) { 7787 const auto *C = dyn_cast<OMPMapClause>(Cl); 7788 if (!C) 7789 continue; 7790 MapKind Kind = Other; 7791 if (llvm::is_contained(C->getMapTypeModifiers(), 7792 OMPC_MAP_MODIFIER_present)) 7793 Kind = Present; 7794 else if (C->getMapType() == OMPC_MAP_alloc) 7795 Kind = Allocs; 7796 const auto *EI = C->getVarRefs().begin(); 7797 for (const auto L : C->component_lists()) { 7798 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr; 7799 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), 7800 C->getMapTypeModifiers(), std::nullopt, 7801 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), 7802 E); 7803 ++EI; 7804 } 7805 } 7806 for (const auto *Cl : Clauses) { 7807 const auto *C = dyn_cast<OMPToClause>(Cl); 7808 if (!C) 7809 continue; 7810 MapKind Kind = Other; 7811 if (llvm::is_contained(C->getMotionModifiers(), 7812 OMPC_MOTION_MODIFIER_present)) 7813 Kind = Present; 7814 const auto *EI = C->getVarRefs().begin(); 7815 for (const auto L : C->component_lists()) { 7816 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt, 7817 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 7818 C->isImplicit(), std::get<2>(L), *EI); 7819 ++EI; 7820 } 7821 } 7822 for (const auto *Cl : Clauses) { 7823 const auto *C = dyn_cast<OMPFromClause>(Cl); 7824 if (!C) 7825 continue; 7826 MapKind Kind = Other; 7827 if (llvm::is_contained(C->getMotionModifiers(), 7828 OMPC_MOTION_MODIFIER_present)) 7829 Kind = Present; 7830 const auto *EI = C->getVarRefs().begin(); 7831 for (const auto L : C->component_lists()) { 7832 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, 7833 std::nullopt, C->getMotionModifiers(), 7834 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), 7835 *EI); 7836 ++EI; 7837 } 7838 } 7839 7840 // Look at the use_device_ptr and use_device_addr clauses information and 7841 // mark the existing map entries as such. 
If there is no map information for 7842 // an entry in the use_device_ptr and use_device_addr list, we create one 7843 // with map type 'alloc' and zero size section. It is the user's fault if that 7844 // was not mapped before. If there is no map information and the pointer is 7845 // a struct member, then we defer the emission of that entry until the whole 7846 // struct has been processed. 7847 llvm::MapVector<CanonicalDeclPtr<const Decl>, 7848 SmallVector<DeferredDevicePtrEntryTy, 4>> 7849 DeferredInfo; 7850 MapCombinedInfoTy UseDeviceDataCombinedInfo; 7851 7852 auto &&UseDeviceDataCombinedInfoGen = 7853 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr, 7854 CodeGenFunction &CGF, bool IsDevAddr) { 7855 UseDeviceDataCombinedInfo.Exprs.push_back(VD); 7856 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr); 7857 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD); 7858 UseDeviceDataCombinedInfo.DevicePointers.emplace_back( 7859 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer); 7860 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr); 7861 UseDeviceDataCombinedInfo.Sizes.push_back( 7862 llvm::Constant::getNullValue(CGF.Int64Ty)); 7863 UseDeviceDataCombinedInfo.Types.push_back( 7864 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM); 7865 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr); 7866 }; 7867 7868 auto &&MapInfoGen = 7869 [&DeferredInfo, &UseDeviceDataCombinedInfoGen, 7870 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD, 7871 OMPClauseMappableExprCommon::MappableExprComponentListRef 7872 Components, 7873 bool IsImplicit, bool IsDevAddr) { 7874 // We didn't find any match in our map information; generate a zero 7875 // size array section. If the pointer is a struct member we defer 7876 // this action until the whole struct has been processed. 7877 if (isa<MemberExpr>(IE)) { 7878 // Insert the pointer into Info to be processed by 7879 // generateInfoForComponentList. Because it is a member pointer 7880 // without a pointee, no entry will be generated for it, therefore 7881 // we need to generate one after the whole struct has been 7882 // processed. Nonetheless, generateInfoForComponentList must be 7883 // called to take the pointer into account for the calculation of 7884 // the range of the partial struct. 7885 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt, 7886 std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit, 7887 nullptr, nullptr, IsDevAddr); 7888 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr); 7889 } else { 7890 llvm::Value *Ptr; 7891 if (IsDevAddr) { 7892 if (IE->isGLValue()) 7893 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 7894 else 7895 Ptr = CGF.EmitScalarExpr(IE); 7896 } else { 7897 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 7898 } 7899 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr); 7900 } 7901 }; 7902 7903 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD, 7904 const Expr *IE, bool IsDevAddr) -> bool { 7905 // We potentially have map information for this declaration already. 7906 // Look for the first set of components that refer to it. If found, 7907 // return true. 7908 // If the first component is a member expression, we have to look into 7909 // 'this', which maps to null in the map of map information. Otherwise 7910 // look directly for the information. 7911 auto It = Info.find(isa<MemberExpr>(IE) ?
nullptr : VD); 7912 if (It != Info.end()) { 7913 bool Found = false; 7914 for (auto &Data : It->second) { 7915 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 7916 return MI.Components.back().getAssociatedDeclaration() == VD; 7917 }); 7918 // If we found a map entry, signal that the pointer has to be 7919 // returned and move on to the next declaration. Exclude cases where 7920 // the base pointer is mapped as an array subscript, array section, or 7921 // array shaping expression. The base address is passed as a pointer to 7922 // the base in this case and cannot be used as the base for a 7923 // use_device_ptr list item. 7924 if (CI != Data.end()) { 7925 if (IsDevAddr) { 7926 CI->ForDeviceAddr = IsDevAddr; 7927 CI->ReturnDevicePointer = true; 7928 Found = true; 7929 break; 7930 } else { 7931 auto PrevCI = std::next(CI->Components.rbegin()); 7932 const auto *VarD = dyn_cast<VarDecl>(VD); 7933 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7934 isa<MemberExpr>(IE) || 7935 !VD->getType().getNonReferenceType()->isPointerType() || 7936 PrevCI == CI->Components.rend() || 7937 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 7938 VarD->hasLocalStorage()) { 7939 CI->ForDeviceAddr = IsDevAddr; 7940 CI->ReturnDevicePointer = true; 7941 Found = true; 7942 break; 7943 } 7944 } 7945 } 7946 } 7947 return Found; 7948 } 7949 return false; 7950 }; 7951 7952 // Look at the use_device_ptr clause information and mark the existing map 7953 // entries as such. If there is no map information for an entry in the 7954 // use_device_ptr list, we create one with map type 'alloc' and zero size 7955 // section. It is the user's fault if that was not mapped before. If there is 7956 // no map information and the pointer is a struct member, then we defer the 7957 // emission of that entry until the whole struct has been processed.
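// Illustrative (assumed directives):
// '#pragma omp target data map(to: p[0:10]) use_device_ptr(p)' finds the
// existing map entry for 'p' and marks it so the device pointer is returned,
// while '#pragma omp target data use_device_ptr(p)' with no prior map takes
// the zero-size 'alloc' path described above.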
7958 for (const auto *Cl : Clauses) { 7959 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 7960 if (!C) 7961 continue; 7962 for (const auto L : C->component_lists()) { 7963 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 7964 std::get<1>(L); 7965 assert(!Components.empty() && 7966 "Not expecting empty list of components!"); 7967 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 7968 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 7969 const Expr *IE = Components.back().getAssociatedExpression(); 7970 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false)) 7971 continue; 7972 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(), 7973 /*IsDevAddr=*/false); 7974 } 7975 } 7976 7977 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 7978 for (const auto *Cl : Clauses) { 7979 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl); 7980 if (!C) 7981 continue; 7982 for (const auto L : C->component_lists()) { 7983 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 7984 std::get<1>(L); 7985 assert(!std::get<1>(L).empty() && 7986 "Not expecting empty list of components!"); 7987 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 7988 if (!Processed.insert(VD).second) 7989 continue; 7990 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 7991 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 7992 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true)) 7993 continue; 7994 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(), 7995 /*IsDevAddr=*/true); 7996 } 7997 } 7998 7999 for (const auto &Data : Info) { 8000 StructRangeInfoTy PartialStruct; 8001 // Current struct information: 8002 MapCombinedInfoTy CurInfo; 8003 // Current struct base information: 8004 MapCombinedInfoTy StructBaseCurInfo; 8005 const Decl *D = Data.first; 8006 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8007 for (const auto &M : Data.second) { 8008 for (const MapInfo &L : M) { 8009 assert(!L.Components.empty() && 8010 "Not expecting declaration with no component lists."); 8011 8012 // Remember the current base pointer index. 8013 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8014 unsigned StructBasePointersIdx = 8015 StructBaseCurInfo.BasePointers.size(); 8016 CurInfo.NonContigInfo.IsNonContiguous = 8017 L.Components.back().isNonContiguous(); 8018 generateInfoForComponentList( 8019 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8020 CurInfo, StructBaseCurInfo, PartialStruct, 8021 /*IsFirstComponentList=*/false, L.IsImplicit, 8022 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD, 8023 L.VarRef); 8024 8025 // If this entry relates to a device pointer, set the relevant 8026 // declaration and add the 'return pointer' flag. 8027 if (L.ReturnDevicePointer) { 8028 // Check whether a value was added to either CurInfo or 8029 // StructBaseCurInfo and error if no value was added to either of 8030 // them: 8031 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() || 8032 StructBasePointersIdx < 8033 StructBaseCurInfo.BasePointers.size()) && 8034 "Unexpected number of mapped base pointers."); 8035 8036 // Choose a base pointer index which is always valid: 8037 const ValueDecl *RelevantVD = 8038 L.Components.back().getAssociatedDeclaration(); 8039 assert(RelevantVD && 8040 "No relevant declaration related with device pointer??"); 8041 8042 // If StructBaseCurInfo has been updated this iteration then work on 8043 // the first new entry added to it i.e. 
make sure that when multiple 8044 // values are added to any of the lists, the first value added is 8045 // being modified by the assignments below (not the last value 8046 // added). 8047 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) { 8048 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] = 8049 RelevantVD; 8050 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] = 8051 L.ForDeviceAddr ? DeviceInfoTy::Address 8052 : DeviceInfoTy::Pointer; 8053 StructBaseCurInfo.Types[StructBasePointersIdx] |= 8054 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; 8055 } else { 8056 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD; 8057 CurInfo.DevicePointers[CurrentBasePointersIdx] = 8058 L.ForDeviceAddr ? DeviceInfoTy::Address 8059 : DeviceInfoTy::Pointer; 8060 CurInfo.Types[CurrentBasePointersIdx] |= 8061 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; 8062 } 8063 } 8064 } 8065 } 8066 8067 // Append any pending zero-length pointers which are struct members and 8068 // used with use_device_ptr or use_device_addr. 8069 auto CI = DeferredInfo.find(Data.first); 8070 if (CI != DeferredInfo.end()) { 8071 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8072 llvm::Value *BasePtr; 8073 llvm::Value *Ptr; 8074 if (L.ForDeviceAddr) { 8075 if (L.IE->isGLValue()) 8076 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8077 else 8078 Ptr = this->CGF.EmitScalarExpr(L.IE); 8079 BasePtr = Ptr; 8080 // Entry is RETURN_PARAM. Also, set the placeholder value 8081 // MEMBER_OF=FFFF so that the entry is later updated with the 8082 // correct value of MEMBER_OF. 8083 CurInfo.Types.push_back( 8084 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM | 8085 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); 8086 } else { 8087 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8088 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8089 L.IE->getExprLoc()); 8090 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8091 // placeholder value MEMBER_OF=FFFF so that the entry is later 8092 // updated with the correct value of MEMBER_OF. 8093 CurInfo.Types.push_back( 8094 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | 8095 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM | 8096 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); 8097 } 8098 CurInfo.Exprs.push_back(L.VD); 8099 CurInfo.BasePointers.emplace_back(BasePtr); 8100 CurInfo.DevicePtrDecls.emplace_back(L.VD); 8101 CurInfo.DevicePointers.emplace_back( 8102 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer); 8103 CurInfo.Pointers.push_back(Ptr); 8104 CurInfo.Sizes.push_back( 8105 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8106 CurInfo.Mappers.push_back(nullptr); 8107 } 8108 } 8109 8110 // Unify entries in one list making sure the struct mapping precedes the 8111 // individual fields: 8112 MapCombinedInfoTy UnionCurInfo; 8113 UnionCurInfo.append(StructBaseCurInfo); 8114 UnionCurInfo.append(CurInfo); 8115 8116 // If there is an entry in PartialStruct it means we have a struct with 8117 // individual members mapped. Emit an extra combined entry. 8118 if (PartialStruct.Base.isValid()) { 8119 UnionCurInfo.NonContigInfo.Dims.push_back(0); 8120 // Emit a combined entry: 8121 emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct, 8122 /*IsMapThis*/ !VD, OMPBuilder, VD); 8123 } 8124 8125 // We need to append the results of this capture to what we already have. 8126 CombinedInfo.append(UnionCurInfo); 8127 } 8128 // Append data for use_device_ptr clauses. 
8129 CombinedInfo.append(UseDeviceDataCombinedInfo); 8130 } 8131 8132 public: 8133 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8134 : CurDir(&Dir), CGF(CGF) { 8135 // Extract firstprivate clause information. 8136 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8137 for (const auto *D : C->varlists()) 8138 FirstPrivateDecls.try_emplace( 8139 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8140 // Extract implicit firstprivates from uses_allocators clauses. 8141 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8142 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8143 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8144 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8145 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8146 /*Implicit=*/true); 8147 else if (const auto *VD = dyn_cast<VarDecl>( 8148 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8149 ->getDecl())) 8150 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8151 } 8152 } 8153 // Extract device pointer clause information. 8154 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8155 for (auto L : C->component_lists()) 8156 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8157 // Extract device addr clause information. 8158 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>()) 8159 for (auto L : C->component_lists()) 8160 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L)); 8161 // Extract map information. 8162 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { 8163 if (C->getMapType() != OMPC_MAP_to) 8164 continue; 8165 for (auto L : C->component_lists()) { 8166 const ValueDecl *VD = std::get<0>(L); 8167 const auto *RD = VD ? VD->getType() 8168 .getCanonicalType() 8169 .getNonReferenceType() 8170 ->getAsCXXRecordDecl() 8171 : nullptr; 8172 if (RD && RD->isLambda()) 8173 LambdasMap.try_emplace(std::get<0>(L), C); 8174 } 8175 } 8176 } 8177 8178 /// Constructor for the declare mapper directive. 8179 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8180 : CurDir(&Dir), CGF(CGF) {} 8181 8182 /// Generate code for the combined entry if we have a partially mapped struct 8183 /// and take care of the mapping flags of the arguments corresponding to 8184 /// individual struct members. 8185 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, 8186 MapFlagsArrayTy &CurTypes, 8187 const StructRangeInfoTy &PartialStruct, bool IsMapThis, 8188 llvm::OpenMPIRBuilder &OMPBuilder, 8189 const ValueDecl *VD = nullptr, 8190 bool NotTargetParams = true) const { 8191 if (CurTypes.size() == 1 && 8192 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) != 8193 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) && 8194 !PartialStruct.IsArraySection) 8195 return; 8196 Address LBAddr = PartialStruct.LowestElem.second; 8197 Address HBAddr = PartialStruct.HighestElem.second; 8198 if (PartialStruct.HasCompleteRecord) { 8199 LBAddr = PartialStruct.LB; 8200 HBAddr = PartialStruct.LB; 8201 } 8202 CombinedInfo.Exprs.push_back(VD); 8203 // Base is the base of the struct 8204 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); 8205 CombinedInfo.DevicePtrDecls.push_back(nullptr); 8206 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 8207 // Pointer is the address of the lowest element 8208 llvm::Value *LB = LBAddr.getPointer(); 8209 const CXXMethodDecl *MD = 8210 CGF.CurFuncDecl ? 
dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr; 8211 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr; 8212 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false; 8213 // There should not be a mapper for a combined entry. 8214 if (HasBaseClass) { 8215 // OpenMP 5.2 148:21: 8216 // If the target construct is within a class non-static member function, 8217 // and a variable is an accessible data member of the object for which the 8218 // non-static member function is invoked, the variable is treated as 8219 // if the this[:1] expression had appeared in a map clause with a map-type 8220 // of tofrom. 8221 // Emit this[:1] 8222 CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer()); 8223 QualType Ty = MD->getFunctionObjectParameterType(); 8224 llvm::Value *Size = 8225 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty, 8226 /*isSigned=*/true); 8227 CombinedInfo.Sizes.push_back(Size); 8228 } else { 8229 CombinedInfo.Pointers.push_back(LB); 8230 // Size is (addr of {highest+1} element) - (addr of lowest element) 8231 llvm::Value *HB = HBAddr.getPointer(); 8232 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32( 8233 HBAddr.getElementType(), HB, /*Idx0=*/1); 8234 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8235 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8236 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr); 8237 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 8238 /*isSigned=*/false); 8239 CombinedInfo.Sizes.push_back(Size); 8240 } 8241 CombinedInfo.Mappers.push_back(nullptr); 8242 // The map type is always TARGET_PARAM when generating info for captures. 8243 CombinedInfo.Types.push_back( 8244 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE 8245 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM); 8246 // If any element has the present modifier, then make sure the runtime 8247 // doesn't attempt to allocate the struct. 8248 if (CurTypes.end() != 8249 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 8250 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 8251 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT); 8252 })) 8253 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT; 8254 // Remove the TARGET_PARAM flag from the first element. 8255 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; 8256 // If any element has the ompx_hold modifier, then make sure the runtime 8257 // uses the hold reference count for the struct as a whole so that it won't 8258 // be unmapped by an extra dynamic reference count decrement. Add it to all 8259 // elements as well so the runtime knows which reference count to check 8260 // when determining whether it's time for device-to-host transfers of 8261 // individual elements. 8262 if (CurTypes.end() != 8263 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 8264 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 8265 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD); 8266 })) { 8267 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; 8268 for (auto &M : CurTypes) 8269 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; 8270 } 8271 8272 // All other current entries will be MEMBER_OF the combined entry 8273 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8274 // 0xFFFF in the MEMBER_OF field).
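// (E.g., if the combined entry emitted above is argument 1 of the generated
// list, each member entry's 0xFFFF placeholder is rewritten to MEMBER_OF(1),
// matching the MEMBER_OF(1) entries in the mapping examples earlier in this
// file.)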
8275 OpenMPOffloadMappingFlags MemberOfFlag = 8276 OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); 8277 for (auto &M : CurTypes) 8278 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag); 8279 } 8280 8281 /// Generate all the base pointers, section pointers, sizes, map types, and 8282 /// mappers for the extracted mappable expressions (all included in \a 8283 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8284 /// pair of the relevant declaration and index where it occurs is appended to 8285 /// the device pointers info array. 8286 void generateAllInfo( 8287 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder, 8288 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8289 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8290 assert(CurDir.is<const OMPExecutableDirective *>() && 8291 "Expect a executable directive"); 8292 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8293 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder, 8294 SkipVarSet); 8295 } 8296 8297 /// Generate all the base pointers, section pointers, sizes, map types, and 8298 /// mappers for the extracted map clauses of user-defined mapper (all included 8299 /// in \a CombinedInfo). 8300 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo, 8301 llvm::OpenMPIRBuilder &OMPBuilder) const { 8302 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8303 "Expect a declare mapper directive"); 8304 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8305 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo, 8306 OMPBuilder); 8307 } 8308 8309 /// Emit capture info for lambdas for variables captured by reference. 8310 void generateInfoForLambdaCaptures( 8311 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 8312 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8313 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType(); 8314 const auto *RD = VDType->getAsCXXRecordDecl(); 8315 if (!RD || !RD->isLambda()) 8316 return; 8317 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType), 8318 CGF.getContext().getDeclAlign(VD)); 8319 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType); 8320 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures; 8321 FieldDecl *ThisCapture = nullptr; 8322 RD->getCaptureFields(Captures, ThisCapture); 8323 if (ThisCapture) { 8324 LValue ThisLVal = 8325 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8326 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8327 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 8328 VDLVal.getPointer(CGF)); 8329 CombinedInfo.Exprs.push_back(VD); 8330 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF)); 8331 CombinedInfo.DevicePtrDecls.push_back(nullptr); 8332 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 8333 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF)); 8334 CombinedInfo.Sizes.push_back( 8335 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8336 CGF.Int64Ty, /*isSigned=*/true)); 8337 CombinedInfo.Types.push_back( 8338 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | 8339 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | 8340 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | 8341 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); 8342 CombinedInfo.Mappers.push_back(nullptr); 8343 } 8344 for (const LambdaCapture &LC : RD->captures()) { 8345 if (!LC.capturesVariable()) 8346 continue; 8347 const 
VarDecl *VD = cast<VarDecl>(LC.getCapturedVar()); 8348 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8349 continue; 8350 auto It = Captures.find(VD); 8351 assert(It != Captures.end() && "Found lambda capture without field."); 8352 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8353 if (LC.getCaptureKind() == LCK_ByRef) { 8354 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8355 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8356 VDLVal.getPointer(CGF)); 8357 CombinedInfo.Exprs.push_back(VD); 8358 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8359 CombinedInfo.DevicePtrDecls.push_back(nullptr); 8360 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 8361 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF)); 8362 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8363 CGF.getTypeSize( 8364 VD->getType().getCanonicalType().getNonReferenceType()), 8365 CGF.Int64Ty, /*isSigned=*/true)); 8366 } else { 8367 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8368 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8369 VDLVal.getPointer(CGF)); 8370 CombinedInfo.Exprs.push_back(VD); 8371 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8372 CombinedInfo.DevicePtrDecls.push_back(nullptr); 8373 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 8374 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal()); 8375 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8376 } 8377 CombinedInfo.Types.push_back( 8378 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | 8379 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | 8380 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | 8381 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); 8382 CombinedInfo.Mappers.push_back(nullptr); 8383 } 8384 } 8385 8386 /// Set correct indices for lambdas captures. 8387 void adjustMemberOfForLambdaCaptures( 8388 llvm::OpenMPIRBuilder &OMPBuilder, 8389 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8390 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8391 MapFlagsArrayTy &Types) const { 8392 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8393 // Set correct member_of idx for all implicit lambda captures. 8394 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | 8395 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | 8396 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | 8397 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)) 8398 continue; 8399 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]); 8400 assert(BasePtr && "Unable to find base lambda address."); 8401 int TgtIdx = -1; 8402 for (unsigned J = I; J > 0; --J) { 8403 unsigned Idx = J - 1; 8404 if (Pointers[Idx] != BasePtr) 8405 continue; 8406 TgtIdx = Idx; 8407 break; 8408 } 8409 assert(TgtIdx != -1 && "Unable to find parent lambda."); 8410 // All other current entries will be MEMBER_OF the combined entry 8411 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8412 // 0xFFFF in the MEMBER_OF field). 8413 OpenMPOffloadMappingFlags MemberOfFlag = 8414 OMPBuilder.getMemberOfFlag(TgtIdx); 8415 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag); 8416 } 8417 } 8418 8419 /// Generate the base pointers, section pointers, sizes, map types, and 8420 /// mappers associated to a given capture (all included in \a CombinedInfo). 
8421 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8422 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8423 StructRangeInfoTy &PartialStruct) const {
8424 assert(!Cap->capturesVariableArrayType() &&
8425 "Not expecting to generate map info for a variable array type!");
8426
8427 // We need to know when we are generating information for the first component.
8428 const ValueDecl *VD = Cap->capturesThis()
8429 ? nullptr
8430 : Cap->getCapturedVar()->getCanonicalDecl();
8431
8432 // For map(to: lambda): skip it here; it is processed in
8433 // generateDefaultMapInfo.
8434 if (LambdasMap.count(VD))
8435 return;
8436
8437 // If this declaration appears in an is_device_ptr clause we just have to
8438 // pass the pointer by value. If it is a reference to a declaration, we just
8439 // pass its value.
8440 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8441 CombinedInfo.Exprs.push_back(VD);
8442 CombinedInfo.BasePointers.emplace_back(Arg);
8443 CombinedInfo.DevicePtrDecls.emplace_back(VD);
8444 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8445 CombinedInfo.Pointers.push_back(Arg);
8446 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8447 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8448 /*isSigned=*/true));
8449 CombinedInfo.Types.push_back(
8450 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8451 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8452 CombinedInfo.Mappers.push_back(nullptr);
8453 return;
8454 }
8455
8456 using MapData =
8457 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8458 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8459 const ValueDecl *, const Expr *>;
8460 SmallVector<MapData, 4> DeclComponentLists;
8461 // For member fields listed in is_device_ptr, store them in
8462 // DeclComponentLists for generating components info.
8463 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8464 auto It = DevPointersMap.find(VD);
8465 if (It != DevPointersMap.end())
8466 for (const auto &MCL : It->second)
8467 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8468 /*IsImplicit=*/true, nullptr,
8469 nullptr);
8470 auto I = HasDevAddrsMap.find(VD);
8471 if (I != HasDevAddrsMap.end())
8472 for (const auto &MCL : I->second)
8473 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8474 /*IsImplicit=*/true, nullptr,
8475 nullptr);
8476 assert(CurDir.is<const OMPExecutableDirective *>() &&
8477 "Expect an executable directive");
8478 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8479 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8480 const auto *EI = C->getVarRefs().begin();
8481 for (const auto L : C->decl_component_lists(VD)) {
8482 const ValueDecl *VDecl, *Mapper;
8483 // The expression is not valid if the mapping is implicit.
8484 const Expr *E = (C->getMapLoc().isValid()) ?
*EI : nullptr;
8485 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8486 std::tie(VDecl, Components, Mapper) = L;
8487 assert(VDecl == VD && "We got information for the wrong declaration??");
8488 assert(!Components.empty() &&
8489 "Not expecting declaration with no component lists.");
8490 DeclComponentLists.emplace_back(Components, C->getMapType(),
8491 C->getMapTypeModifiers(),
8492 C->isImplicit(), Mapper, E);
8493 ++EI;
8494 }
8495 }
8496 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8497 const MapData &RHS) {
8498 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8499 OpenMPMapClauseKind MapType = std::get<1>(RHS);
8500 bool HasPresent =
8501 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8502 bool HasAllocs = MapType == OMPC_MAP_alloc;
8503 MapModifiers = std::get<2>(RHS);
8504 MapType = std::get<1>(LHS);
8505 bool HasPresentR =
8506 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8507 bool HasAllocsR = MapType == OMPC_MAP_alloc;
8508 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8509 });
8510
8511 // Find overlapping elements (including the offset from the base element).
8512 llvm::SmallDenseMap<
8513 const MapData *,
8514 llvm::SmallVector<
8515 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8516 4>
8517 OverlappedData;
8518 size_t Count = 0;
8519 for (const MapData &L : DeclComponentLists) {
8520 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8521 OpenMPMapClauseKind MapType;
8522 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8523 bool IsImplicit;
8524 const ValueDecl *Mapper;
8525 const Expr *VarRef;
8526 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8527 L;
8528 ++Count;
8529 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8530 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8531 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8532 VarRef) = L1;
8533 auto CI = Components.rbegin();
8534 auto CE = Components.rend();
8535 auto SI = Components1.rbegin();
8536 auto SE = Components1.rend();
8537 for (; CI != CE && SI != SE; ++CI, ++SI) {
8538 if (CI->getAssociatedExpression()->getStmtClass() !=
8539 SI->getAssociatedExpression()->getStmtClass())
8540 break;
8541 // Are we dealing with different variables/fields?
8542 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8543 break;
8544 }
8545 // The lists overlap if, for at least one of them, we reached the head
8546 // of its components list.
8547 if (CI == CE || SI == SE) {
8548 // Ignore it if it is the same component.
8549 if (CI == CE && SI == SE)
8550 continue;
8551 const auto It = (SI == SE) ? CI : SI;
8552 // If one component is a pointer and another one is a kind of
8553 // dereference of this pointer (array subscript, section, dereference,
8554 // etc.), it is not an overlap.
8555 // Likewise, if one component is a base and the other component is a
8556 // dereferenced pointer MemberExpr with the same base.
8557 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8558 (std::prev(It)->getAssociatedDeclaration() &&
8559 std::prev(It)
8560 ->getAssociatedDeclaration()
8561 ->getType()
8562 ->isPointerType()) ||
8563 (It->getAssociatedDeclaration() &&
8564 It->getAssociatedDeclaration()->getType()->isPointerType() &&
8565 std::next(It) != CE && std::next(It) != SE))
8566 continue;
8567 const MapData &BaseData = CI == CE ?
L : L1;
8568 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8569 SI == SE ? Components : Components1;
8570 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8571 OverlappedElements.getSecond().push_back(SubData);
8572 }
8573 }
8574 }
8575 // Sort the overlapped elements for each item.
8576 llvm::SmallVector<const FieldDecl *, 4> Layout;
8577 if (!OverlappedData.empty()) {
8578 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8579 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8580 while (BaseType != OrigType) {
8581 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8582 OrigType = BaseType->getPointeeOrArrayElementType();
8583 }
8584
8585 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8586 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8587 else {
8588 const auto *RD = BaseType->getAsRecordDecl();
8589 Layout.append(RD->field_begin(), RD->field_end());
8590 }
8591 }
8592 for (auto &Pair : OverlappedData) {
8593 llvm::stable_sort(
8594 Pair.getSecond(),
8595 [&Layout](
8596 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8597 OMPClauseMappableExprCommon::MappableExprComponentListRef
8598 Second) {
8599 auto CI = First.rbegin();
8600 auto CE = First.rend();
8601 auto SI = Second.rbegin();
8602 auto SE = Second.rend();
8603 for (; CI != CE && SI != SE; ++CI, ++SI) {
8604 if (CI->getAssociatedExpression()->getStmtClass() !=
8605 SI->getAssociatedExpression()->getStmtClass())
8606 break;
8607 // Are we dealing with different variables/fields?
8608 if (CI->getAssociatedDeclaration() !=
8609 SI->getAssociatedDeclaration())
8610 break;
8611 }
8612
8613 // Lists contain the same elements.
8614 if (CI == CE && SI == SE)
8615 return false;
8616
8617 // A list with fewer elements is less than a list with more elements.
8618 if (CI == CE || SI == SE)
8619 return CI == CE;
8620
8621 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8622 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8623 if (FD1->getParent() == FD2->getParent())
8624 return FD1->getFieldIndex() < FD2->getFieldIndex();
8625 const auto *It =
8626 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8627 return FD == FD1 || FD == FD2;
8628 });
8629 return *It == FD1;
8630 });
8631 }
8632
8633 // The info is associated with a capture, because the mapping flags depend
8634 // on it. First, go through all of the elements that have overlapped elements.
8635 bool IsFirstComponentList = true;
8636 MapCombinedInfoTy StructBaseCombinedInfo;
8637 for (const auto &Pair : OverlappedData) {
8638 const MapData &L = *Pair.getFirst();
8639 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8640 OpenMPMapClauseKind MapType;
8641 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8642 bool IsImplicit;
8643 const ValueDecl *Mapper;
8644 const Expr *VarRef;
8645 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8646 L;
8647 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8648 OverlappedComponents = Pair.getSecond();
8649 generateInfoForComponentList(
8650 MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8651 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8652 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8653 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8654 IsFirstComponentList = false;
8655 }
8656 // Go through the remaining elements, those without overlapped elements.
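// For example (illustrative only): with 'map(s.p[0:n]) map(s.x)' the two
// component lists share the base 's' but diverge at the member, so neither
// is a prefix of the other; both lists are therefore emitted by this loop
// rather than through OverlappedData above.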
8657 for (const MapData &L : DeclComponentLists) {
8658 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8659 OpenMPMapClauseKind MapType;
8660 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8661 bool IsImplicit;
8662 const ValueDecl *Mapper;
8663 const Expr *VarRef;
8664 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8665 L;
8666 auto It = OverlappedData.find(&L);
8667 if (It == OverlappedData.end())
8668 generateInfoForComponentList(
8669 MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8670 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8671 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8672 /*ForDeviceAddr=*/false, VD, VarRef);
8673 IsFirstComponentList = false;
8674 }
8675 }
8676
8677 /// Generate the default map information for a given capture \a CI,
8678 /// record field declaration \a RI and captured value \a CV.
8679 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8680 const FieldDecl &RI, llvm::Value *CV,
8681 MapCombinedInfoTy &CombinedInfo) const {
8682 bool IsImplicit = true;
8683 // Do the default mapping.
8684 if (CI.capturesThis()) {
8685 CombinedInfo.Exprs.push_back(nullptr);
8686 CombinedInfo.BasePointers.push_back(CV);
8687 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8688 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8689 CombinedInfo.Pointers.push_back(CV);
8690 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8691 CombinedInfo.Sizes.push_back(
8692 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8693 CGF.Int64Ty, /*isSigned=*/true));
8694 // Default map type.
8695 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8696 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8697 } else if (CI.capturesVariableByCopy()) {
8698 const VarDecl *VD = CI.getCapturedVar();
8699 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8700 CombinedInfo.BasePointers.push_back(CV);
8701 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8702 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8703 CombinedInfo.Pointers.push_back(CV);
8704 if (!RI.getType()->isAnyPointerType()) {
8705 // Captures passed by value that are not pointers have to be signaled
8706 // to the runtime as literals.
8707 CombinedInfo.Types.push_back(
8708 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8709 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8710 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8711 } else {
8712 // Pointers are implicitly mapped with a zero size and no flags
8713 // (other than the implicit-map flag added below for all implicit maps).
8714 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8715 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8716 }
8717 auto I = FirstPrivateDecls.find(VD);
8718 if (I != FirstPrivateDecls.end())
8719 IsImplicit = I->getSecond();
8720 } else {
8721 assert(CI.capturesVariable() && "Expected captured reference.");
8722 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8723 QualType ElementType = PtrTy->getPointeeType();
8724 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8725 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8726 // The default map type for a scalar/complex type is 'to' because by
8727 // default the value doesn't have to be retrieved. For an aggregate
8728 // type, the default is 'tofrom'.
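// For example (illustrative only): a 'double' captured by reference would
// default to 'to' here, while a 'double[8]' array or a struct would default
// to 'tofrom'.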
8729 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI)); 8730 const VarDecl *VD = CI.getCapturedVar(); 8731 auto I = FirstPrivateDecls.find(VD); 8732 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 8733 CombinedInfo.BasePointers.push_back(CV); 8734 CombinedInfo.DevicePtrDecls.push_back(nullptr); 8735 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 8736 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8737 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8738 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8739 AlignmentSource::Decl)); 8740 CombinedInfo.Pointers.push_back(PtrAddr.getPointer()); 8741 } else { 8742 CombinedInfo.Pointers.push_back(CV); 8743 } 8744 if (I != FirstPrivateDecls.end()) 8745 IsImplicit = I->getSecond(); 8746 } 8747 // Every default map produces a single argument which is a target parameter. 8748 CombinedInfo.Types.back() |= 8749 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; 8750 8751 // Add flag stating this is an implicit map. 8752 if (IsImplicit) 8753 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; 8754 8755 // No user-defined mapper for default mapping. 8756 CombinedInfo.Mappers.push_back(nullptr); 8757 } 8758 }; 8759 } // anonymous namespace 8760 8761 // Try to extract the base declaration from a `this->x` expression if possible. 8762 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 8763 if (!E) 8764 return nullptr; 8765 8766 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 8767 if (const MemberExpr *ME = 8768 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 8769 return ME->getMemberDecl(); 8770 return nullptr; 8771 } 8772 8773 /// Emit a string constant containing the names of the values mapped to the 8774 /// offloading runtime library. 8775 llvm::Constant * 8776 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 8777 MappableExprsHandler::MappingExprInfo &MapExprs) { 8778 8779 uint32_t SrcLocStrSize; 8780 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 8781 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 8782 8783 SourceLocation Loc; 8784 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 8785 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 8786 Loc = VD->getLocation(); 8787 else 8788 Loc = MapExprs.getMapExpr()->getExprLoc(); 8789 } else { 8790 Loc = MapExprs.getMapDecl()->getLocation(); 8791 } 8792 8793 std::string ExprName; 8794 if (MapExprs.getMapExpr()) { 8795 PrintingPolicy P(CGF.getContext().getLangOpts()); 8796 llvm::raw_string_ostream OS(ExprName); 8797 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 8798 OS.flush(); 8799 } else { 8800 ExprName = MapExprs.getMapDecl()->getNameAsString(); 8801 } 8802 8803 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 8804 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, 8805 PLoc.getLine(), PLoc.getColumn(), 8806 SrcLocStrSize); 8807 } 8808 8809 /// Emit the arrays used to pass the captures and map information to the 8810 /// offloading runtime library. If there is no map or capture information, 8811 /// return nullptr by reference. 8812 static void emitOffloadingArrays( 8813 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 8814 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 8815 bool IsNonContiguous = false) { 8816 CodeGenModule &CGM = CGF.CGM; 8817 8818 // Reset the array information. 
8819 Info.clearArrayInfo();
8820 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8821
8822 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
8823 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
8824 CGF.AllocaInsertPt->getIterator());
8825 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
8826 CGF.Builder.GetInsertPoint());
8827
8828 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
8829 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
8830 };
8831 if (CGM.getCodeGenOpts().getDebugInfo() !=
8832 llvm::codegenoptions::NoDebugInfo) {
8833 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
8834 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
8835 FillInfoMap);
8836 }
8837
8838 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
8839 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
8840 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
8841 }
8842 };
8843
8844 auto CustomMapperCB = [&](unsigned int I) {
8845 llvm::Value *MFunc = nullptr;
8846 if (CombinedInfo.Mappers[I]) {
8847 Info.HasMapper = true;
8848 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
8849 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8850 }
8851 return MFunc;
8852 };
8853 OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
8854 IsNonContiguous, DeviceAddrCB,
8855 CustomMapperCB);
8856 }
8857
8858 /// Check for an inner distribute directive.
8859 static const OMPExecutableDirective *
8860 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8861 const auto *CS = D.getInnermostCapturedStmt();
8862 const auto *Body =
8863 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8864 const Stmt *ChildStmt =
8865 CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
8866
8867 if (const auto *NestedDir =
8868 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8869 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8870 switch (D.getDirectiveKind()) {
8871 case OMPD_target:
8872 // For now, just treat 'target teams loop' as if it's distributed.
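// That is, a 'teams loop' nested inside a plain 'target' is matched below
// the same way a nested 'teams distribute' would be.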
8873 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
8874 return NestedDir;
8875 if (DKind == OMPD_teams) {
8876 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8877 /*IgnoreCaptured=*/true);
8878 if (!Body)
8879 return nullptr;
8880 ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
8881 if (const auto *NND =
8882 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8883 DKind = NND->getDirectiveKind();
8884 if (isOpenMPDistributeDirective(DKind))
8885 return NND;
8886 }
8887 }
8888 return nullptr;
8889 case OMPD_target_teams:
8890 if (isOpenMPDistributeDirective(DKind))
8891 return NestedDir;
8892 return nullptr;
8893 case OMPD_target_parallel:
8894 case OMPD_target_simd:
8895 case OMPD_target_parallel_for:
8896 case OMPD_target_parallel_for_simd:
8897 return nullptr;
8898 case OMPD_target_teams_distribute:
8899 case OMPD_target_teams_distribute_simd:
8900 case OMPD_target_teams_distribute_parallel_for:
8901 case OMPD_target_teams_distribute_parallel_for_simd:
8902 case OMPD_parallel:
8903 case OMPD_for:
8904 case OMPD_parallel_for:
8905 case OMPD_parallel_master:
8906 case OMPD_parallel_sections:
8907 case OMPD_for_simd:
8908 case OMPD_parallel_for_simd:
8909 case OMPD_cancel:
8910 case OMPD_cancellation_point:
8911 case OMPD_ordered:
8912 case OMPD_threadprivate:
8913 case OMPD_allocate:
8914 case OMPD_task:
8915 case OMPD_simd:
8916 case OMPD_tile:
8917 case OMPD_unroll:
8918 case OMPD_sections:
8919 case OMPD_section:
8920 case OMPD_single:
8921 case OMPD_master:
8922 case OMPD_critical:
8923 case OMPD_taskyield:
8924 case OMPD_barrier:
8925 case OMPD_taskwait:
8926 case OMPD_taskgroup:
8927 case OMPD_atomic:
8928 case OMPD_flush:
8929 case OMPD_depobj:
8930 case OMPD_scan:
8931 case OMPD_teams:
8932 case OMPD_target_data:
8933 case OMPD_target_exit_data:
8934 case OMPD_target_enter_data:
8935 case OMPD_distribute:
8936 case OMPD_distribute_simd:
8937 case OMPD_distribute_parallel_for:
8938 case OMPD_distribute_parallel_for_simd:
8939 case OMPD_teams_distribute:
8940 case OMPD_teams_distribute_simd:
8941 case OMPD_teams_distribute_parallel_for:
8942 case OMPD_teams_distribute_parallel_for_simd:
8943 case OMPD_target_update:
8944 case OMPD_declare_simd:
8945 case OMPD_declare_variant:
8946 case OMPD_begin_declare_variant:
8947 case OMPD_end_declare_variant:
8948 case OMPD_declare_target:
8949 case OMPD_end_declare_target:
8950 case OMPD_declare_reduction:
8951 case OMPD_declare_mapper:
8952 case OMPD_taskloop:
8953 case OMPD_taskloop_simd:
8954 case OMPD_master_taskloop:
8955 case OMPD_master_taskloop_simd:
8956 case OMPD_parallel_master_taskloop:
8957 case OMPD_parallel_master_taskloop_simd:
8958 case OMPD_requires:
8959 case OMPD_metadirective:
8960 case OMPD_unknown:
8961 default:
8962 llvm_unreachable("Unexpected directive.");
8963 }
8964 }
8965
8966 return nullptr;
8967 }
8968
8969 /// Emit the user-defined mapper function. The code generation follows the
8970 /// pattern in the example below.
8971 /// \code
8972 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8973 /// void *base, void *begin,
8974 /// int64_t size, int64_t type,
8975 /// void *name = nullptr) {
8976 /// // Allocate space for an array section first or add a base/begin for
8977 /// // pointer dereference.
8978 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) && 8979 /// !maptype.IsDelete) 8980 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8981 /// size*sizeof(Ty), clearToFromMember(type)); 8982 /// // Map members. 8983 /// for (unsigned i = 0; i < size; i++) { 8984 /// // For each component specified by this mapper: 8985 /// for (auto c : begin[i]->all_components) { 8986 /// if (c.hasMapper()) 8987 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 8988 /// c.arg_type, c.arg_name); 8989 /// else 8990 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 8991 /// c.arg_begin, c.arg_size, c.arg_type, 8992 /// c.arg_name); 8993 /// } 8994 /// } 8995 /// // Delete the array section. 8996 /// if (size > 1 && maptype.IsDelete) 8997 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8998 /// size*sizeof(Ty), clearToFromMember(type)); 8999 /// } 9000 /// \endcode 9001 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 9002 CodeGenFunction *CGF) { 9003 if (UDMMap.count(D) > 0) 9004 return; 9005 ASTContext &C = CGM.getContext(); 9006 QualType Ty = D->getType(); 9007 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 9008 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 9009 auto *MapperVarDecl = 9010 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 9011 SourceLocation Loc = D->getLocation(); 9012 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 9013 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty); 9014 9015 // Prepare mapper function arguments and attributes. 9016 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9017 C.VoidPtrTy, ImplicitParamKind::Other); 9018 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9019 ImplicitParamKind::Other); 9020 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9021 C.VoidPtrTy, ImplicitParamKind::Other); 9022 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9023 ImplicitParamKind::Other); 9024 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9025 ImplicitParamKind::Other); 9026 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9027 ImplicitParamKind::Other); 9028 FunctionArgList Args; 9029 Args.push_back(&HandleArg); 9030 Args.push_back(&BaseArg); 9031 Args.push_back(&BeginArg); 9032 Args.push_back(&SizeArg); 9033 Args.push_back(&TypeArg); 9034 Args.push_back(&NameArg); 9035 const CGFunctionInfo &FnInfo = 9036 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 9037 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 9038 SmallString<64> TyStr; 9039 llvm::raw_svector_ostream Out(TyStr); 9040 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out); 9041 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 9042 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 9043 Name, &CGM.getModule()); 9044 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 9045 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 9046 // Start the mapper function code generation. 9047 CodeGenFunction MapperCGF(CGM); 9048 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 9049 // Compute the starting and end addresses of array elements. 
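// Note that 'Size' arrives in bytes; it is divided by the element size below
// to produce the element count that bounds the mapping loop.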
9050 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9051 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9052 C.getPointerType(Int64Ty), Loc);
9053 // Prepare common arguments for array initialization and deletion.
9054 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9055 MapperCGF.GetAddrOfLocalVar(&HandleArg),
9056 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9057 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9058 MapperCGF.GetAddrOfLocalVar(&BaseArg),
9059 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9060 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9061 MapperCGF.GetAddrOfLocalVar(&BeginArg),
9062 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9063 // Convert the size in bytes into the number of array elements.
9064 Size = MapperCGF.Builder.CreateExactUDiv(
9065 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9066 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9067 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9068 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9069 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9070 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9071 C.getPointerType(Int64Ty), Loc);
9072 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9073 MapperCGF.GetAddrOfLocalVar(&NameArg),
9074 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9075
9076 // Emit array initialization if this is an array section and \p MapType
9077 // indicates that memory allocation is required.
9078 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9079 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9080 MapName, ElementSize, HeadBB, /*IsInit=*/true);
9081
9082 // Emit a for loop that iterates through 'Size' elements and maps all of them.
9083
9084 // Emit the loop header block.
9085 MapperCGF.EmitBlock(HeadBB);
9086 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9087 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9088 // Evaluate whether the initial condition is satisfied.
9089 llvm::Value *IsEmpty =
9090 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9091 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9092 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9093
9094 // Emit the loop body block.
9095 MapperCGF.EmitBlock(BodyBB);
9096 llvm::BasicBlock *LastBB = BodyBB;
9097 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9098 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9099 PtrPHI->addIncoming(PtrBegin, EntryBB);
9100 Address PtrCurrent(PtrPHI, ElemTy,
9101 MapperCGF.GetAddrOfLocalVar(&BeginArg)
9102 .getAlignment()
9103 .alignmentOfArrayElement(ElementSize));
9104 // Privatize the mapper's declared variable to be the current array element.
9105 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9106 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9107 (void)Scope.Privatize();
9108
9109 // Get map clause information. Fill up the arrays with all mapped variables.
9110 MappableExprsHandler::MapCombinedInfoTy Info;
9111 MappableExprsHandler MEHandler(*D, MapperCGF);
9112 MEHandler.generateAllInfoForMapper(Info, OMPBuilder);
9113
9114 // Call the runtime API __tgt_mapper_num_components to get the number of
9115 // pre-existing components.
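// The returned count is shifted into the MEMBER_OF bit-field position (see
// getFlagMemberOffset) so that it can simply be added to each member's map
// type below.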
9116 llvm::Value *OffloadingArgs[] = {Handle}; 9117 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 9118 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9119 OMPRTL___tgt_mapper_num_components), 9120 OffloadingArgs); 9121 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 9122 PreviousSize, 9123 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 9124 9125 // Fill up the runtime mapper handle for all components. 9126 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) { 9127 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 9128 Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9129 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 9130 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9131 llvm::Value *CurSizeArg = Info.Sizes[I]; 9132 llvm::Value *CurNameArg = 9133 (CGM.getCodeGenOpts().getDebugInfo() == 9134 llvm::codegenoptions::NoDebugInfo) 9135 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy) 9136 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]); 9137 9138 // Extract the MEMBER_OF field from the map type. 9139 llvm::Value *OriMapType = MapperCGF.Builder.getInt64( 9140 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9141 Info.Types[I])); 9142 llvm::Value *MemberMapType = 9143 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 9144 9145 // Combine the map type inherited from user-defined mapper with that 9146 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 9147 // bits of the \a MapType, which is the input argument of the mapper 9148 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9149 // bits of MemberMapType. 9150 // [OpenMP 5.0], 1.2.6. map-type decay. 9151 // | alloc | to | from | tofrom | release | delete 9152 // ---------------------------------------------------------- 9153 // alloc | alloc | alloc | alloc | alloc | release | delete 9154 // to | alloc | to | alloc | to | release | delete 9155 // from | alloc | alloc | from | from | release | delete 9156 // tofrom | alloc | to | from | tofrom | release | delete 9157 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9158 MapType, 9159 MapperCGF.Builder.getInt64( 9160 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9161 OpenMPOffloadMappingFlags::OMP_MAP_TO | 9162 OpenMPOffloadMappingFlags::OMP_MAP_FROM))); 9163 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9164 llvm::BasicBlock *AllocElseBB = 9165 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9166 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9167 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9168 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9169 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9170 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9171 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9172 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 
9173 MapperCGF.EmitBlock(AllocBB); 9174 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9175 MemberMapType, 9176 MapperCGF.Builder.getInt64( 9177 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9178 OpenMPOffloadMappingFlags::OMP_MAP_TO | 9179 OpenMPOffloadMappingFlags::OMP_MAP_FROM))); 9180 MapperCGF.Builder.CreateBr(EndBB); 9181 MapperCGF.EmitBlock(AllocElseBB); 9182 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9183 LeftToFrom, 9184 MapperCGF.Builder.getInt64( 9185 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9186 OpenMPOffloadMappingFlags::OMP_MAP_TO))); 9187 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9188 // In case of to, clear OMP_MAP_FROM. 9189 MapperCGF.EmitBlock(ToBB); 9190 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9191 MemberMapType, 9192 MapperCGF.Builder.getInt64( 9193 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9194 OpenMPOffloadMappingFlags::OMP_MAP_FROM))); 9195 MapperCGF.Builder.CreateBr(EndBB); 9196 MapperCGF.EmitBlock(ToElseBB); 9197 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9198 LeftToFrom, 9199 MapperCGF.Builder.getInt64( 9200 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9201 OpenMPOffloadMappingFlags::OMP_MAP_FROM))); 9202 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9203 // In case of from, clear OMP_MAP_TO. 9204 MapperCGF.EmitBlock(FromBB); 9205 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9206 MemberMapType, 9207 MapperCGF.Builder.getInt64( 9208 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9209 OpenMPOffloadMappingFlags::OMP_MAP_TO))); 9210 // In case of tofrom, do nothing. 9211 MapperCGF.EmitBlock(EndBB); 9212 LastBB = EndBB; 9213 llvm::PHINode *CurMapType = 9214 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9215 CurMapType->addIncoming(AllocMapType, AllocBB); 9216 CurMapType->addIncoming(ToMapType, ToBB); 9217 CurMapType->addIncoming(FromMapType, FromBB); 9218 CurMapType->addIncoming(MemberMapType, ToElseBB); 9219 9220 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9221 CurSizeArg, CurMapType, CurNameArg}; 9222 if (Info.Mappers[I]) { 9223 // Call the corresponding mapper function. 9224 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc( 9225 cast<OMPDeclareMapperDecl>(Info.Mappers[I])); 9226 assert(MapperFunc && "Expect a valid mapper function is available."); 9227 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs); 9228 } else { 9229 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9230 // data structure. 9231 MapperCGF.EmitRuntimeCall( 9232 OMPBuilder.getOrCreateRuntimeFunction( 9233 CGM.getModule(), OMPRTL___tgt_push_mapper_component), 9234 OffloadingArgs); 9235 } 9236 } 9237 9238 // Update the pointer to point to the next element that needs to be mapped, 9239 // and check whether we have mapped all elements. 9240 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9241 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9242 PtrPHI->addIncoming(PtrNext, LastBB); 9243 llvm::Value *IsDone = 9244 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9245 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9246 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9247 9248 MapperCGF.EmitBlock(ExitBB); 9249 // Emit array deletion if this is an array section and \p MapType indicates 9250 // that deletion is required. 
9251 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9252 MapName, ElementSize, DoneBB, /*IsInit=*/false);
9253
9254 // Emit the function exit block.
9255 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9256 MapperCGF.FinishFunction();
9257 UDMMap.try_emplace(D, Fn);
9258 if (CGF) {
9259 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9260 Decls.second.push_back(D);
9261 }
9262 }
9263
9264 /// Emit the array initialization or deletion portion for user-defined mapper
9265 /// code generation. First, it evaluates whether an array section is mapped and
9266 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9267 /// true, and \a MapType indicates to not delete this array, array
9268 /// initialization code is generated. If \a IsInit is false, and \a MapType
9269 /// indicates to delete this array, array deletion code is generated.
9270 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9271 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9272 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9273 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
9274 bool IsInit) {
9275 StringRef Prefix = IsInit ? ".init" : ".del";
9276
9277 // Evaluate if this is an array section.
9278 llvm::BasicBlock *BodyBB =
9279 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9280 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9281 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9282 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9283 MapType,
9284 MapperCGF.Builder.getInt64(
9285 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9286 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9287 llvm::Value *DeleteCond;
9288 llvm::Value *Cond;
9289 if (IsInit) {
9290 // base != begin?
9291 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
9292 // IsPtrAndObj?
9293 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9294 MapType,
9295 MapperCGF.Builder.getInt64(
9296 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9297 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
9298 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9299 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9300 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9301 DeleteCond = MapperCGF.Builder.CreateIsNull(
9302 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9303 } else {
9304 Cond = IsArray;
9305 DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9306 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9307 }
9308 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9309 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9310
9311 MapperCGF.EmitBlock(BodyBB);
9312 // Get the array size by multiplying element size and element number (i.e., \p
9313 // Size).
9314 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9315 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9316 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it serves
9317 // memory allocation/deletion purposes only.
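// For example (bit arithmetic only): a map type of (PTR_AND_OBJ | TO | FROM)
// becomes (PTR_AND_OBJ | IMPLICIT) after the mask and OR below.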
9318 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9319 MapType, 9320 MapperCGF.Builder.getInt64( 9321 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9322 OpenMPOffloadMappingFlags::OMP_MAP_TO | 9323 OpenMPOffloadMappingFlags::OMP_MAP_FROM))); 9324 MapTypeArg = MapperCGF.Builder.CreateOr( 9325 MapTypeArg, 9326 MapperCGF.Builder.getInt64( 9327 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( 9328 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))); 9329 9330 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9331 // data structure. 9332 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, 9333 ArraySize, MapTypeArg, MapName}; 9334 MapperCGF.EmitRuntimeCall( 9335 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9336 OMPRTL___tgt_push_mapper_component), 9337 OffloadingArgs); 9338 } 9339 9340 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( 9341 const OMPDeclareMapperDecl *D) { 9342 auto I = UDMMap.find(D); 9343 if (I != UDMMap.end()) 9344 return I->second; 9345 emitUserDefinedMapper(D); 9346 return UDMMap.lookup(D); 9347 } 9348 9349 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall( 9350 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9351 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9352 const OMPLoopDirective &D)> 9353 SizeEmitter) { 9354 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9355 const OMPExecutableDirective *TD = &D; 9356 // Get nested teams distribute kind directive, if any. 9357 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) && 9358 Kind != OMPD_target_teams_loop) 9359 TD = getNestedDistributeDirective(CGM.getContext(), D); 9360 if (!TD) 9361 return llvm::ConstantInt::get(CGF.Int64Ty, 0); 9362 9363 const auto *LD = cast<OMPLoopDirective>(TD); 9364 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) 9365 return NumIterations; 9366 return llvm::ConstantInt::get(CGF.Int64Ty, 0); 9367 } 9368 9369 static void 9370 emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, 9371 const OMPExecutableDirective &D, 9372 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, 9373 bool RequiresOuterTask, const CapturedStmt &CS, 9374 bool OffloadingMandatory, CodeGenFunction &CGF) { 9375 if (OffloadingMandatory) { 9376 CGF.Builder.CreateUnreachable(); 9377 } else { 9378 if (RequiresOuterTask) { 9379 CapturedVars.clear(); 9380 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9381 } 9382 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, 9383 CapturedVars); 9384 } 9385 } 9386 9387 static llvm::Value *emitDeviceID( 9388 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 9389 CodeGenFunction &CGF) { 9390 // Emit device ID if any. 
9391 llvm::Value *DeviceID;
9392 if (Device.getPointer()) {
9393 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9394 Device.getInt() == OMPC_DEVICE_device_num) &&
9395 "Expected device_num modifier.");
9396 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9397 DeviceID =
9398 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9399 } else {
9400 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9401 }
9402 return DeviceID;
9403 }
9404
9405 static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9406 CodeGenFunction &CGF) {
9407 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9408
9409 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9410 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9411 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9412 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9413 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9414 /*isSigned=*/false);
9415 }
9416 return DynCGroupMem;
9417 }
9418
9419 static void emitTargetCallKernelLaunch(
9420 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9421 const OMPExecutableDirective &D,
9422 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9423 const CapturedStmt &CS, bool OffloadingMandatory,
9424 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9425 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9426 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9427 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9428 const OMPLoopDirective &D)>
9429 SizeEmitter,
9430 CodeGenFunction &CGF, CodeGenModule &CGM) {
9431 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9432
9433 // Fill up the arrays with all the captured variables.
9434 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9435
9436 // Get mappable expression information.
9437 MappableExprsHandler MEHandler(D, CGF);
9438 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9439 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9440
9441 auto RI = CS.getCapturedRecordDecl()->field_begin();
9442 auto *CV = CapturedVars.begin();
9443 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9444 CE = CS.capture_end();
9445 CI != CE; ++CI, ++RI, ++CV) {
9446 MappableExprsHandler::MapCombinedInfoTy CurInfo;
9447 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9448
9449 // VLA sizes are passed to the outlined region by copy and do not have map
9450 // information associated.
9451 if (CI->capturesVariableArrayType()) {
9452 CurInfo.Exprs.push_back(nullptr);
9453 CurInfo.BasePointers.push_back(*CV);
9454 CurInfo.DevicePtrDecls.push_back(nullptr);
9455 CurInfo.DevicePointers.push_back(
9456 MappableExprsHandler::DeviceInfoTy::None);
9457 CurInfo.Pointers.push_back(*CV);
9458 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9459 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9460 // Copy to the device as an argument. No need to retrieve it.
9461 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9462 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9463 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9464 CurInfo.Mappers.push_back(nullptr);
9465 } else {
9466 // If we have any information in the map clause, we use it, otherwise we
9467 // just do a default mapping.
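// (generateInfoForCapture leaves CurInfo empty when no map clause mentions
// the capture; generateDefaultMapInfo then emits the implicit map below.)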
9468 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
9469 if (!CI->capturesThis())
9470 MappedVarSet.insert(CI->getCapturedVar());
9471 else
9472 MappedVarSet.insert(nullptr);
9473 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
9474 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9475 // Generate correct mapping for variables captured by reference in
9476 // lambdas.
9477 if (CI->capturesVariable())
9478 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9479 CurInfo, LambdaPointers);
9480 }
9481 // We expect to have at least one element of information for this capture.
9482 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
9483 "Non-existing map pointer for capture!");
9484 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9485 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9486 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9487 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9488 "Inconsistent map information sizes!");
9489
9490 // If there is an entry in PartialStruct it means we have a struct with
9491 // individual members mapped. Emit an extra combined entry.
9492 if (PartialStruct.Base.isValid()) {
9493 CombinedInfo.append(PartialStruct.PreliminaryMapData);
9494 MEHandler.emitCombinedEntry(
9495 CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
9496 OMPBuilder, nullptr,
9497 !PartialStruct.PreliminaryMapData.BasePointers.empty());
9498 }
9499
9500 // We need to append the results of this capture to what we already have.
9501 CombinedInfo.append(CurInfo);
9502 }
9503 // Adjust MEMBER_OF flags for the lambda captures.
9504 MEHandler.adjustMemberOfForLambdaCaptures(
9505 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
9506 CombinedInfo.Pointers, CombinedInfo.Types);
9507 // Map any list items in a map clause that were not captured because they
9508 // weren't referenced within the construct.
9509 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
9510
9511 CGOpenMPRuntime::TargetDataInfo Info;
9512 // Fill up the arrays and create the arguments.
9513 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
9514 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
9515 llvm::codegenoptions::NoDebugInfo;
9516 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
9517 EmitDebug,
9518 /*ForEndCall=*/false);
9519
9520 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9521 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9522 CGF.VoidPtrTy, CGM.getPointerAlign());
9523 InputInfo.PointersArray =
9524 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9525 InputInfo.SizesArray =
9526 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9527 InputInfo.MappersArray =
9528 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9529 MapTypesArray = Info.RTArgs.MapTypesArray;
9530 MapNamesArray = Info.RTArgs.MapNamesArray;
9531
9532 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9533 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9534 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9535 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9536 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9537
9538 if (IsReverseOffloading) {
9539 // Reverse offloading is not supported, so just execute on the host.
9540 // FIXME: This fallback solution is incorrect since it ignores the 9541 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to 9542 // assert here and ensure SEMA emits an error. 9543 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars, 9544 RequiresOuterTask, CS, OffloadingMandatory, CGF); 9545 return; 9546 } 9547 9548 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>(); 9549 unsigned NumTargetItems = InputInfo.NumberOfTargetItems; 9550 9551 llvm::Value *BasePointersArray = InputInfo.BasePointersArray.getPointer(); 9552 llvm::Value *PointersArray = InputInfo.PointersArray.getPointer(); 9553 llvm::Value *SizesArray = InputInfo.SizesArray.getPointer(); 9554 llvm::Value *MappersArray = InputInfo.MappersArray.getPointer(); 9555 9556 auto &&EmitTargetCallFallbackCB = 9557 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS, 9558 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) 9559 -> llvm::OpenMPIRBuilder::InsertPointTy { 9560 CGF.Builder.restoreIP(IP); 9561 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars, 9562 RequiresOuterTask, CS, OffloadingMandatory, CGF); 9563 return CGF.Builder.saveIP(); 9564 }; 9565 9566 llvm::Value *DeviceID = emitDeviceID(Device, CGF); 9567 llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D); 9568 llvm::Value *NumThreads = 9569 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D); 9570 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc()); 9571 llvm::Value *NumIterations = 9572 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter); 9573 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF); 9574 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( 9575 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator()); 9576 9577 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs( 9578 BasePointersArray, PointersArray, SizesArray, MapTypesArray, 9579 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray); 9580 9581 llvm::OpenMPIRBuilder::TargetKernelArgs Args( 9582 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads, 9583 DynCGGroupMem, HasNoWait); 9584 9585 CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch( 9586 CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args, 9587 DeviceID, RTLoc, AllocaIP)); 9588 }; 9589 9590 if (RequiresOuterTask) 9591 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 9592 else 9593 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 9594 } 9595 9596 static void 9597 emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, 9598 const OMPExecutableDirective &D, 9599 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, 9600 bool RequiresOuterTask, const CapturedStmt &CS, 9601 bool OffloadingMandatory, CodeGenFunction &CGF) { 9602 9603 // Notify that the host version must be executed. 
9604 auto &&ElseGen =
9605 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9606 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9607 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9608 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9609 };
9610
9611 if (RequiresOuterTask) {
9612 CodeGenFunction::OMPTargetDataInfo InputInfo;
9613 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9614 } else {
9615 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9616 }
9617 }
9618
9619 void CGOpenMPRuntime::emitTargetCall(
9620 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9621 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9622 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9623 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9624 const OMPLoopDirective &D)>
9625 SizeEmitter) {
9626 if (!CGF.HaveInsertPoint())
9627 return;
9628
9629 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9630 CGM.getLangOpts().OpenMPOffloadMandatory;
9631
9632 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9633
9634 const bool RequiresOuterTask =
9635 D.hasClausesOfKind<OMPDependClause>() ||
9636 D.hasClausesOfKind<OMPNowaitClause>() ||
9637 D.hasClausesOfKind<OMPInReductionClause>() ||
9638 (CGM.getLangOpts().OpenMP >= 51 &&
9639 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9640 D.hasClausesOfKind<OMPThreadLimitClause>());
9641 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9642 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9643 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9644 PrePostActionTy &) {
9645 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9646 };
9647 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9648
9649 CodeGenFunction::OMPTargetDataInfo InputInfo;
9650 llvm::Value *MapTypesArray = nullptr;
9651 llvm::Value *MapNamesArray = nullptr;
9652
9653 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9654 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9655 OutlinedFnID, &InputInfo, &MapTypesArray,
9656 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9657 PrePostActionTy &) {
9658 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9659 RequiresOuterTask, CS, OffloadingMandatory,
9660 Device, OutlinedFnID, InputInfo, MapTypesArray,
9661 MapNamesArray, SizeEmitter, CGF, CGM);
9662 };
9663
9664 auto &&TargetElseGen =
9665 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9666 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9667 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9668 CS, OffloadingMandatory, CGF);
9669 };
9670
9671 // If we have a target function ID it means that we need to support
9672 // offloading; otherwise, just execute on the host. We need to execute on
9673 // the host regardless of the 'if' clause condition if, e.g., the user does
9674 // not specify target triples.
9675 if (OutlinedFnID) {
9676 if (IfCond) {
9677 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9678 } else {
9679 RegionCodeGenTy ThenRCG(TargetThenGen);
9680 ThenRCG(CGF);
9681 }
9682 } else {
9683 RegionCodeGenTy ElseRCG(TargetElseGen);
9684 ElseRCG(CGF);
9685 }
9686 }
9687
9688 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9689 StringRef ParentName) {
9690 if (!S)
9691 return;
9692
9693 // Codegen OMP target directives that offload compute to the device.
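// That is, any directive for which isOpenMPTargetExecutionDirective() holds,
// e.g. 'target', 'target parallel', or 'target teams distribute'.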
9694 bool RequiresDeviceCodegen = 9695 isa<OMPExecutableDirective>(S) && 9696 isOpenMPTargetExecutionDirective( 9697 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 9698 9699 if (RequiresDeviceCodegen) { 9700 const auto &E = *cast<OMPExecutableDirective>(S); 9701 9702 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc( 9703 CGM, OMPBuilder, E.getBeginLoc(), ParentName); 9704 9705 // Is this a target region that should not be emitted as an entry point? If 9706 // so just signal we are done with this target region. 9707 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo)) 9708 return; 9709 9710 switch (E.getDirectiveKind()) { 9711 case OMPD_target: 9712 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9713 cast<OMPTargetDirective>(E)); 9714 break; 9715 case OMPD_target_parallel: 9716 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9717 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9718 break; 9719 case OMPD_target_teams: 9720 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9721 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9722 break; 9723 case OMPD_target_teams_distribute: 9724 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9725 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9726 break; 9727 case OMPD_target_teams_distribute_simd: 9728 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9729 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9730 break; 9731 case OMPD_target_parallel_for: 9732 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9733 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9734 break; 9735 case OMPD_target_parallel_for_simd: 9736 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9737 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9738 break; 9739 case OMPD_target_simd: 9740 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9741 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9742 break; 9743 case OMPD_target_teams_distribute_parallel_for: 9744 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9745 CGM, ParentName, 9746 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9747 break; 9748 case OMPD_target_teams_distribute_parallel_for_simd: 9749 CodeGenFunction:: 9750 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9751 CGM, ParentName, 9752 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9753 break; 9754 case OMPD_target_teams_loop: 9755 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction( 9756 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E)); 9757 break; 9758 case OMPD_target_parallel_loop: 9759 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( 9760 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E)); 9761 break; 9762 case OMPD_parallel: 9763 case OMPD_for: 9764 case OMPD_parallel_for: 9765 case OMPD_parallel_master: 9766 case OMPD_parallel_sections: 9767 case OMPD_for_simd: 9768 case OMPD_parallel_for_simd: 9769 case OMPD_cancel: 9770 case OMPD_cancellation_point: 9771 case OMPD_ordered: 9772 case OMPD_threadprivate: 9773 case OMPD_allocate: 9774 case OMPD_task: 9775 case OMPD_simd: 9776 case OMPD_tile: 9777 case OMPD_unroll: 9778 case OMPD_sections: 9779 case OMPD_section: 9780 case OMPD_single: 9781 case OMPD_master: 9782 case OMPD_critical: 9783 case OMPD_taskyield: 9784 case OMPD_barrier: 9785 case OMPD_taskwait: 9786 case OMPD_taskgroup: 9787 case OMPD_atomic: 9788 case OMPD_flush: 9789 
case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsTargetDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsTargetDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsTargetDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);

  // If this is an 'extern' declaration, we defer to the canonical definition
  // and do not emit an offloading entry.
  if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
      VD->hasExternalStorage())
    return;

  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
9942 StringRef VarName = CGM.getMangledName(VD); 9943 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9944 } 9945 return; 9946 } 9947 9948 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); }; 9949 auto LinkageForVariable = [&VD, this]() { 9950 return CGM.getLLVMLinkageVarDefinition(VD); 9951 }; 9952 9953 std::vector<llvm::GlobalVariable *> GeneratedRefs; 9954 OMPBuilder.registerTargetGlobalVariable( 9955 convertCaptureClause(VD), convertDeviceClause(VD), 9956 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly, 9957 VD->isExternallyVisible(), 9958 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, 9959 VD->getCanonicalDecl()->getBeginLoc()), 9960 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd, 9961 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable, 9962 CGM.getTypes().ConvertTypeForMem( 9963 CGM.getContext().getPointerType(VD->getType())), 9964 Addr); 9965 9966 for (auto *ref : GeneratedRefs) 9967 CGM.addCompilerUsedGlobal(ref); 9968 } 9969 9970 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9971 if (isa<FunctionDecl>(GD.getDecl()) || 9972 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9973 return emitTargetFunctions(GD); 9974 9975 return emitTargetGlobalVariable(GD); 9976 } 9977 9978 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9979 for (const VarDecl *VD : DeferredGlobalVariables) { 9980 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9981 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9982 if (!Res) 9983 continue; 9984 if ((*Res == OMPDeclareTargetDeclAttr::MT_To || 9985 *Res == OMPDeclareTargetDeclAttr::MT_Enter) && 9986 !HasRequiresUnifiedSharedMemory) { 9987 CGM.EmitGlobal(VD); 9988 } else { 9989 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 9990 ((*Res == OMPDeclareTargetDeclAttr::MT_To || 9991 *Res == OMPDeclareTargetDeclAttr::MT_Enter) && 9992 HasRequiresUnifiedSharedMemory)) && 9993 "Expected link clause or to clause with unified memory."); 9994 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 9995 } 9996 } 9997 } 9998 9999 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10000 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10001 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10002 " Expected target-based directive."); 10003 } 10004 10005 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10006 for (const OMPClause *Clause : D->clauselists()) { 10007 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10008 HasRequiresUnifiedSharedMemory = true; 10009 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true); 10010 } else if (const auto *AC = 10011 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10012 switch (AC->getAtomicDefaultMemOrderKind()) { 10013 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10014 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10015 break; 10016 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10017 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10018 break; 10019 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10020 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10021 break; 10022 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10023 break; 10024 } 10025 } 10026 } 10027 } 10028 10029 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10030 return RequiresAtomicOrdering; 10031 } 10032 10033 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10034 LangAS &AS) { 10035 if (!VD || 
!VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fall back to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target; it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsTargetDevice ||
      (OMPBuilder.OffloadInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit.
Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least one target region.
    assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
            !OMPBuilder.OffloadInfoManager.empty()) &&
           "Target or declare target region expected.");
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(
                            CGM.Int64Ty, OMPBuilder.Config.getRequiresFlags()));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
                                            const Expr *ThreadLimit,
                                            SourceLocation Loc) {
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ?
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10195 CGF.CGM.Int32Ty, /* isSigned = */ true) 10196 : CGF.Builder.getInt32(0); 10197 10198 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit) 10199 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc), 10200 ThreadLimitVal}; 10201 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10202 CGM.getModule(), OMPRTL___kmpc_set_thread_limit), 10203 ThreadLimitArgs); 10204 } 10205 10206 void CGOpenMPRuntime::emitTargetDataCalls( 10207 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10208 const Expr *Device, const RegionCodeGenTy &CodeGen, 10209 CGOpenMPRuntime::TargetDataInfo &Info) { 10210 if (!CGF.HaveInsertPoint()) 10211 return; 10212 10213 // Action used to replace the default codegen action and turn privatization 10214 // off. 10215 PrePostActionTy NoPrivAction; 10216 10217 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 10218 10219 llvm::Value *IfCondVal = nullptr; 10220 if (IfCond) 10221 IfCondVal = CGF.EvaluateExprAsBool(IfCond); 10222 10223 // Emit device ID if any. 10224 llvm::Value *DeviceID = nullptr; 10225 if (Device) { 10226 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10227 CGF.Int64Ty, /*isSigned=*/true); 10228 } else { 10229 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10230 } 10231 10232 // Fill up the arrays with all the mapped variables. 10233 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10234 auto GenMapInfoCB = 10235 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { 10236 CGF.Builder.restoreIP(CodeGenIP); 10237 // Get map clause information. 10238 MappableExprsHandler MEHandler(D, CGF); 10239 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder); 10240 10241 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 10242 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 10243 }; 10244 if (CGM.getCodeGenOpts().getDebugInfo() != 10245 llvm::codegenoptions::NoDebugInfo) { 10246 CombinedInfo.Names.resize(CombinedInfo.Exprs.size()); 10247 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(), 10248 FillInfoMap); 10249 } 10250 10251 return CombinedInfo; 10252 }; 10253 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy; 10254 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) { 10255 CGF.Builder.restoreIP(CodeGenIP); 10256 switch (BodyGenType) { 10257 case BodyGenTy::Priv: 10258 if (!Info.CaptureDeviceAddrMap.empty()) 10259 CodeGen(CGF); 10260 break; 10261 case BodyGenTy::DupNoPriv: 10262 if (!Info.CaptureDeviceAddrMap.empty()) { 10263 CodeGen.setAction(NoPrivAction); 10264 CodeGen(CGF); 10265 } 10266 break; 10267 case BodyGenTy::NoPriv: 10268 if (Info.CaptureDeviceAddrMap.empty()) { 10269 CodeGen.setAction(NoPrivAction); 10270 CodeGen(CGF); 10271 } 10272 break; 10273 } 10274 return InsertPointTy(CGF.Builder.GetInsertBlock(), 10275 CGF.Builder.GetInsertPoint()); 10276 }; 10277 10278 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { 10279 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { 10280 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); 10281 } 10282 }; 10283 10284 auto CustomMapperCB = [&](unsigned int I) { 10285 llvm::Value *MFunc = nullptr; 10286 if (CombinedInfo.Mappers[I]) { 10287 Info.HasMapper = true; 10288 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 10289 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 10290 } 10291 return MFunc; 10292 }; 10293 10294 // Source location for the 
ident struct 10295 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10296 10297 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(), 10298 CGF.AllocaInsertPt->getIterator()); 10299 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(), 10300 CGF.Builder.GetInsertPoint()); 10301 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP); 10302 CGF.Builder.restoreIP(OMPBuilder.createTargetData( 10303 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB, 10304 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc)); 10305 } 10306 10307 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10308 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10309 const Expr *Device) { 10310 if (!CGF.HaveInsertPoint()) 10311 return; 10312 10313 assert((isa<OMPTargetEnterDataDirective>(D) || 10314 isa<OMPTargetExitDataDirective>(D) || 10315 isa<OMPTargetUpdateDirective>(D)) && 10316 "Expecting either target enter, exit data, or update directives."); 10317 10318 CodeGenFunction::OMPTargetDataInfo InputInfo; 10319 llvm::Value *MapTypesArray = nullptr; 10320 llvm::Value *MapNamesArray = nullptr; 10321 // Generate the code for the opening of the data environment. 10322 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 10323 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10324 // Emit device ID if any. 10325 llvm::Value *DeviceID = nullptr; 10326 if (Device) { 10327 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10328 CGF.Int64Ty, /*isSigned=*/true); 10329 } else { 10330 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10331 } 10332 10333 // Emit the number of elements in the offloading arrays. 10334 llvm::Constant *PointerNum = 10335 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10336 10337 // Source location for the ident struct 10338 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10339 10340 llvm::Value *OffloadingArgs[] = {RTLoc, 10341 DeviceID, 10342 PointerNum, 10343 InputInfo.BasePointersArray.getPointer(), 10344 InputInfo.PointersArray.getPointer(), 10345 InputInfo.SizesArray.getPointer(), 10346 MapTypesArray, 10347 MapNamesArray, 10348 InputInfo.MappersArray.getPointer()}; 10349 10350 // Select the right runtime function call for each standalone 10351 // directive. 10352 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10353 RuntimeFunction RTLFn; 10354 switch (D.getDirectiveKind()) { 10355 case OMPD_target_enter_data: 10356 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 10357 : OMPRTL___tgt_target_data_begin_mapper; 10358 break; 10359 case OMPD_target_exit_data: 10360 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 10361 : OMPRTL___tgt_target_data_end_mapper; 10362 break; 10363 case OMPD_target_update: 10364 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 10365 : OMPRTL___tgt_target_data_update_mapper; 10366 break; 10367 case OMPD_parallel: 10368 case OMPD_for: 10369 case OMPD_parallel_for: 10370 case OMPD_parallel_master: 10371 case OMPD_parallel_sections: 10372 case OMPD_for_simd: 10373 case OMPD_parallel_for_simd: 10374 case OMPD_cancel: 10375 case OMPD_cancellation_point: 10376 case OMPD_ordered: 10377 case OMPD_threadprivate: 10378 case OMPD_allocate: 10379 case OMPD_task: 10380 case OMPD_simd: 10381 case OMPD_tile: 10382 case OMPD_unroll: 10383 case OMPD_sections: 10384 case OMPD_section: 10385 case OMPD_single: 10386 case OMPD_master: 10387 case OMPD_critical: 10388 case OMPD_taskyield: 10389 case OMPD_barrier: 10390 case OMPD_taskwait: 10391 case OMPD_taskgroup: 10392 case OMPD_atomic: 10393 case OMPD_flush: 10394 case OMPD_depobj: 10395 case OMPD_scan: 10396 case OMPD_teams: 10397 case OMPD_target_data: 10398 case OMPD_distribute: 10399 case OMPD_distribute_simd: 10400 case OMPD_distribute_parallel_for: 10401 case OMPD_distribute_parallel_for_simd: 10402 case OMPD_teams_distribute: 10403 case OMPD_teams_distribute_simd: 10404 case OMPD_teams_distribute_parallel_for: 10405 case OMPD_teams_distribute_parallel_for_simd: 10406 case OMPD_declare_simd: 10407 case OMPD_declare_variant: 10408 case OMPD_begin_declare_variant: 10409 case OMPD_end_declare_variant: 10410 case OMPD_declare_target: 10411 case OMPD_end_declare_target: 10412 case OMPD_declare_reduction: 10413 case OMPD_declare_mapper: 10414 case OMPD_taskloop: 10415 case OMPD_taskloop_simd: 10416 case OMPD_master_taskloop: 10417 case OMPD_master_taskloop_simd: 10418 case OMPD_parallel_master_taskloop: 10419 case OMPD_parallel_master_taskloop_simd: 10420 case OMPD_target: 10421 case OMPD_target_simd: 10422 case OMPD_target_teams_distribute: 10423 case OMPD_target_teams_distribute_simd: 10424 case OMPD_target_teams_distribute_parallel_for: 10425 case OMPD_target_teams_distribute_parallel_for_simd: 10426 case OMPD_target_teams: 10427 case OMPD_target_parallel: 10428 case OMPD_target_parallel_for: 10429 case OMPD_target_parallel_for_simd: 10430 case OMPD_requires: 10431 case OMPD_metadirective: 10432 case OMPD_unknown: 10433 default: 10434 llvm_unreachable("Unexpected standalone target data directive."); 10435 break; 10436 } 10437 CGF.EmitRuntimeCall( 10438 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 10439 OffloadingArgs); 10440 }; 10441 10442 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10443 &MapNamesArray](CodeGenFunction &CGF, 10444 PrePostActionTy &) { 10445 // Fill up the arrays with all the mapped variables. 10446 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10447 10448 // Get map clause information. 10449 MappableExprsHandler MEHandler(D, CGF); 10450 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder); 10451 10452 CGOpenMPRuntime::TargetDataInfo Info; 10453 // Fill up the arrays and create the arguments. 
emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                     llvm::codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
  bool HasVarStride = false;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise, the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or class
  //   type which is pass-by-value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 "Registers
  // and the Stack Frame" of the original AMD64 ABI document.
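  // For illustration (arithmetic derived from the formula above, not part of
  // the ABI text): a function whose CDT is 'double' gets VLEN = 128 / 64 = 2
  // for the 128-bit SSE register file and VLEN = 256 / 64 = 4 for the
  // 256-bit AVX register file.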
10529 QualType RetType = FD->getReturnType(); 10530 if (RetType.isNull()) 10531 return 0; 10532 ASTContext &C = FD->getASTContext(); 10533 QualType CDT; 10534 if (!RetType.isNull() && !RetType->isVoidType()) { 10535 CDT = RetType; 10536 } else { 10537 unsigned Offset = 0; 10538 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10539 if (ParamAttrs[Offset].Kind == Vector) 10540 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10541 ++Offset; 10542 } 10543 if (CDT.isNull()) { 10544 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10545 if (ParamAttrs[I + Offset].Kind == Vector) { 10546 CDT = FD->getParamDecl(I)->getType(); 10547 break; 10548 } 10549 } 10550 } 10551 } 10552 if (CDT.isNull()) 10553 CDT = C.IntTy; 10554 CDT = CDT->getCanonicalTypeUnqualified(); 10555 if (CDT->isRecordType() || CDT->isUnionType()) 10556 CDT = C.IntTy; 10557 return C.getTypeSize(CDT); 10558 } 10559 10560 /// Mangle the parameter part of the vector function name according to 10561 /// their OpenMP classification. The mangling function is defined in 10562 /// section 4.5 of the AAVFABI(2021Q1). 10563 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10564 SmallString<256> Buffer; 10565 llvm::raw_svector_ostream Out(Buffer); 10566 for (const auto &ParamAttr : ParamAttrs) { 10567 switch (ParamAttr.Kind) { 10568 case Linear: 10569 Out << 'l'; 10570 break; 10571 case LinearRef: 10572 Out << 'R'; 10573 break; 10574 case LinearUVal: 10575 Out << 'U'; 10576 break; 10577 case LinearVal: 10578 Out << 'L'; 10579 break; 10580 case Uniform: 10581 Out << 'u'; 10582 break; 10583 case Vector: 10584 Out << 'v'; 10585 break; 10586 } 10587 if (ParamAttr.HasVarStride) 10588 Out << "s" << ParamAttr.StrideOrArg; 10589 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef || 10590 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) { 10591 // Don't print the step value if it is not present or if it is 10592 // equal to 1. 
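      // E.g. (illustrative): 'linear(x: 2)' on a by-value parameter mangles
      // as "l2", a unit step mangles as just "l", and a negative step of -4
      // mangles as "ln4".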
if (ParamAttr.StrideOrArg < 0)
        Out << 'n' << -ParamAttr.StrideOrArg;
      else if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}

static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      Out << mangleVectorParameters(ParamAttrs);
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
    return false;

  if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
      !QT->isReferenceType())
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types at most 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
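/// For example (illustrative): a by-value 'float' parameter has LS == 32,
/// while a uniform 'double *' parameter has LS == 64, the size of its
/// pointee.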
10712 /// TODO: Add support for references, section 3.2.1, item 1. 10713 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10714 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10715 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10716 if (getAArch64PBV(PTy, C)) 10717 return C.getTypeSize(PTy); 10718 } 10719 if (getAArch64PBV(QT, C)) 10720 return C.getTypeSize(QT); 10721 10722 return C.getTypeSize(C.getUIntPtrType()); 10723 } 10724 10725 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10726 // signature of the scalar function, as defined in 3.2.2 of the 10727 // AAVFABI. 10728 static std::tuple<unsigned, unsigned, bool> 10729 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10730 QualType RetType = FD->getReturnType().getCanonicalType(); 10731 10732 ASTContext &C = FD->getASTContext(); 10733 10734 bool OutputBecomesInput = false; 10735 10736 llvm::SmallVector<unsigned, 8> Sizes; 10737 if (!RetType->isVoidType()) { 10738 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10739 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10740 OutputBecomesInput = true; 10741 } 10742 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10743 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10744 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10745 } 10746 10747 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10748 // The LS of a function parameter / return value can only be a power 10749 // of 2, starting from 8 bits, up to 128. 10750 assert(llvm::all_of(Sizes, 10751 [](unsigned Size) { 10752 return Size == 8 || Size == 16 || Size == 32 || 10753 Size == 64 || Size == 128; 10754 }) && 10755 "Invalid size"); 10756 10757 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10758 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10759 OutputBecomesInput); 10760 } 10761 10762 // Function used to add the attribute. The parameter `VLEN` is 10763 // templated to allow the use of "x" when targeting scalable functions 10764 // for SVE. 10765 template <typename T> 10766 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10767 char ISA, StringRef ParSeq, 10768 StringRef MangledName, bool OutputBecomesInput, 10769 llvm::Function *Fn) { 10770 SmallString<256> Buffer; 10771 llvm::raw_svector_ostream Out(Buffer); 10772 Out << Prefix << ISA << LMask << VLEN; 10773 if (OutputBecomesInput) 10774 Out << "v"; 10775 Out << ParSeq << "_" << MangledName; 10776 Fn->addFnAttr(Out.str()); 10777 } 10778 10779 // Helper function to generate the Advanced SIMD names depending on 10780 // the value of the NDS when simdlen is not present. 
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: the user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1: the SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out the parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
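      // E.g. (illustrative): with 'simdlen(4)' and a single vector
      // parameter, this produces an attribute such as "_ZGVsM4v_foo".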
10869 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10870 OutputBecomesInput, Fn); 10871 } else { 10872 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10873 // Advanced SIMD generates one or two functions, depending on 10874 // the `[not]inbranch` clause. 10875 switch (State) { 10876 case OMPDeclareSimdDeclAttr::BS_Undefined: 10877 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10878 OutputBecomesInput, Fn); 10879 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10880 OutputBecomesInput, Fn); 10881 break; 10882 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10883 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10884 OutputBecomesInput, Fn); 10885 break; 10886 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10887 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10888 OutputBecomesInput, Fn); 10889 break; 10890 } 10891 } 10892 } else { 10893 // If no user simdlen is provided, follow the AAVFABI rules for 10894 // generating the vector length. 10895 if (ISA == 's') { 10896 // SVE, section 3.4.1, item 1. 10897 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10898 OutputBecomesInput, Fn); 10899 } else { 10900 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10901 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10902 // two vector names depending on the use of the clause 10903 // `[not]inbranch`. 10904 switch (State) { 10905 case OMPDeclareSimdDeclAttr::BS_Undefined: 10906 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10907 OutputBecomesInput, Fn); 10908 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10909 OutputBecomesInput, Fn); 10910 break; 10911 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10912 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10913 OutputBecomesInput, Fn); 10914 break; 10915 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10916 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10917 OutputBecomesInput, Fn); 10918 break; 10919 } 10920 } 10921 } 10922 } 10923 10924 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10925 llvm::Function *Fn) { 10926 ASTContext &C = CGM.getContext(); 10927 FD = FD->getMostRecentDecl(); 10928 while (FD) { 10929 // Map params to their positions in function decl. 10930 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10931 if (isa<CXXMethodDecl>(FD)) 10932 ParamPositions.try_emplace(FD, 0); 10933 unsigned ParamPos = ParamPositions.size(); 10934 for (const ParmVarDecl *P : FD->parameters()) { 10935 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10936 ++ParamPos; 10937 } 10938 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10939 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10940 // Mark uniform parameters. 10941 for (const Expr *E : Attr->uniforms()) { 10942 E = E->IgnoreParenImpCasts(); 10943 unsigned Pos; 10944 if (isa<CXXThisExpr>(E)) { 10945 Pos = ParamPositions[FD]; 10946 } else { 10947 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10948 ->getCanonicalDecl(); 10949 auto It = ParamPositions.find(PVD); 10950 assert(It != ParamPositions.end() && "Function parameter not found"); 10951 Pos = It->second; 10952 } 10953 ParamAttrs[Pos].Kind = Uniform; 10954 } 10955 // Get alignment info. 
auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assume a stride of 1 for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // Use dyn_cast here: a non-constant step expression need not be a
            // direct reference to another parameter.
            if (const auto *DRE =
                    dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
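        // E.g. (illustrative): for 'linear(p: 2)' with 'double *p', the
        // mangled step becomes 2 * sizeof(double) == 16, so the parameter
        // mangles as "l16".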
11046 if (!ParamAttr.HasVarStride && 11047 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef)) 11048 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11049 ++SI; 11050 ++MI; 11051 } 11052 llvm::APSInt VLENVal; 11053 SourceLocation ExprLoc; 11054 const Expr *VLENExpr = Attr->getSimdlen(); 11055 if (VLENExpr) { 11056 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11057 ExprLoc = VLENExpr->getExprLoc(); 11058 } 11059 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11060 if (CGM.getTriple().isX86()) { 11061 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11062 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11063 unsigned VLEN = VLENVal.getExtValue(); 11064 StringRef MangledName = Fn->getName(); 11065 if (CGM.getTarget().hasFeature("sve")) 11066 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11067 MangledName, 's', 128, Fn, ExprLoc); 11068 else if (CGM.getTarget().hasFeature("neon")) 11069 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11070 MangledName, 'n', 128, Fn, ExprLoc); 11071 } 11072 } 11073 FD = FD->getPreviousDecl(); 11074 } 11075 } 11076 11077 namespace { 11078 /// Cleanup action for doacross support. 11079 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 11080 public: 11081 static const int DoacrossFinArgs = 2; 11082 11083 private: 11084 llvm::FunctionCallee RTLFn; 11085 llvm::Value *Args[DoacrossFinArgs]; 11086 11087 public: 11088 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 11089 ArrayRef<llvm::Value *> CallArgs) 11090 : RTLFn(RTLFn) { 11091 assert(CallArgs.size() == DoacrossFinArgs); 11092 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11093 } 11094 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11095 if (!CGF.HaveInsertPoint()) 11096 return; 11097 CGF.EmitRuntimeCall(RTLFn, Args); 11098 } 11099 }; 11100 } // namespace 11101 11102 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11103 const OMPLoopDirective &D, 11104 ArrayRef<Expr *> NumIterations) { 11105 if (!CGF.HaveInsertPoint()) 11106 return; 11107 11108 ASTContext &C = CGM.getContext(); 11109 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 11110 RecordDecl *RD; 11111 if (KmpDimTy.isNull()) { 11112 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 11113 // kmp_int64 lo; // lower 11114 // kmp_int64 up; // upper 11115 // kmp_int64 st; // stride 11116 // }; 11117 RD = C.buildImplicitRecord("kmp_dim"); 11118 RD->startDefinition(); 11119 addFieldToRecordDecl(C, RD, Int64Ty); 11120 addFieldToRecordDecl(C, RD, Int64Ty); 11121 addFieldToRecordDecl(C, RD, Int64Ty); 11122 RD->completeDefinition(); 11123 KmpDimTy = C.getRecordType(RD); 11124 } else { 11125 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 11126 } 11127 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 11128 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr, 11129 ArraySizeModifier::Normal, 0); 11130 11131 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 11132 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 11133 enum { LowerFD = 0, UpperFD, StrideFD }; 11134 // Fill dims with data. 
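  // E.g. (illustrative): for 'ordered(2)' each of the two kmp_dim entries
  // becomes {lo = 0, up = <trip count>, st = 1}; 'lo' stays 0 from the null
  // initialization above.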
11135 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11136 LValue DimsLVal = CGF.MakeAddrLValue( 11137 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11138 // dims.upper = num_iterations; 11139 LValue UpperLVal = CGF.EmitLValueForField( 11140 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11141 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 11142 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 11143 Int64Ty, NumIterations[I]->getExprLoc()); 11144 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11145 // dims.stride = 1; 11146 LValue StrideLVal = CGF.EmitLValueForField( 11147 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11148 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11149 StrideLVal); 11150 } 11151 11152 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11153 // kmp_int32 num_dims, struct kmp_dim * dims); 11154 llvm::Value *Args[] = { 11155 emitUpdateLocation(CGF, D.getBeginLoc()), 11156 getThreadID(CGF, D.getBeginLoc()), 11157 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11158 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11159 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 11160 CGM.VoidPtrTy)}; 11161 11162 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11163 CGM.getModule(), OMPRTL___kmpc_doacross_init); 11164 CGF.EmitRuntimeCall(RTLFn, Args); 11165 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11166 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11167 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11168 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 11169 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11170 llvm::ArrayRef(FiniArgs)); 11171 } 11172 11173 template <typename T> 11174 static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, 11175 const T *C, llvm::Value *ULoc, 11176 llvm::Value *ThreadID) { 11177 QualType Int64Ty = 11178 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11179 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11180 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11181 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0); 11182 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11183 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11184 const Expr *CounterVal = C->getLoopData(I); 11185 assert(CounterVal); 11186 llvm::Value *CntVal = CGF.EmitScalarConversion( 11187 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11188 CounterVal->getExprLoc()); 11189 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11190 /*Volatile=*/false, Int64Ty); 11191 } 11192 llvm::Value *Args[] = { 11193 ULoc, ThreadID, CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11194 llvm::FunctionCallee RTLFn; 11195 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 11196 OMPDoacrossKind<T> ODK; 11197 if (ODK.isSource(C)) { 11198 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11199 OMPRTL___kmpc_doacross_post); 11200 } else { 11201 assert(ODK.isSink(C) && "Expect sink modifier."); 11202 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11203 OMPRTL___kmpc_doacross_wait); 11204 } 11205 CGF.EmitRuntimeCall(RTLFn, Args); 11206 } 11207 11208 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11209 const OMPDependClause *C) { 11210 return 
EmitDoacrossOrdered<OMPDependClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDoacrossClause *C) {
  return EmitDoacrossOrdered<OMPDoacrossClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return the allocator value from the expression, or a null allocator (the
/// default when no allocator is specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator is specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

/// Return the alignment from an allocate directive if present.
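/// E.g. (illustrative): '#pragma omp allocate(x) align(64)' yields the
/// constant 64; with no align clause the function returns nullptr.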
11276 static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) { 11277 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD); 11278 11279 if (!AllocateAlignment) 11280 return nullptr; 11281 11282 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity()); 11283 } 11284 11285 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11286 const VarDecl *VD) { 11287 if (!VD) 11288 return Address::invalid(); 11289 Address UntiedAddr = Address::invalid(); 11290 Address UntiedRealAddr = Address::invalid(); 11291 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 11292 if (It != FunctionToUntiedTaskStackMap.end()) { 11293 const UntiedLocalVarsAddressesMap &UntiedData = 11294 UntiedLocalVarsStack[It->second]; 11295 auto I = UntiedData.find(VD); 11296 if (I != UntiedData.end()) { 11297 UntiedAddr = I->second.first; 11298 UntiedRealAddr = I->second.second; 11299 } 11300 } 11301 const VarDecl *CVD = VD->getCanonicalDecl(); 11302 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 11303 // Use the default allocation. 11304 if (!isAllocatableDecl(VD)) 11305 return UntiedAddr; 11306 llvm::Value *Size; 11307 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11308 if (CVD->getType()->isVariablyModifiedType()) { 11309 Size = CGF.getTypeSize(CVD->getType()); 11310 // Align the size: ((size + align - 1) / align) * align 11311 Size = CGF.Builder.CreateNUWAdd( 11312 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11313 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11314 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11315 } else { 11316 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11317 Size = CGM.getSize(Sz.alignTo(Align)); 11318 } 11319 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11320 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11321 const Expr *Allocator = AA->getAllocator(); 11322 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator); 11323 llvm::Value *Alignment = getAlignmentValue(CGM, CVD); 11324 SmallVector<llvm::Value *, 4> Args; 11325 Args.push_back(ThreadID); 11326 if (Alignment) 11327 Args.push_back(Alignment); 11328 Args.push_back(Size); 11329 Args.push_back(AllocVal); 11330 llvm::omp::RuntimeFunction FnID = 11331 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc; 11332 llvm::Value *Addr = CGF.EmitRuntimeCall( 11333 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args, 11334 getName({CVD->getName(), ".void.addr"})); 11335 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11336 CGM.getModule(), OMPRTL___kmpc_free); 11337 QualType Ty = CGM.getContext().getPointerType(CVD->getType()); 11338 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11339 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); 11340 if (UntiedAddr.isValid()) 11341 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); 11342 11343 // Cleanup action for allocate support. 
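    // E.g. (illustrative): for a local declared under
    //   #pragma omp allocate(buf) allocator(omp_high_bw_mem_alloc)
    // the matching __kmpc_free is emitted when 'buf' goes out of scope.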
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}

void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // Likewise for firstprivate, lastprivate, reduction and linear clauses.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

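// For reference, a minimal example of the construct handled below (OpenMP 5.0
// semantics; names are illustrative):
//   #pragma omp parallel for lastprivate(conditional: a)
//   for (int i = 0; i < n; ++i)
//     if (cond[i])
//       a = f(i);
// After the loop, 'a' must hold the value stored by the sequentially last
// iteration that actually assigned it, so codegen tracks a per-variable
// "fired" flag plus the iteration counter of the last update.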
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr =
        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

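/// Emit the global update for a lastprivate conditional variable: if the last
/// recorded iteration counter is not greater than the current one, store the
/// current counter and the private value into internal global variables. The
/// update runs under a critical section named after the variable (or inline
/// in simd-only mode).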
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // Check whether the variable was updated (last_iv <= iv) and, if so,
    // store the new value in the global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
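    // In simd-only mode a single thread executes the construct, so the update
    // can be emitted inline without mutual exclusion.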
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

// CGOpenMPSIMDRuntime is used in simd-only mode (-fopenmp-simd): only simd
// code is generated, so every entry point that would require the full OpenMP
// runtime library is unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
in SIMD-only mode"); 12085 } 12086 12087 void CGOpenMPSIMDRuntime::emitReduction( 12088 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12089 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12090 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12091 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12092 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12093 ReductionOps, Options); 12094 } 12095 12096 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12097 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12098 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12099 llvm_unreachable("Not supported in SIMD-only mode"); 12100 } 12101 12102 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12103 SourceLocation Loc, 12104 bool IsWorksharingReduction) { 12105 llvm_unreachable("Not supported in SIMD-only mode"); 12106 } 12107 12108 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12109 SourceLocation Loc, 12110 ReductionCodeGen &RCG, 12111 unsigned N) { 12112 llvm_unreachable("Not supported in SIMD-only mode"); 12113 } 12114 12115 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12116 SourceLocation Loc, 12117 llvm::Value *ReductionsPtr, 12118 LValue SharedLVal) { 12119 llvm_unreachable("Not supported in SIMD-only mode"); 12120 } 12121 12122 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12123 SourceLocation Loc, 12124 const OMPTaskDataTy &Data) { 12125 llvm_unreachable("Not supported in SIMD-only mode"); 12126 } 12127 12128 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12129 CodeGenFunction &CGF, SourceLocation Loc, 12130 OpenMPDirectiveKind CancelRegion) { 12131 llvm_unreachable("Not supported in SIMD-only mode"); 12132 } 12133 12134 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12135 SourceLocation Loc, const Expr *IfCond, 12136 OpenMPDirectiveKind CancelRegion) { 12137 llvm_unreachable("Not supported in SIMD-only mode"); 12138 } 12139 12140 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12141 const OMPExecutableDirective &D, StringRef ParentName, 12142 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12143 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12144 llvm_unreachable("Not supported in SIMD-only mode"); 12145 } 12146 12147 void CGOpenMPSIMDRuntime::emitTargetCall( 12148 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12149 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12150 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12151 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12152 const OMPLoopDirective &D)> 12153 SizeEmitter) { 12154 llvm_unreachable("Not supported in SIMD-only mode"); 12155 } 12156 12157 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12158 llvm_unreachable("Not supported in SIMD-only mode"); 12159 } 12160 12161 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12162 llvm_unreachable("Not supported in SIMD-only mode"); 12163 } 12164 12165 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 12166 return false; 12167 } 12168 12169 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12170 const OMPExecutableDirective &D, 12171 SourceLocation Loc, 12172 llvm::Function *OutlinedFn, 12173 ArrayRef<llvm::Value *> CapturedVars) { 12174 llvm_unreachable("Not supported in 
SIMD-only mode"); 12175 } 12176 12177 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12178 const Expr *NumTeams, 12179 const Expr *ThreadLimit, 12180 SourceLocation Loc) { 12181 llvm_unreachable("Not supported in SIMD-only mode"); 12182 } 12183 12184 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12185 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12186 const Expr *Device, const RegionCodeGenTy &CodeGen, 12187 CGOpenMPRuntime::TargetDataInfo &Info) { 12188 llvm_unreachable("Not supported in SIMD-only mode"); 12189 } 12190 12191 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12192 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12193 const Expr *Device) { 12194 llvm_unreachable("Not supported in SIMD-only mode"); 12195 } 12196 12197 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12198 const OMPLoopDirective &D, 12199 ArrayRef<Expr *> NumIterations) { 12200 llvm_unreachable("Not supported in SIMD-only mode"); 12201 } 12202 12203 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12204 const OMPDependClause *C) { 12205 llvm_unreachable("Not supported in SIMD-only mode"); 12206 } 12207 12208 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12209 const OMPDoacrossClause *C) { 12210 llvm_unreachable("Not supported in SIMD-only mode"); 12211 } 12212 12213 const VarDecl * 12214 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12215 const VarDecl *NativeParam) const { 12216 llvm_unreachable("Not supported in SIMD-only mode"); 12217 } 12218 12219 Address 12220 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12221 const VarDecl *NativeParam, 12222 const VarDecl *TargetParam) const { 12223 llvm_unreachable("Not supported in SIMD-only mode"); 12224 } 12225